# MOVIE GENRE CLASSIFIER

## Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## Reading the Dataset

In [2]:
column_names = ['ID','TITLE','GENRE','DESCRIPTION']

train = pd.read_csv('train_data.txt',sep = ':::',names = column_names)
train

  train = pd.read_csv('train_data.txt',sep = ':::',names = column_names)


Unnamed: 0,ID,TITLE,GENRE,DESCRIPTION
0,1,Oscar et la dame rose (2009),drama,Listening in to a conversation between his do...
1,2,Cupid (1997),thriller,A brother and sister with a past incestuous r...
2,3,"Young, Wild and Wonderful (1980)",adult,As the bus empties the students for their fie...
3,4,The Secret Sin (1915),drama,To help their unemployed father make ends mee...
4,5,The Unrecovered (2007),drama,The film's title refers not only to the un-re...
...,...,...,...,...
54209,54210,"""Bonino"" (1953)",comedy,This short-lived NBC live sitcom centered on ...
54210,54211,Dead Girls Don't Cry (????),horror,The NEXT Generation of EXPLOITATION. The sist...
54211,54212,Ronald Goedemondt: Ze bestaan echt (2008),documentary,"Ze bestaan echt, is a stand-up comedy about g..."
54212,54213,Make Your Own Bed (1944),comedy,Walter and Vivian live in the country and hav...


In [3]:
column_names = ['ID','TITLE','DESCRIPTION']

test = pd.read_csv('test_data.txt',sep = ':::',names = column_names)
test

  test = pd.read_csv('test_data.txt',sep = ':::',names = column_names)


Unnamed: 0,ID,TITLE,DESCRIPTION
0,1,Edgar's Lunch (1998),"L.R. Brane loves his life - his car, his apar..."
1,2,La guerra de papá (1977),"Spain, March 1964: Quico is a very naughty ch..."
2,3,Off the Beaten Track (2010),One year in the life of Albin and his family ...
3,4,Meu Amigo Hindu (2015),"His father has died, he hasn't spoken with hi..."
4,5,Er nu zhai (1955),Before he was known internationally as a mart...
...,...,...,...
54195,54196,"""Tales of Light & Dark"" (2013)","Covering multiple genres, Tales of Light & Da..."
54196,54197,Der letzte Mohikaner (1965),As Alice and Cora Munro attempt to find their...
54197,54198,Oliver Twink (2007),A movie 169 years in the making. Oliver Twist...
54198,54199,Slipstream (1973),"Popular, but mysterious rock D.J Mike Mallard..."


In [4]:
column_names = ['ID','TITLE','GENRE','DESCRIPTION']

test_soln = pd.read_csv('test_data_solution.txt',sep = ':::',names = column_names)
test_soln

  test_soln = pd.read_csv('test_data_solution.txt',sep = ':::',names = column_names)


Unnamed: 0,ID,TITLE,GENRE,DESCRIPTION
0,1,Edgar's Lunch (1998),thriller,"L.R. Brane loves his life - his car, his apar..."
1,2,La guerra de papá (1977),comedy,"Spain, March 1964: Quico is a very naughty ch..."
2,3,Off the Beaten Track (2010),documentary,One year in the life of Albin and his family ...
3,4,Meu Amigo Hindu (2015),drama,"His father has died, he hasn't spoken with hi..."
4,5,Er nu zhai (1955),drama,Before he was known internationally as a mart...
...,...,...,...,...
54195,54196,"""Tales of Light & Dark"" (2013)",horror,"Covering multiple genres, Tales of Light & Da..."
54196,54197,Der letzte Mohikaner (1965),western,As Alice and Cora Munro attempt to find their...
54197,54198,Oliver Twink (2007),adult,A movie 169 years in the making. Oliver Twist...
54198,54199,Slipstream (1973),drama,"Popular, but mysterious rock D.J Mike Mallard..."


## Splitting the Dataset into Training and Testing Data

In [5]:
X_train = train['DESCRIPTION']
y_train = train['GENRE']
X_test = test['DESCRIPTION']
y_test_solution = test_soln['GENRE']

## Vectorize the text using TF-IDF

In [6]:
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [7]:
models = {
    "Naive bias": MultinomialNB(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Support vector": LinearSVC()
}

In [8]:
predictions = {}

## Training all the Models

In [9]:
for modelName,model in models.items():
    print(f"Training of {modelName}")
    model.fit(X_train_tfidf,y_train)
    y_pred = model.predict(X_test_tfidf)
    predictions[modelName] = y_pred
    accuracy = accuracy_score(y_test_solution, y_pred)
    print(f"Accuracy of {modelName}:", accuracy,"\n")

Training of Naive bias
Accuracy of Naive bias: 0.5238560885608856 

Training of Logistic Regression
Accuracy of Logistic Regression: 0.583929889298893 

Training of Support vector




Accuracy of Support vector: 0.5712361623616237 



Predict function 

In [10]:
def predict(title,description):
    input_data = tfidf.transform([description])
    naive_bias = models["Naive bias"].predict(input_data)
    return naive_bias[0]

## User input and Prediction

In [12]:
print("Enter the Movie details:")
title = input("Movie Title: ")
description = input("Movie Description: ")
output_genre = predict(title,description)
print("\n",output_genre)

Enter the Movie details:
Movie Title: Off the Beaten Track
Movie Description: One year in the life of Albin and his family of shepherds in the North of Transylvania. In direct cinema style, this documentary follows their day to day routines, and their struggle to adapt to a new world where traditions are gradually replaced by modern values. Since joining the EU, Romania has been facing, like several other Eastern European countries, the pressure of modern values, introducing in farmer's lives the cruel notion of competition, the temptation of migrating to the higher salaries abroad, and the marginalization of locally produced food against industrial products.

  documentary 
