# Genre Classification Model

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
def load_train(path):
    data=[]
    with open(path,'r',encoding="utf-8") as file:
        for line in file:
            word=line.strip().split(":::")
            if len(word)==4:
                _id, title, genre, desc = map(str.strip, word)
                data.append([_id,title,genre,desc])
    return pd.DataFrame(data,columns=["ID","Title","Genre","Description"])

In [None]:
def load_test(path):
    data=[]
    with open(path,'r',encoding="utf-8") as file:
        for line in file:
            word=line.strip().split(":::")
            if len(word)==3:
                _id, title, desc = map(str.strip, word)
                data.append([_id,title,desc])
    return pd.DataFrame(data,columns=["ID","Title","Description"])

In [None]:
def soln(path):
    data=[]
    with open(path,'r',encoding="utf-8") as file:
        for line in file:
            word=line.strip().split(":::")
            if len(word)>=3:
                _id=word[0].strip()
                genre=word[2].strip()
                data.append([_id,genre])
    return pd.DataFrame(data,columns=["ID","Genre"])

In [None]:
train=load_train("/content/drive/My Drive/codsoft/movie/dataset/train_data.txt")
test=load_test("/content/drive/My Drive/codsoft/movie/dataset/test_data.txt")
solution=soln("/content/drive/My Drive/codsoft/movie/dataset/test_data_solution.txt")

In [None]:
train["ID"] = train["ID"].astype(str).str.strip()
test["ID"] = test["ID"].astype(str).str.strip()
solution["ID"] = solution["ID"].astype(str).str.strip()

In [None]:
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train["Description"])
y_train = train["Genre"]

In [68]:
model = LogisticRegression(max_iter=1000,class_weight="balanced")
model.fit(X_train, y_train)

In [70]:
X_test = vectorizer.transform(test["Description"])
predictions = model.predict(X_test)
test["Predicted_Genre"] = predictions

In [71]:
result_df = test.merge(solution, on="ID")

In [75]:
print("Sample Predictions:\n", result_df[["ID", "Predicted_Genre","Genre_y"]].head())
print("\nAccuracy:", accuracy_score(result_df["Predicted_Genre"],result_df["Genre_y"], ))
print("\nClassification Report:\n")
print(classification_report(result_df["Predicted_Genre"],result_df["Genre_y"]))

Sample Predictions:
   ID Predicted_Genre      Genre_y
0  1          comedy     thriller
1  2           drama       comedy
2  3     documentary  documentary
3  4           drama        drama
4  5       biography        drama

Accuracy: 0.49546125461254614

Classification Report:

              precision    recall  f1-score   support

      action       0.50      0.33      0.40      1965
       adult       0.67      0.36      0.47      1104
   adventure       0.33      0.25      0.28      1039
   animation       0.30      0.23      0.26       631
   biography       0.17      0.06      0.09       771
      comedy       0.47      0.61      0.53      5743
       crime       0.35      0.15      0.21      1185
 documentary       0.62      0.79      0.69     10321
       drama       0.41      0.70      0.52      7937
      family       0.31      0.16      0.21      1507
     fantasy       0.25      0.15      0.19       549
   game-show       0.75      0.66      0.70       219
     history    

In [77]:
description = input("\nEnter a movie description to predict its genre:\n")

X_input = vectorizer.transform([description])
predicted_genre = model.predict(X_input)[0]
print("\nInput Description:")
print(description)
print("Predicted Genre:", predicted_genre)


Enter a movie description to predict its genre:
Jojo Johnson, David "DJ" John, Shifas "Valuth" Ahmed, Shifas "Cheruth" Ali, and Shanavas are five friends from Alappuzha who have just completed their 12th-grade board exams. While Shanavas passes, the rest of the group fails, leaving their prospects of college admission uncertain.  After Valuth gets into a scuffle for flirting with a state-level boxer’s girlfriend, Jojo begins researching alternative ways to enter college and discovers that sports quota admissions through boxing tournaments are a viable route. The group decides to pursue boxing and enroll at the local Alappuzha Gymkhana boxing school, under coach Salim. However, Shanavas soon loses interest and quits, while DJ switches to wrestling.

Input Description:
Jojo Johnson, David "DJ" John, Shifas "Valuth" Ahmed, Shifas "Cheruth" Ali, and Shanavas are five friends from Alappuzha who have just completed their 12th-grade board exams. While Shanavas passes, the rest of the group f