In [1]:
# Install required libraries
!pip install scikit-learn pandas joblib kaggle

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# 2. Download dataset
!kaggle datasets download therohithanand/movie-genre-classification
!unzip -q movie-genre-classification.zip -d movie_genre_data

Dataset URL: https://www.kaggle.com/datasets/therohithanand/movie-genre-classification
License(s): CC-BY-SA-4.0
Downloading movie-genre-classification.zip to /content
  0% 0.00/1.52M [00:00<?, ?B/s]
100% 1.52M/1.52M [00:00<00:00, 568MB/s]


In [3]:
import pandas as pd
df = pd.read_csv('movie_genre_data/movie_genre_classification_final.csv')

In [5]:
df = df[['Description', 'Genre']].dropna()
print("Dataset size:", df.shape)

import re

def clean_text(text):
    text = text.lower()
    text = re.sub(r"<[^>]+>", "", text)
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"[^a-z\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

df['clean_description'] = df['Description'].apply(clean_text)


from sklearn.model_selection import train_test_split

X = df['clean_description']
y = df['Genre']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(max_features=10000, ngram_range=(1,2))
X_train_vec = tfidf.fit_transform(X_train)
X_test_vec = tfidf.transform(X_test)


from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB()
model.fit(X_train_vec, y_train)


from sklearn.metrics import accuracy_score, classification_report

y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)

print(f"\n Accuracy: {accuracy * 100:.2f}%")
print("\n Classification Report:\n", classification_report(y_test, y_pred))


import joblib

joblib.dump(model, 'genre_classifier.pkl')
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')
print("💾 Model and vectorizer saved!")

from google.colab import files
files.download('genre_classifier.pkl')
files.download('tfidf_vectorizer.pkl')


Dataset size: (50000, 2)

 Accuracy: 100.00%

 Classification Report:
               precision    recall  f1-score   support

      Action       1.00      1.00      1.00      1421
      Comedy       1.00      1.00      1.00      1412
       Drama       1.00      1.00      1.00      1437
     Fantasy       1.00      1.00      1.00      1420
      Horror       1.00      1.00      1.00      1452
     Romance       1.00      1.00      1.00      1434
    Thriller       1.00      1.00      1.00      1424

    accuracy                           1.00     10000
   macro avg       1.00      1.00      1.00     10000
weighted avg       1.00      1.00      1.00     10000

💾 Model and vectorizer saved!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
import joblib
import re

model = joblib.load('genre_classifier.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

def clean_text(text):
    text = text.lower()
    text = re.sub(r"<[^>]+>", "", text)
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"[^a-z\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

while True:
    print("\n🎬 Enter a movie description (or type 'exit' to stop):")
    user_input = input()

    if user_input.lower() == 'exit':
        print("Session ended.")
        break

    cleaned_input = clean_text(user_input)
    vectorized_input = vectorizer.transform([cleaned_input])
    prediction = model.predict(vectorized_input)[0]

    print(f" Predicted Genre: {prediction}")



🎬 Enter a movie description (or type 'exit' to stop):
A former hitman comes out of retirement to track down the gangsters that took everything from him
 Predicted Genre: Comedy

🎬 Enter a movie description (or type 'exit' to stop):
x
 Predicted Genre: Horror

🎬 Enter a movie description (or type 'exit' to stop):
exit
Session ended.


In [7]:
import pickle

with open("genre_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)


In [8]:
!pip install flask flask-ngrok pyngrok scikit-learn


Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Downloading pyngrok-7.2.12-py3-none-any.whl (26 kB)
Installing collected packages: pyngrok, flask-ngrok
Successfully installed flask-ngrok-0.0.25 pyngrok-7.2.12


In [9]:
import os

os.makedirs("templates", exist_ok=True)

html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Movie Genre Predictor</title>
</head>
<body>
    <h2>Enter a Movie Description</h2>
    <form method="post">
        <textarea name="description" rows="5" cols="50" required></textarea><br><br>
        <button type="submit">Predict Genre</button>
    </form>
    {% if prediction %}
        <h3>Predicted Genre: {{ prediction }}</h3>
    {% endif %}
</body>
</html>


"""

#  Save to templates/index.html
with open("templates/index.html", "w", encoding="utf-8") as f:
    f.write(html_content)

print(" templates/index.html saved successfully!")

 templates/index.html saved successfully!


In [None]:
from flask import Flask, request, render_template_string
from pyngrok import ngrok
import pickle

# Load model and vectorizer
with open("genre_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("tfidf_vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)

# Flask app setup
app = Flask(__name__)

HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>Movie Genre Classifier</title>
</head>
<body style="font-family: Arial; padding: 30px; max-width: 600px; margin: auto;">
    <h2>Enter Movie Description</h2>
    <form method="post">
        <textarea name="description" rows="6" cols="60" placeholder="Enter the movie plot..." required></textarea><br><br>
        <button type="submit">Predict Genre</button>
    </form>
    {% if prediction %}
        <h3>Predicted Genre: <span style="color: green;">{{ prediction }}</span></h3>
    {% endif %}
</body>
</html>
"""

@app.route("/", methods=["GET", "POST"])
def index():
    prediction = None
    if request.method == "POST":
        desc = request.form["description"]
        vec = vectorizer.transform([desc])
        pred = model.predict(vec)[0]
        prediction = pred
    return render_template_string(HTML_TEMPLATE, prediction=prediction)

# Launch with ngrok
ngrok.set_auth_token("30MW9yFrnnvBU9EucDW5KKxM8jR_4ZnJMzEAiz1vhMgZzgQxh")
public_url = ngrok.connect(5000)
print(" * ngrok tunnel:", public_url)
app.run(port=5000)


 * ngrok tunnel: NgrokTunnel: "https://d371350aff33.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [25/Jul/2025 10:11:54] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [25/Jul/2025 10:11:55] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [25/Jul/2025 10:12:00] "POST / HTTP/1.1" 200 -
