In [2]:
!pip install pandas numpy scikit-learn tensorflow tqdm




In [5]:

!pip install tensorflow==2.12




[31mERROR: Could not find a version that satisfies the requirement tensorflow==2.12 (from versions: 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0, 2.19.1, 2.20.0rc0, 2.20.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow==2.12[0m[31m
[0m

In [6]:
#1) Imports + Load CSVs + Merge
import pandas as pd
import numpy as np
import ast
import re
from tqdm import tqdm

# Load datasets (keep same folder)
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Merge on title & select needed columns
df = movies.merge(credits, on='title', how='inner')
df = df[['movie_id','title','overview','genres']].dropna().reset_index(drop=True)

print(df.shape)
df.head()


(4806, 4)


Unnamed: 0,movie_id,title,overview,genres
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam..."
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."


In [7]:
#2) Parse Genres → Multi-label One-Hot (supervised signal)
def parse_genres(g):
    # g is a string like: '[{"id":28,"name":"Action"}, ...]'
    try:
        return [x['name'] for x in ast.literal_eval(g)]
    except Exception:
        return []

df['genres_list'] = df['genres'].apply(parse_genres)
df = df[df['genres_list'].map(len) > 0].reset_index(drop=True)

from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(df['genres_list'])

print("Num movies:", len(df), "| Num genres:", len(mlb.classes_))


Num movies: 4778 | Num genres: 20


In [8]:
#3) Clean Text + Tokenize + Pad
def clean_text(t):
    t = str(t).lower()
    t = re.sub(r'[^a-z0-9\s]', ' ', t)
    t = re.sub(r'\s+', ' ', t).strip()
    return t

df['clean_overview'] = df['overview'].apply(clean_text)

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

VOCAB = 20000       # vocab size
MAX_LEN = 300       # max tokens per overview

tok = Tokenizer(num_words=VOCAB, oov_token='<unk>')
tok.fit_on_texts(df['clean_overview'])

seqs = tok.texts_to_sequences(df['clean_overview'])
X = pad_sequences(seqs, maxlen=MAX_LEN, padding='post', truncating='post')

X.shape, Y.shape


((4778, 300), (4778, 20))

In [9]:
#4) Train/Val Split
from sklearn.model_selection import train_test_split
X_tr, X_val, Y_tr, Y_val = train_test_split(X, Y, test_size=0.1, random_state=42, stratify=Y.sum(axis=1)>0)
X_tr.shape, X_val.shape


((4300, 300), (478, 300))

In [10]:
#Build BiLSTM Model (predict genres; penultimate layer = movie embedding)
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, GlobalMaxPool1D, Dense, Dropout
from tensorflow.keras.models import Model

EMB_DIM = 128      # word embedding dim
HID_DIM = 128      # LSTM hidden
MOV_EMB = 256      # final movie embedding size

inp = Input(shape=(MAX_LEN,))
emb = Embedding(VOCAB, EMB_DIM, mask_zero=True)(inp)
x = Bidirectional(LSTM(HID_DIM, return_sequences=True))(emb)
x = GlobalMaxPool1D()(x)
movie_embedding = Dense(MOV_EMB, activation='relu', name='movie_embedding')(x)
x = Dropout(0.3)(movie_embedding)
out = Dense(Y.shape[1], activation='sigmoid', name='genre_output')(x)

model = Model(inp, out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [11]:
#6) Train (EarlyStopping + ModelCheckpoint recommended)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
ckpt = ModelCheckpoint('bilstm_genre_best.keras', monitor='val_loss', save_best_only=True)

history = model.fit(
    X_tr, Y_tr,
    validation_data=(X_val, Y_val),
    epochs=8,
    batch_size=64,
    callbacks=[es, ckpt],
    verbose=1
)


Epoch 1/8
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 3s/step - accuracy: 0.1806 - loss: 0.4929 - val_accuracy: 0.2259 - val_loss: 0.3185
Epoch 2/8
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 2s/step - accuracy: 0.2232 - loss: 0.3271 - val_accuracy: 0.2259 - val_loss: 0.3182
Epoch 3/8
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 2s/step - accuracy: 0.2290 - loss: 0.3267 - val_accuracy: 0.2259 - val_loss: 0.3166
Epoch 4/8
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 2s/step - accuracy: 0.2343 - loss: 0.3213 - val_accuracy: 0.2301 - val_loss: 0.3118
Epoch 5/8
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 2s/step - accuracy: 0.3151 - loss: 0.3009 - val_accuracy: 0.2824 - val_loss: 0.3022
Epoch 6/8
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 2s/step - accuracy: 0.3646 - loss: 0.2756 - val_accuracy: 0.3326 - val_loss: 0.2971
Epoch 7/8
[1m68/68[0m [32m━━━━━━━━━━━

In [12]:
#7) Extract Movie Embeddings (penultimate layer output)

# Model that outputs the 'movie_embedding' layer
embed_model = Model(inputs=model.input, outputs=model.get_layer('movie_embedding').output)

movie_vecs = embed_model.predict(X, batch_size=256, verbose=1)  # shape: (N, MOV_EMB)

# Normalize for stable cosine (optional but good)
norms = np.linalg.norm(movie_vecs, axis=1, keepdims=True) + 1e-8
movie_vecs_norm = movie_vecs / norms
movie_vecs_norm.shape


[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 3s/step


(4778, 256)

In [31]:
from sklearn.metrics.pairwise import cosine_similarity

similarity = cosine_similarity(movie_vecs_norm)
similarity.shape


(4778, 4778)

In [32]:
titles = df['title'].tolist()
title_to_idx = {t: i for i, t in enumerate(titles)}

def recommend(movie, k=5):
    if movie not in title_to_idx:
        return ["Movie not found"]

    idx = title_to_idx[movie]
    sims = list(enumerate(similarity[idx]))

    # sort by similarity score
    sims = sorted(sims, key=lambda x: x[1], reverse=True)

    # pick top k (skip itself → [1:k+1])
    top_movies = sims[1:k+1]

    return [titles[i] for i, _ in top_movies]


In [33]:
recommend("Avatar", 5)


['Green Lantern',
 'Mad Max Beyond Thunderdome',
 'Independence Day: Resurgence',
 'Transformers: Revenge of the Fallen',
 'Beastmaster 2: Through the Portal of Time']

In [34]:
titles = df['title'].tolist()
title_to_idx = {t: i for i, t in enumerate(titles)}

def recommend(movie, k=5):
    # movie available check
    if movie not in title_to_idx:
        return ["❌ Movie not found! Check spelling"]

    idx = title_to_idx[movie]
    sims = list(enumerate(similarity[idx]))

    # highest similarity
    sims = sorted(sims, key=lambda x: x[1], reverse=True)
    top_movies = sims[1:k+1]   # skip itself

    return [titles[i] for i,_ in top_movies]


In [35]:
movie_name = input("🎬 Enter movie name: ")

recs = recommend(movie_name, 5)

print("\n✅ Top 5 Recommended Movies for:", movie_name)
for r in recs:
    print("👉", r)


🎬 Enter movie name: Batman

✅ Top 5 Recommended Movies for: Batman
👉 Batman
👉 Sausage Party
👉 Logan's Run
👉 Big Hero 6
👉 Muppets from Space


In [36]:
!pip install ipywidgets


Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


In [37]:
from google.colab import output
output.enable_custom_widget_manager()


In [38]:
import ipywidgets as widgets
from IPython.display import display

# titles list already available: titles = df['title'].tolist()

# Dropdown widget
dropdown = widgets.Dropdown(
    options=titles,
    description='🎬 Movie:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

button = widgets.Button(
    description='Recommend',
    button_style='success'
)

output_box = widgets.Output()

def on_button_clicked(b):
    with output_box:
        output_box.clear_output()
        movie = dropdown.value
        recs = recommend(movie, 5)

        print(f"\n✅ Top 5 recommended movies for: {movie}\n")
        for r in recs:
            print("👉", r)

button.on_click(on_button_clicked)

display(dropdown, button, output_box)


Dropdown(description='🎬 Movie:', layout=Layout(width='50%'), options=('Avatar', "Pirates of the Caribbean: At …

Button(button_style='success', description='Recommend', style=ButtonStyle())

Output()

In [39]:
#✅ STEP 1 — Install Required Packages
!pip install ipywidgets requests
from google.colab import output
output.enable_custom_widget_manager()




In [40]:
#✅ STEP 2 API
TMDB_API_KEY = "YOUR_TMDB_API_KEY"


In [41]:
#✅ STEP 3 — Poster Fetch Function
import requests

def get_poster(movie_id):
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={TMDB_API_KEY}"
    data = requests.get(url).json()
    poster_path = data.get("poster_path")

    if poster_path:
        return "https://image.tmdb.org/t/p/w500" + poster_path
    else:
        return "https://via.placeholder.com/500x750?text=No+Poster"


In [43]:
✅ STEP 4 — FANCY UI (Dropdown + Posters + Grid View)
import ipywidgets as widgets
from IPython.display import display, HTML

# main dropdown to choose a movie
dropdown = widgets.Combobox(
    placeholder='Type or select a movie',
    options=titles,
    description='🎬 Movie:',
    ensure_option=True,
    layout=widgets.Layout(width='50%')
)

button = widgets.Button(
    description='Recommend',
    button_style='info',
    icon='search'
)

output_box = widgets.Output()

def on_recommend_click(b):
    with output_box:
        output_box.clear_output()

        movie = dropdown.value
        results = recommend(movie, 5)

        html = f"<h2>✅ Top 5 Movies like <b>{movie}</b></h2><br>"

        html += "<div style='display:flex; gap:20px;'>"

        for r in results:
            try:
                movie_id = df[df['title'] == r].iloc[0]['movie_id']
                poster_url = get_poster(movie_id)
            except:
                poster_url = "https://via.placeholder.com/500x750?text=No+Poster"

            html += f"""
            <div style='text-align:center;'>
                <img src="{poster_url}" style="width:160px; border-radius:10px;">
                <p style='font-size:16px; font-weight:bold;'>{r}</p>
            </div>
            """

        html += "</div>"

        display(HTML(html))

button.on_click(on_recommend_click)

display(dropdown, button, output_box)


Combobox(value='', description='🎬 Movie:', ensure_option=True, layout=Layout(width='50%'), options=('Avatar', …

Button(button_style='info', description='Recommend', icon='search', style=ButtonStyle())

Output()

In [44]:
#✅ STEP 1 — Install & Enable Widgets

!pip install ipywidgets requests
from google.colab import output
output.enable_custom_widget_manager()




In [57]:
TMDB_API_KEY = "e488bc4a"



In [58]:
#✅ STEP 3 — Helper Functions (Poster, Cast, Trailer, Genres)

import requests

def tmdb_movie_details(movie_id):
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={TMDB_API_KEY}&append_to_response=credits,videos"
    return requests.get(url).json()

def poster_url(path):
    if not path:
        return "https://via.placeholder.com/500x750?text=No+Poster"
    return f"https://image.tmdb.org/t/p/w500{path}"

def get_genre_chips(genres):
    chips = ""
    for g in genres:
        chips += f"<span style='background:#e50914;padding:5px 10px;border-radius:20px;margin-right:5px;color:white;font-size:12px;'>{g}</span>"
    return chips

def get_cast_images(cast_list):
    HTML_cast = "<div style='display:flex;gap:10px;margin-top:10px;'>"
    for actor in cast_list[:5]:
        img = poster_url(actor.get('profile_path'))
        HTML_cast += f"""
        <div style='text-align:center;'>
            <img src='{img}' style='width:60px;border-radius:50%;'>
            <p style='font-size:10px;'>{actor['name']}</p>
        </div>
        """
    HTML_cast += "</div>"
    return HTML_cast

def get_trailer(videos):
    for v in videos.get("results", []):
        if v["type"] == "Trailer" and v["site"] == "YouTube":
            return f"https://www.youtube.com/watch?v={v['key']}"
    return None


In [59]:
from IPython.display import HTML, display
import ipywidgets as widgets

def netflix_recommend(movie, k=10):
    movie = movie.strip()
    if movie not in title_to_idx:
        return HTML("<h2 style='color:red;'>Movie not found!</h2>")

    idx = title_to_idx[movie]
    sims = list(enumerate(similarity[idx]))
    sims = sorted(sims, key=lambda x: x[1], reverse=True)[1:k+1]

    html = f"<h1 style='color:white;background:#141414;padding:15px;border-radius:10px;'>🎬 Movies like <b>{movie}</b></h1>"

    html += "<div style='display:flex;overflow-x:auto;gap:20px;padding:10px;'>"

    for i, _ in sims:
        title = titles[i]
        movie_id = df[df["title"] == title].iloc[0]["movie_id"]

        details = tmdb_movie_details(movie_id)

        poster = poster_url(details.get("poster_path"))
        genres = [g['name'] for g in details.get('genres', [])]
        cast = details.get("credits", {}).get("cast", [])
        rating = details.get("vote_average", 0)
        year = details.get("release_date", "")[:4]
        trailer = get_trailer(details.get("videos", {}))

        html += f"""
        <div style='min-width:220px;background:#222;padding:10px;border-radius:10px;color:white;'>
            <img src='{poster}' style='width:200px;border-radius:10px;'>
            <h3>{title} ({year})</h3>
            <p>⭐ {rating}</p>
            {get_genre_chips(genres)}
            <br><br>
            {get_cast_images(cast)}
            <br>
        """

        if trailer:
            html += f"<a href='{trailer}' target='_blank' style='color:#00eaff;'>▶ Watch Trailer</a>"

        html += "</div>"

    html += "</div>"

    return HTML(html)


In [60]:
#✅ STEP 4 — ULTRA NETFLIX UI RECOMMENDER FUNCTION

from IPython.display import HTML, display
import ipywidgets as widgets

def netflix_recommend(movie, k=10):
    movie = movie.strip()
    if movie not in title_to_idx:
        return HTML("<h2 style='color:red;'>Movie not found!</h2>")

    idx = title_to_idx[movie]
    sims = list(enumerate(similarity[idx]))
    sims = sorted(sims, key=lambda x: x[1], reverse=True)[1:k+1]

    html = f"<h1 style='color:white;background:#141414;padding:15px;border-radius:10px;'>🎬 Movies like <b>{movie}</b></h1>"

    html += "<div style='display:flex;overflow-x:auto;gap:20px;padding:10px;'>"

    for i, _ in sims:
        title = titles[i]
        movie_id = df[df["title"] == title].iloc[0]["movie_id"]

        details = tmdb_movie_details(movie_id)

        poster = poster_url(details.get("poster_path"))
        genres = [g['name'] for g in details.get('genres', [])]
        cast = details.get("credits", {}).get("cast", [])
        rating = details.get("vote_average", 0)
        year = details.get("release_date", "")[:4]
        trailer = get_trailer(details.get("videos", {}))

        html += f"""
        <div style='min-width:220px;background:#222;padding:10px;border-radius:10px;color:white;'>
            <img src='{poster}' style='width:200px;border-radius:10px;'>
            <h3>{title} ({year})</h3>
            <p>⭐ {rating}</p>
            {get_genre_chips(genres)}
            <br><br>
            {get_cast_images(cast)}
            <br>
        """

        if trailer:
            html += f"<a href='{trailer}' target='_blank' style='color:#00eaff;'>▶ Watch Trailer</a>"

        html += "</div>"

    html += "</div>"

    return HTML(html)


In [61]:
#✅ STEP 5 — Dropdown + UI Launcher
dropdown = widgets.Combobox(
    placeholder='Choose a movie',
    options=titles,
    description='🎬 Movie:',
    ensure_option=True,
    layout=widgets.Layout(width='50%')
)

button = widgets.Button(
    description='Show Recommendations',
    button_style='danger',
    icon='fire'
)

output_box = widgets.Output()

def on_click(b):
    with output_box:
        output_box.clear_output()
        display(netflix_recommend(dropdown.value, 10))

button.on_click(on_click)

display(dropdown, button, output_box)


Combobox(value='', description='🎬 Movie:', ensure_option=True, layout=Layout(width='50%'), options=('Avatar', …

Button(button_style='danger', description='Show Recommendations', icon='fire', style=ButtonStyle())

Output()

In [62]:
# ---------- NO-API placeholder poster (works instantly) ----------
import urllib.parse

def poster_placeholder_by_title(title, w=300, h=450, bg="111827", fg="ffffff"):
    # returns a generated image URL with movie title text
    text = urllib.parse.quote_plus(title)
    # using dummyimage.com or via.placeholder
    # Using dummyimage.com style (no external key)
    return f"https://dummyimage.com/{w}x{h}/{bg}/{fg}.png&text={text}"

# Example usage:
# poster = poster_placeholder_by_title("Avatar")
# st.image(poster)  # or display in HTML


In [64]:
# 1) placeholder function
import urllib.parse, ipywidgets as widgets
from IPython.display import display, HTML

def poster_placeholder_by_title(title, w=200, h=300, bg="0b0f1a", fg="ffffff"):
    text = urllib.parse.quote_plus(title)
    return f"https://dummyimage.com/{w}x{h}/{bg}/{fg}.png&text={text}"

# 2) dropdown UI (uses existing recommend() and titles list)
dropdown = widgets.Combobox(
    placeholder='Type or select a movie',
    options=titles,
    description='🎬 Movie:',
    ensure_option=True,
    layout=widgets.Layout(width='60%')
)
button = widgets.Button(description='Recommend', button_style='success')
out = widgets.Output()

def on_click(b):
    out.clear_output()
    movie = dropdown.value
    recs = recommend(movie, 5)   # your existing recommend function
    html = "<div style='display:flex;gap:16px;'>"
    for r in recs:
        img = poster_placeholder_by_title(r, w=160, h=240)
        html += f"<div style='text-align:center;'><img src='{img}' style='border-radius:8px;'><div style='width:160px'>{r}</div></div>"
    html += "</div>"
    with out:
        display(HTML(html))

button.on_click(on_click)
display(dropdown, button, out)


Combobox(value='', description='🎬 Movie:', ensure_option=True, layout=Layout(width='60%'), options=('Avatar', …

Button(button_style='success', description='Recommend', style=ButtonStyle())

Output()

In [None]:
 http://www.omdbapi.com/?i=tt3896198&apikey=e488bc4a

In [None]:
!pip install pandas numpy scikit-learn requests


In [None]:
import pickle
import pandas as pd

# movie titles and tags DataFrame
movies = pd.read_csv("/content/movie_titles.csv")     # or your DataFrame

# similarity matrix
with open("/content/similarity_dl.pkl", "rb") as f:
    similarity = pickle.load(f)

titles = movies['title'].tolist()


In [65]:
OMDB_KEY = "e488bc4a"   # ✅ Your real working key


In [66]:
import requests

def get_movie_details_omdb(title):
    url = f"http://www.omdbapi.com/?t={title}&apikey={OMDB_KEY}"
    try:
        data = requests.get(url).json()
        return data
    except:
        return {}

def get_poster_omdb(title):
    data = get_movie_details_omdb(title)
    poster = data.get("Poster")
    if poster and poster != "N/A":
        return poster
    return "https://via.placeholder.com/300x450?text=No+Poster"


In [67]:
def recommend(movie_name, k=5):
    index = movies[movies['title'] == movie_name].index[0]
    distances = similarity[index]
    movie_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:k+1]

    recommended_names = []
    recommended_posters = []

    for i in movie_list:
        title = movies.iloc[i[0]].title
        recommended_names.append(title)
        recommended_posters.append(get_poster_omdb(title))

    return recommended_names, recommended_posters


In [69]:
import ipywidgets as widgets
from IPython.display import HTML, display

dropdown = widgets.Combobox(
    placeholder='Type or select a movie',
    options=titles,
    description='🎬 Movie:',
    ensure_option=True,
    layout=widgets.Layout(width='60%')
)

button = widgets.Button(description='Recommend', button_style='success')
out = widgets.Output()

def on_click(b):
    movie = dropdown.value
    names, posters = recommend(movie)

    html = "<h3>✅ Recommended Movies</h3>"
    html += "<div style='display:flex;gap:16px;'>"

    for t, p in zip(names, posters):
        html += f"""
            <div style='text-align:center;'>
                <img src='{p}' style='width:170px;height:250px;border-radius:10px;'>
                <p style='width:170px;'>{t}</p>
            </div>
        """
    html += "</div>"

    with out:
        out.clear_output()
        display(HTML(html))

button.on_click(on_click)
display(dropdown, button, out)


Combobox(value='', description='🎬 Movie:', ensure_option=True, layout=Layout(width='60%'), options=('Avatar', …

Button(button_style='success', description='Recommend', style=ButtonStyle())

Output()