In [2]:
import pandas as pd
import ast
import numpy as np
import os
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
print(os.listdir("/content/drive/MyDrive"))
['tmdb_5000_movies.csv', 'tmdb_5000_credits.csv', 'Colab Notebooks']

['tmdb_5000_credits.csv', 'tmdb_5000_movies.csv', 'Photos', 'Colab Notebooks', '.ipynb_checkpoints', 'custom_emotion_dataset.csv.gsheet', 'flowers.jpg', 'CropDiseaseCorpus.gsheet', 'movie-recommender', 'cat.jpg']


['tmdb_5000_movies.csv', 'tmdb_5000_credits.csv', 'Colab Notebooks']

In [4]:
MOVIES_CSV = "/content/drive/MyDrive/tmdb_5000_movies.csv"
CREDITS_CSV = "/content/drive/MyDrive/tmdb_5000_credits.csv"
print("Loading CSV files...")

movies = pd.read_csv(MOVIES_CSV)
credits = pd.read_csv(CREDITS_CSV)

print("Merging datasets...")
movies = movies.merge(credits, on='title', how='inner')

print(f"Loaded {len(movies)} movies successfully!")
movies.head(3)

Loading CSV files...
Merging datasets...
Loaded 4809 movies successfully!


Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bondâ€™s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,206647,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


In [7]:
def parse_col(x):
    try:
        L = ast.literal_eval(x)
    except Exception:
        return []
    if isinstance(L, list):
        names = []
        for i in L:
            if isinstance(i, dict) and 'name' in i:
                names.append(i['name'])
            elif isinstance(i, str):
                names.append(i)
        return names
    return []

def collapse_list(x):
    if isinstance(x, list):
        return " ".join(i.replace(" ", "") for i in x)
    return ""

print("Cleaning and preparing data")

Cleaning and preparing data


In [None]:
movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]
for col in ['genres','keywords','cast','crew']:
    movies[col] = movies[col].fillna('[]').apply(parse_col)
    if col == 'cast':
        movies[col] = movies[col].apply(lambda x: x[:3])  # keep top 3 actors
    if col == 'crew':
        def get_director(l):
            for d in l:
                if isinstance(d, dict) and d.get('job','').lower() == 'director':
                    return d.get('name','')
            return ""
        movies[col] = movies[col].apply(get_director)
    movies[col] = movies[col].apply(collapse_list)

movies['overview'] = movies['overview'].fillna("")

movies['tags'] = (
    movies['overview'] + " " +
    movies['genres'] + " " +
    movies['keywords'] + " " +
    movies['cast'] + " " +
    movies['crew']
).str.lower()

In [None]:
print("Data cleaned and 'tags' column created!")
movies.head(2)[['title', 'tags']]

Data cleaned and 'tags' column created!


Unnamed: 0,title,tags
0,Avatar,"in the 22nd century, a paraplegic marine is di..."
1,Pirates of the Caribbean: At World's End,"captain barbossa, long believed to be dead, ha..."


In [None]:
print("Vectorizing text with TF-IDF...")
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
vectors = tfidf.fit_transform(movies['tags']).toarray()

print("Calculating cosine similarity matrix...")
similarity = cosine_similarity(vectors)

print(f"TF-IDF and similarity matrix ready! Shape: {similarity.shape}")


Vectorizing text with TF-IDF...
Calculating cosine similarity matrix...
TF-IDF and similarity matrix ready! Shape: (4809, 4809)


In [None]:
def recommend(movie):
    movie = movie.lower()
    if movie not in movies['title'].str.lower().values:
        print("Movie not found in dataset.")
        return

    index = movies[movies['title'].str.lower() == movie].index[0]
    distances = list(enumerate(similarity[index]))
    movies_list = sorted(distances, key=lambda x: x[1], reverse=True)[1:6]

    print(f"\nTop 5 movies similar to '{movies.iloc[index].title}':\n")
    for i in movies_list:
        print(movies.iloc[i[0]].title)

print("Recommendation function ready!")

Recommendation function ready!


In [None]:
recommend("Avatar")


Top 5 movies similar to 'Avatar':

Falcon Rising
Battle: Los Angeles
Apollo 18
Star Trek Into Darkness
Predators


In [None]:
pickle.dump(movies, open('movies.pkl', 'wb'))
pickle.dump(similarity, open('similarity.pkl', 'wb'))

print("Files saved: movies.pkl and similarity.pkl")

Files saved: movies.pkl and similarity.pkl


In [6]:
%%writefile app.py
import streamlit as st
import pickle

movies = pickle.load(open('movies.pkl', 'rb'))
similarity = pickle.load(open('similarity.pkl', 'rb'))

st.set_page_config(page_title="ðŸŽ¬ Movie Recommender System", layout="wide")

st.markdown("""
<style>
.stApp {
    background-color: #111;
    color: white;
    font-family: 'Poppins', sans-serif;
}
.stButton>button {
    background-color: #F97A5D;
    color: white;
    border-radius: 20px;
    padding: 10px 24px;
    border: none;
    font-weight: 600;
    transition: 0.3s;
}
.stButton>button:hover {
    background-color: #ff9378;
}
h1, h2, h3, h4 {
    color: white;
}
.next-btn {
    display: flex;
    justify-content: center;
    margin-top: 30px;
}
</style>
""", unsafe_allow_html=True)

#title
st.title("Movie Recommender System")
st.write("Find movies similar to your favorite one! Select a movie below ðŸ‘‡")

movie_list = movies['title'].values
selected_movie = st.selectbox("Select a movie:", movie_list)

#Recoms func
def recommend(movie, start=0):
    index = movies[movies['title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
    recommendations = [movies.iloc[i[0]].title for i in distances[1:]]
    return recommendations[start:start+5]

#session
if "page" not in st.session_state:
    st.session_state.page = 0
if "recommendations" not in st.session_state:
    st.session_state.recommendations = []

#rec button
if st.button("Show Recommendations", key="show_btn"):
    st.session_state.page = 0
    st.session_state.recommendations = recommend(selected_movie)

#display
if st.session_state.recommendations:
    st.subheader("Recommended Movies:")
    names = st.session_state.recommendations
    cols = st.columns(5)
    for idx, col in enumerate(cols):
        if idx < len(names):
            with col:
                st.write(f"**{names[idx]}**")

#nxt button
col1, col2, col3 = st.columns([3,1,3])
with col2:
    if st.button("Next â†’", key="next_btn"):
        st.session_state.page += 1
        start = st.session_state.page * 5
        st.session_state.recommendations = recommend(selected_movie, start)

Overwriting app.py
