In [1]:
import pandas as pd
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')
df = movies.merge( credits ,left_on= "id" , right_on="movie_id")

In [2]:
df['genres'] = df['genres'].fillna('[]')       
df['keywords'] = df['keywords'].fillna('[]')  
df['cast'] = df['cast'].fillna('[]')           
df['crew'] = df['crew'].fillna('[]')           
df['overview'] = df['overview'].fillna('')     

In [3]:
import ast
def parse_cast(cast):
    cast_list = ast.literal_eval(cast)
    return [cast['name'] for cast in cast_list[:3]]
    
df["parsed_cast"] = df["cast"].apply(parse_cast)

def parse_crew(crew):
    crew_list = ast.literal_eval(crew)
    for member in crew_list:
        if member["job"] == "Director":
            return [member["name"]]
    return []
    
df["parsed_crew"] = df["crew"].apply(parse_crew)

def parse_genres(genres_str):
    genres_list = ast.literal_eval(genres_str)
    return [genre['name'] for genre in genres_list]

df['genres_parsed'] = df['genres'].apply(parse_genres)


def parse_keywords(keywords_str):
    keywords_list = ast.literal_eval(keywords_str)
    return [keyword['name'] for keyword in keywords_list]


df['keywords_parsed'] = df['keywords'].apply(parse_keywords)

def parsed_production(production):
    production_list = ast.literal_eval(production)
    return [ prod["name"] for prod in production_list[:4]]

df["parsed_production"] = df["production_companies"].apply(parsed_production)

In [4]:
def create_soup(x):
    return (
        (10*' '.join(x['genres_parsed'])) + ' ' +
        (5*' '.join(x['keywords_parsed'])) + ' ' +
        (3*' '.join(x['parsed_crew']) )+ ' ' +
        (2*x['overview'])
    )
    
df["soup"] = df.apply(create_soup , axis = 1)

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

vector = TfidfVectorizer( stop_words="english" , max_features=5000)
vectormatrix = vector.fit_transform(df['soup'])
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(vectormatrix , vectormatrix)

In [6]:
import pickle
with open("similarity.pkl" , "wb") as f:
    pickle.dump(similarity , f)
df[['original_title', 'soup']].to_pickle('movies.pkl')


In [7]:
indices = pd.Series(df.index, index=df['original_title'].str.lower()).drop_duplicates()

from difflib import get_close_matches

def recommend(title, cosine_sim=similarity):
    title = title.lower()

    if title not in indices:
        matches = get_close_matches(title, indices.index, n=1, cutoff=0.6)
        if matches:
            suggestion = matches[0]
            confirm = input(f"Did you mean '{suggestion}'? (y/n): ").lower()
            if confirm == 'y':
                title = suggestion
            else:
                print("Movie not found!")
                return
        else:
            print("Movie not found!")
            return

    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]

    return df[['original_title', 'vote_average', 'release_date']].iloc[movie_indices]


In [8]:

while True:
    a = input("Enter a movie title (or type 'quit' to exit): ")

    if a.lower() == "quit":
        print("Exiting recommendation system. Goodbye!")
        break
    else:
        x = recommend(a)
        print(a)
        print(x)

interstellar
                  original_title  vote_average release_date
4332              Silent Running           6.3   09-03-1972
461                Lost in Space           5.0   03-04-1998
300            Starship Troopers           6.7   06-11-1997
3405  Stargate: The Ark of Truth           6.9   11-03-2008
1531                   Moonraker           5.9   26-06-1979
149                   Armageddon           6.4   01-07-1998
278           Planet of the Apes           5.6   25-07-2001
2966       2001: A Space Odyssey           7.9   10-04-1968
643                Space Cowboys           6.3   31-07-2000
3158                       Alien           7.9   25-05-1979
Movie not found!
arrival
None
Movie not found!
endgame
None
avemgrs
                          original_title  vote_average release_date
182                              Ant-Man           7.0   14-07-2015
174                  The Incredible Hulk           6.1   12-06-2008
79                            Iron Man 2           6.6 