In [1]:
import joblib
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [2]:
# Pickled artifact that unpacks into the feature matrix and its companion DataFrame.
MODEL_PATH = r"D:\Coding\Code\Python\ML\IMDB Project\Model Bulding\movie_feature_model.pkl"

# NOTE(review): joblib.load is pickle-based, so only point MODEL_PATH at files you trust.
feature_matrix, combined_df = joblib.load(MODEL_PATH)
print("Loaded")
combined_df

Loaded


Unnamed: 0.1,Unnamed: 0,tconst,nconst,directors,genres,popularity_score
0,0,tt0000005,nm0443482,nm0005690,['short'],0.578024
1,1,tt0000005,nm0653042,nm0005690,['short'],0.578024
2,2,tt0000007,nm0179163,nm0005690,"['short', ' sport']",0.479716
3,3,tt0000007,nm0179163,nm0374658,"['short', ' sport']",0.479716
4,4,tt0000007,nm0183947,nm0005690,"['short', ' sport']",0.479716
...,...,...,...,...,...,...
10962033,10962033,tt9916840,nm1052583,nm0996406,"['adventure', ' animation', ' comedy']",0.656166
10962034,10962034,tt9916840,nm1052583,nm0996406,"['adventure', ' animation', ' comedy']",0.656166
10962035,10962035,tt9916840,nm2676923,nm0996406,"['adventure', ' animation', ' comedy']",0.656166
10962036,10962036,tt9916840,nm2676923,nm0996406,"['adventure', ' animation', ' comedy']",0.656166


In [3]:
# Folder that holds the four cleaned recommendation CSV files read below.
DATA_DIR = r"D:\Coding\Code\Python\ML\Data\IMDB\Cleaned Data"

# One DataFrame per recommendation source; the lookup helpers filter each by 'tconst'.
actors_df = pd.read_csv(rf"{DATA_DIR}\FinalActorBasedRecomendation.csv")  # actor-based data
directors_df = pd.read_csv(rf"{DATA_DIR}\FinalDirectorBasedRecomendation.csv")  # director-based data
genres_df = pd.read_csv(rf"{DATA_DIR}\FinalGenreBasedRecomendation.csv")  # genre-based data
popularity_df = pd.read_csv(rf"{DATA_DIR}\FinalPopularityBasedRecomendation.csv")  # popularity-based data
print("Loaded")

Loaded


In [10]:
def get_movie_details(tconst):
    """Collect cast, directors, genres and rating for a single title.

    The lookup filters the module-level ``actors_df``, ``directors_df``,
    ``genres_df`` and ``popularity_df`` frames on their ``tconst`` column.

    Args:
        tconst: Title identifier compared against each frame's ``tconst`` column.

    Returns:
        dict with four keys:
            ``"Actors"`` / ``"Directors"``: lists of ``primaryName`` values
            (empty when no row matches).
            ``"Genres"``: the ``genres`` value of the first matching row, or
            ``"Unknown"``.
            ``"Rating"``: dict with ``averageRating`` and ``numVotes`` taken
            from the first matching row, or ``"Unknown"`` for both.
    """
    def names_for(people_df):
        # primaryName of every row in people_df that belongs to this title
        return people_df.loc[people_df['tconst'] == tconst, 'primaryName'].tolist()

    cast = names_for(actors_df)
    directed_by = names_for(directors_df)

    # Only the first genre entry is reported, even if several rows match.
    genre_values = genres_df.loc[genres_df['tconst'] == tconst, 'genres'].values
    genre = "Unknown" if len(genre_values) == 0 else genre_values[0]

    # Each row of rating_rows is (averageRating, numVotes); only the first row is used.
    rating_rows = popularity_df.loc[
        popularity_df['tconst'] == tconst, ['averageRating', 'numVotes']
    ].values
    if len(rating_rows) == 0:
        rating = {"averageRating": "Unknown", "numVotes": "Unknown"}
    else:
        average, votes = rating_rows[0]
        rating = {"averageRating": average, "numVotes": votes}

    return {
        "Actors": cast,
        "Directors": directed_by,
        "Genres": genre,
        "Rating": rating,
    }


def watch_list(watchedMovie, watchedMoviesIds):
    """Find a title by exact name and append its ID to the watched list.

    Titles are matched against ``genres_df['primaryTitle']``. When several rows
    share the name, each candidate is printed together with the output of
    ``get_movie_details`` and the user is asked (via ``input``) to type the
    bracketed index label of the one they mean.

    Args:
        watchedMovie: Exact title to search for.
        watchedMoviesIds: List of ``tconst`` IDs; mutated in place.

    Returns:
        None. Progress and errors are reported with ``print``. Nothing is
        appended when the title is unknown, the selection is invalid, or the
        chosen ID is already in ``watchedMoviesIds``.
    """
    def _names(values):
        # str() each entry so a non-string value (e.g. NaN) cannot break join().
        return ', '.join(map(str, values)) if values else 'Unknown'

    # Filter the genres DataFrame for movies matching the given name
    matching_movies = genres_df[genres_df['primaryTitle'] == watchedMovie]

    if matching_movies.empty:
        print(f"No movie found with the name '{watchedMovie}'.")
        return

    if len(matching_movies) > 1:
        print(f"Multiple movies found with the name '{watchedMovie}':")
        for idx, row in matching_movies.iterrows():
            movie_details = get_movie_details(row['tconst'])
            print(f"[{idx}] {row['primaryTitle']} ({row['titleType']}, Genres: {row['genres']})")
            print("  Details:")
            print(f"    Actors: {_names(movie_details['Actors'])}")
            print(f"    Directors: {_names(movie_details['Directors'])}")
            print(f"    Genres: {movie_details['Genres']}")
            print(f"    Rating: {movie_details['Rating']['averageRating']} (Votes: {movie_details['Rating']['numVotes']})")

        try:
            # The number shown in brackets is the DataFrame index label, hence .loc.
            selected_idx = int(input("Enter the number corresponding to the correct movie: "))
            selected_movie = matching_movies.loc[selected_idx]
        except (ValueError, KeyError):
            print("Invalid selection. No movie added to the watch list.")
            return
    else:
        # If there's only one match, select it automatically
        selected_movie = matching_movies.iloc[0]

    movie_id = selected_movie['tconst']

    # Keep the watched list free of duplicates.
    if movie_id in watchedMoviesIds:
        print(f"'{selected_movie['primaryTitle']}' (ID: {movie_id}) is already in the watch list.")
        return

    watchedMoviesIds.append(movie_id)
    print(f"Added '{selected_movie['primaryTitle']}' (ID: {movie_id}) to the watch list.")


def recommend_movies(movie_ids, top_n=10):
    """Recommend titles similar to the given movie ID(s).

    Every row of ``combined_df`` whose ``tconst`` is in ``movie_ids`` is used as
    a query against ``feature_matrix``; the per-query cosine similarities are
    averaged, rows are ranked by that average, and the best-scoring row per
    ``tconst`` is kept.

    Args:
        movie_ids: A single ``tconst`` string or an iterable of them.
            ``None`` is treated as an empty collection.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame with columns ``tconst``, ``nconst``, ``directors``,
        ``genres``, ``popularity_score`` and ``similarity_score`` (at most
        ``top_n`` rows, input titles excluded), or the string
        ``"No valid movie IDs found."`` when none of the IDs occur in
        ``combined_df``.
    """
    # Normalise the input to a list
    if movie_ids is None:
        movie_ids = []
    elif isinstance(movie_ids, str):
        movie_ids = [movie_ids]
    else:
        movie_ids = list(movie_ids)

    # Row *positions* (not index labels) of the query titles: feature_matrix is
    # addressed positionally, so this stays correct for any combined_df index.
    query_positions = combined_df['tconst'].isin(movie_ids).to_numpy().nonzero()[0]
    if query_positions.size == 0:
        return "No valid movie IDs found."

    # Average similarity of every row to all query rows
    similarity = cosine_similarity(feature_matrix[query_positions], feature_matrix)
    combined_similarity = similarity.mean(axis=0)

    # Positions ordered from most to least similar
    ranked_positions = combined_similarity.argsort()[::-1]

    # iloc already yields a new frame; assign attaches the scores in the same order
    recommended_movies = combined_df.iloc[ranked_positions].assign(
        similarity_score=combined_similarity[ranked_positions]
    )

    # One row per title: after ranking, the first occurrence is the best-scoring one
    recommended_movies = recommended_movies.drop_duplicates(subset='tconst', keep='first')

    # Exclude the input movies and limit to top N results
    recommended_movies = recommended_movies[~recommended_movies['tconst'].isin(movie_ids)].head(top_n)

    return recommended_movies[['tconst', 'nconst', 'directors', 'genres', 'popularity_score', 'similarity_score']]

In [11]:
# Session-wide list of watched titles (tconst IDs); starts out empty.
watchedMoviesIds = list()

In [12]:
def _choose_movie_id(title):
    """Resolve an exact title to one tconst, asking the user to pick when the name is ambiguous.

    Returns the chosen ``tconst``, or ``None`` when the title is unknown or the
    selection is invalid.
    """
    matches = genres_df[genres_df['primaryTitle'] == title]
    if matches.empty:
        print(f"No movie found with the name '{title}'.")
        return None
    if len(matches) == 1:
        return matches.iloc[0]['tconst']

    print(f"Multiple movies found with the name '{title}':")
    for idx, row in matches.iterrows():
        print(f"[{idx}] {row['primaryTitle']} ({row['titleType']}, Genres: {row['genres']}, ID: {row['tconst']})")
    try:
        # The bracketed number is the DataFrame index label, hence .loc.
        selected_idx = int(input("Enter the number corresponding to the correct movie: "))
        return matches.loc[selected_idx]['tconst']
    except (ValueError, KeyError):
        print("Invalid selection.")
        return None


def _show_movie_details(tconst):
    """Print the dictionary returned by get_movie_details in a readable layout."""
    details = get_movie_details(tconst)
    print(f"Movie ID: {tconst}")
    print(f"    Actors: {', '.join(map(str, details['Actors'])) if details['Actors'] else 'Unknown'}")
    print(f"    Directors: {', '.join(map(str, details['Directors'])) if details['Directors'] else 'Unknown'}")
    print(f"    Genres: {details['Genres']}")
    print(f"    Rating: {details['Rating']['averageRating']} (Votes: {details['Rating']['numVotes']})")


def main():
    """Run the interactive text menu of the recommendation system.

    Reads choices with ``input`` until the user selects option 6. Options that
    ask for a movie name resolve it to a ``tconst`` first, because
    ``get_movie_details`` and ``recommend_movies`` filter on IDs, not titles.
    Every result is printed; nothing is returned.
    """
    print('''
Welcome to the movie recommendation system
Please choose from the following:
    1- Add to the watched list
    2- Show the watched list
    3- Get details for a movie
    4- Recommend a movie based on your watched list
    5- Recommend a movie based on a single movie
    6- End the program
''')
    while True:
        choice = input("Please Enter Your Choice: ").strip()

        if choice == "1":
            movie = input("Enter The Movie Name: ")
            watch_list(movie, watchedMoviesIds)
        elif choice == "2":
            if not watchedMoviesIds:
                print("The watched list is empty.")
            for movie_id in watchedMoviesIds:
                _show_movie_details(movie_id)
        elif choice == "3":
            movie = input("Enter The Movie Name: ")
            movie_id = _choose_movie_id(movie)
            if movie_id is not None:
                _show_movie_details(movie_id)
        elif choice == "4":
            if not watchedMoviesIds:
                print("The watched list is empty. Add a movie first.")
            else:
                # recommend_movies returns a DataFrame or a message string; print handles both.
                print(recommend_movies(watchedMoviesIds))
        elif choice == "5":
            movie = input("Enter The Movie Name: ")
            movie_id = _choose_movie_id(movie)
            if movie_id is not None:
                print(recommend_movies(movie_id))
        elif choice == "6":
            break
        else:
            print("invalid input")

main()


Welcome to the movie recommendation system
Please choose from the following:
    1- Add to the watched list
    2- Show the watched list
    3- Get details for a movie
    4- Recommend a movie based on your watched list
    5- Recommend a movie based on a single movie
    6- End the program

Multiple movies found with the name '1':
[337115] 1 (movie, Genres: ['documentary'])
  Details:
    Actors: Unknown
    Directors: Tim O'Hara
    Genres: ['documentary']
    Rating: 5.9 (Votes: 20.0)
[346492] 1 (movie, Genres: ['sci-fi', 'thriller'])
  Details:
    Actors: Zoltán Mucsi, László Sinkó, Pál Mácsai, Vica Kerekes, Balázs Czukor, Zoltán Balázs, Máté Haumann, Krzysztof Rogacewicz, Zoltán Berzsenyi, Krisztina Bíró
    Directors: Pater Sparrow
    Genres: ['sci-fi', 'thriller']
    Rating: 6.0 (Votes: 550.0)
[448288] 1 (tvEpisode, Genres: ['comedy'])
  Details:
    Actors: Iván Bagi, Olivér Nacsa, Csaba Csajtay, László Gálházi
    Directors: Unknown
    Genres: ['comedy']
    Rating: Unkno

: 

In [28]:
# watch_list reports through its own print calls and returns nothing,
# so the print() below shows None after those messages.
add_result = watch_list("Blacksmith Scene", watchedMoviesIds)
print(add_result)
watchedMoviesIds

Multiple movies found with the name 'Blacksmith Scene':
[4] Blacksmith Scene (short, Genres: ['short'])
  Details:
    Actors: Charles Kayser, John Ott
    Directors: William K.L. Dickson
    Genres: ['short']
    Rating: 6.2 (Votes: 2863.0)
[20] Blacksmith Scene (short, Genres: ['documentary', 'short'])
  Details:
    Actors: Unknown
    Directors: Louis Lumière
    Genres: ['documentary', 'short']
    Rating: 5.1 (Votes: 1181.0)
Invalid selection. No movie added to the watch list.
None


['tt0000002', 'tt0000003', 'tt0000004']

In [33]:
# Recommendations for one title ID; a message string comes back when the ID is not in combined_df.
single_title_recs = recommend_movies('tt0000002')
print(single_title_recs)

No valid movie IDs found.
