# Importing Python Libraries

In [35]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import pickle

# Read the CSV file

In [36]:
df = pd.read_csv("updated_movie_92.csv", encoding='latin-1')

# Select features for content-based filtering

In [37]:
features = ['Star_actors', 'Genre', 'Director']

# Fill missing values with empty string

In [38]:
for feature in features:
    df[feature] = df[feature].fillna('')

# Create a new column 'combined_features'

In [39]:
def combined_features(row):
    # Split the 'Star_actors' into a list
    star_actors = row['Star_actors'].split('|')
    
    # Split the 'Genre' into a list
    genres = row['Genre'].split('|')
    
    director = row['Director']
    
    # Join the separated values with spaces
    return ' '.join(star_actors) + " " + ' '.join(genres) + " " + director

# Apply the 'combined_features' function to create a new column

In [40]:
df['combined_features'] = df.apply(combined_features, axis=1)

# TF-IDF vectorization for the entire dataset

In [41]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])

# Calculate cosine similarity for the entire dataset

In [42]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [43]:
# data = {
#     'tfidf_vectorizer': tfidf_vectorizer,
#     'cosine_similarity': cosine_sim,
#     'df' : df
# }

# Save the cosine similarity matrix and TF-IDF Vectorizer

In [44]:
# with open('movie_recommendation_data.pkl', 'wb') as file:
#     pickle.dump(data, file)

# Function to print recommended movies

In [45]:
def recommend_movies(title, data,df, top_n=15):
    df = data['df']
    movie_indices = df[df['Series_Title'] == title].index.values
    if len(movie_indices) > 0:
        movie_index = movie_indices[0]
        if movie_index < len(data['cosine_similarity']) and movie_index < len(df):
            similar_movies = list(enumerate(data['cosine_similarity'][movie_index]))
            sorted_similar_movies = sorted(similar_movies, key=lambda x: x[1], reverse=True)[:top_n]
            recommended_movies = []
            for movie in sorted_similar_movies:
                if movie[0] < len(df):
                    recommended_movie_title = df.loc[movie[0], 'Series_Title']
                    if recommended_movie_title != title:
                        recommended_movies.append((recommended_movie_title, movie[1]))
            return recommended_movies
    return None

In [46]:
data = {
    'tfidf_vectorizer': tfidf_vectorizer,
    'cosine_similarity': cosine_sim,
    'predict' : recommend_movies,
    'df': df
}

# User input movie

In [47]:
movie_user_like = "The Avengers"

# Get recommendations for the movie the user likes

In [48]:
recommended_movies = recommend_movies(movie_user_like,data, df)

# Check if there are recommendations

In [49]:
if recommended_movies:
    print(f"Recommended movies for '{movie_user_like}':")
    
    # # Print each recommended movie
    # for recommended_movie in recommended_movies:
    #     print(recommended_movie)
    
     # Print each recommended movie with its similarity score
    for recommended_movie, score in recommended_movies:
        print(f"{recommended_movie} (Similarity Score: {score:.2f})")
else:
    # Print a message if there are no recommendations
    print(f"No recommendations for '{movie_user_like}' found.")

Recommended movies for 'The Avengers':
Captain America: Civil War (Similarity Score: 0.58)
Captain America: Civil War (Similarity Score: 0.56)
Captain America: The Winter Soldier (Similarity Score: 0.38)
Avengers: Endgame (Similarity Score: 0.35)
Spider-Man: Homecoming (Similarity Score: 0.34)
Avengers: Infinity War (Similarity Score: 0.32)
Serenity (Similarity Score: 0.30)
Iron Man (Similarity Score: 0.28)
Mission: Impossible â Ghost Protocol (Similarity Score: 0.26)
Mission: Impossible â Rogue Nation (Similarity Score: 0.25)
Arrival (Similarity Score: 0.25)
The Prestige (Similarity Score: 0.25)
Her (Similarity Score: 0.23)
Sherlock Holmes (Similarity Score: 0.22)


In [50]:
with open('movie_recommendation_data.pkl', 'wb') as file:
    pickle.dump(data, file)