## Movie Recommendation

 Import Libraries 
 

In [2]:
import re

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

Load Data

In [3]:
# Read the movie metadata CSV into a DataFrame
movies_metadata = pd.read_csv(filepath_or_buffer='data.csv')

# Preview the top of the table to sanity-check the columns that were loaded
movies_metadata.head(n=5)


Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action Adventure Fantasy Sci-Fi,avatar
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action Adventure Fantasy,pirates of the caribbean: at world's end
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action Adventure Thriller,spectre
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action Thriller,the dark knight rises
4,Doug Walker,Doug Walker,Rob Walker,unknown,Documentary,star wars: episode vii - the force awakens ...


 Data Preprocessing

In [4]:
# Text columns that may contain missing values; blank them out so the string
# concatenation below never encounters NaN.
text_columns = ['director_name', 'actor_1_name', 'actor_2_name',
                'actor_3_name', 'genres', 'movie_title']
for column in text_columns:
    movies_metadata[column] = movies_metadata[column].fillna('')

# Join every descriptive column (everything except the title) into a single
# space-separated string per movie.
feature_columns = text_columns[:-1]
combined = movies_metadata[feature_columns[0]]
for column in feature_columns[1:]:
    combined = combined + ' ' + movies_metadata[column]
movies_metadata['combined_features'] = combined

# Preview each title next to its combined feature text
movies_metadata[['movie_title', 'combined_features']].head()


Unnamed: 0,movie_title,combined_features
0,avatar,James Cameron CCH Pounder Joel David Moore Wes...
1,pirates of the caribbean: at world's end,Gore Verbinski Johnny Depp Orlando Bloom Jack ...
2,spectre,Sam Mendes Christoph Waltz Rory Kinnear Stepha...
3,the dark knight rises,Christopher Nolan Tom Hardy Christian Bale Jos...
4,star wars: episode vii - the force awakens ...,Doug Walker Doug Walker Rob Walker unknown Doc...


 Create a TF-IDF Matrix

In [5]:
# Vectorizer that drops common English stop words before weighting terms.
# Note that the stop-word filter applies to every token, name tokens included.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from the combined feature text and encode each movie
# as one row of TF-IDF weights.
feature_text = movies_metadata['combined_features']
tfidf_matrix = tfidf_vectorizer.fit_transform(feature_text)

# Dimensions of the resulting matrix: (documents, vocabulary terms)
tfidf_matrix.shape


(5043, 8588)

Compute the Cosine Similarity Matrix

In [6]:
# Pairwise dot products between every pair of TF-IDF rows. TfidfVectorizer
# L2-normalises rows unless `norm` is overridden (it is not overridden where the
# vectorizer is built), so the dot product equals the cosine similarity.
# With a single argument, linear_kernel compares the matrix against itself.
cosine_sim = linear_kernel(tfidf_matrix)

# One row and one column per movie
cosine_sim.shape


(5043, 5043)

Build a Recommendation Function

In [7]:
# Normalise stored titles exactly like get_recommendations() normalises its
# query (strip + lowercase); otherwise a title stored with padding whitespace
# can never match a stripped query.
movies_metadata['movie_title'] = movies_metadata['movie_title'].str.strip().str.lower()

# Reverse mapping: normalised title -> row index.
# Series.drop_duplicates() compares the *values* (row indices, already unique),
# so it never removed repeated titles; de-duplicate on the index labels instead
# so each title resolves to a single row (its first occurrence).
title_to_row = pd.Series(movies_metadata.index, index=movies_metadata['movie_title'])
indices = title_to_row[~title_to_row.index.duplicated(keep='first')]

def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title to look up. Leading/trailing whitespace and letter case
        are ignored.
    cosine_sim : array-like
        Square similarity matrix; row ``i`` holds the similarity of movie
        ``i`` to every movie. Defaults to the module-level ``cosine_sim``.
    top_n : int, default 10
        Maximum number of recommendations to return (non-negative).

    Returns
    -------
    pandas.Series or str
        The ``movie_title`` values of the ``top_n`` highest-scoring movies,
        best first, or a "not found" message string when ``title`` is not a
        key of ``indices``.
    """
    # Normalise the query the same way the lookup keys are normalised
    title = title.strip().lower()

    if title not in indices:
        return f"Movie '{title}' not found in the dataset."

    idx = indices[title]
    # A title that occurs more than once in the mapping yields a Series of
    # rows; use the first so that cosine_sim[idx] is a single 1-D row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Rank every *other* movie by similarity. The queried row is excluded
    # explicitly instead of assuming it sorts first: when another row ties
    # with it (e.g. identical features), the stable sort may place that row
    # ahead, and slicing off rank 0 would then drop the wrong movie.
    ranked = sorted(
        (pair for pair in enumerate(cosine_sim[idx]) if pair[0] != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    movie_indices = [row for row, _ in ranked[:top_n]]

    return movies_metadata['movie_title'].iloc[movie_indices]


In [8]:
# Example usage
movie_title = 'The Dark Knight'
recommendations = get_recommendations(movie_title)

# Capitalise each run of letters, treating an apostrophe-joined suffix as part
# of the same word. str.title() restarts capitalisation after every
# apostrophe and would print "I'M Not There." for "i'm not there.".
title_word = re.compile(r"[^\W\d_]+(?:'[^\W\d_]+)?")

if isinstance(recommendations, str):
    # A plain string is the function's "not found" message
    print(recommendations)
else:
    print(f"Recommendations for '{movie_title}':")
    for rank, recommended in enumerate(recommendations, start=1):
        display_title = title_word.sub(lambda m: m.group(0).capitalize(), recommended)
        print(f"{rank}. {display_title}")

Recommendations for 'The Dark Knight':
1. Batman Begins
2. The Dark Knight Rises
3. The Prestige
4. I'M Not There.
5. The Patriot
6. Brokeback Mountain
7. The Brothers Grimm
8. The Order
9. Harsh Times
10. Lords Of Dogtown
