In [1]:
# Import Required Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Step 1: Load the Dataset
# The path is resolved relative to the kernel's working directory; replace
# 'movie_dataset.csv' with the actual path to your dataset.
movies_df = pd.read_csv('movie_dataset.csv')

# Step 2: Preprocess the Data
# Replace missing values in the 'genres', 'overview', and 'tagline' columns
# with empty strings so every row can be concatenated as text below.
movies_df['genres'] = movies_df['genres'].fillna('')
movies_df['overview'] = movies_df['overview'].fillna('')
movies_df['tagline'] = movies_df['tagline'].fillna('')

# Combine 'genres', 'overview', and 'tagline' into a single space-separated
# 'content' column; this is the only text the vectorizer sees.
movies_df['content'] = (
    movies_df['genres'] + ' ' + movies_df['overview'] + ' ' + movies_df['tagline']
)

# Step 3: Create a TF-IDF Vectorizer and Compute the Similarity Matrix
# stop_words='english' makes the vectorizer ignore common English words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['content'])

# Compute the similarity of every movie with every other movie.
# linear_kernel is a plain dot product; with TfidfVectorizer's default L2 row
# normalisation that dot product equals the cosine similarity.
# NOTE: `cosine_sim` and `movies_df` are captured as default arguments by
# get_recommendations below, so they must exist before that `def` runs.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Step 4: Build the Recommendation Function
def get_recommendations(title, cosine_sim=cosine_sim, movies_df=movies_df):
    """
    Return the titles of the 10 movies whose content is most similar to `title`.

    Args:
    - title (str): Exact title to look up in movies_df['title'].
    - cosine_sim: Square similarity matrix whose row/column order follows the
      row order of movies_df. The default is bound once, when this `def`
      statement is executed.
    - movies_df (pd.DataFrame): Movie dataset with a 'title' column. The
      default is bound once, when this `def` statement is executed.

    Returns:
    - list: Up to 10 movie titles, most similar first. If the title is not in
      the dataset, the single-element list ["Movie not found in database."].
    """
    # Boolean mask over rows; also serves as the "does the title exist" check
    title_matches = (movies_df['title'] == title).to_numpy()
    if not title_matches.any():
        return ["Movie not found in database."]

    # Row *position* (not index label) of the first match, so it is valid for
    # both cosine_sim[...] and .iloc even when movies_df has a non-default index
    idx = int(title_matches.argmax())

    # Similarity of the requested movie with every movie in the dataset
    scores = cosine_sim[idx]

    # Drop the movie itself by position instead of assuming it sorts first:
    # a duplicate entry or an all-zero similarity row would break that
    # assumption and let the movie recommend itself.
    candidates = (pos for pos in range(len(scores)) if pos != idx)

    # Stable sort: equally similar movies keep their dataset order
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    # Return the top 10 most similar movies
    return movies_df['title'].iloc[ranked[:10]].tolist()

# Step 5: Try Out the Recommendation System
# Example: Get recommendations for a specific movie. The lookup is an exact
# match against the 'title' column.
movie_title = "Spectre"  # Replace with any movie title from your dataset
recommended_movies = get_recommendations(movie_title)

# Print a header naming the query, then one recommended title per line.
# When the title is not in the dataset, the only line printed after the header
# is the "Movie not found in database." message returned by the function.
print(f"Recommendations for '{movie_title}':")
for movie in recommended_movies:
    print(movie)


Recommendations for 'Spectre':
Never Say Never Again
From Russia with Love
Thunderball
Quantum of Solace
Octopussy
Safe Haven
Live and Let Die
The Man with the Golden Gun
Dr. No
Skyfall


In [4]:
# Mount Google Drive into the runtime's filesystem at /content/drive.
# The google.colab package is only available inside Google Colab.
# NOTE(review): the dataset path used in this notebook ('movie_dataset.csv') is
# relative and does not point under /content/drive — confirm whether this mount
# is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Import Required Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def load_and_preprocess_data(file_path):
    """
    Read the movie CSV and prepare the text used for similarity.

    Missing values in 'genres', 'overview', and 'tagline' become empty
    strings, and the three columns are joined with single spaces into a new
    'content' column.

    Args:
    - file_path (str): Path to the dataset CSV file.

    Returns:
    - pd.DataFrame: The loaded dataset with the three text columns filled and
      an added 'content' column.
    """
    text_columns = ['genres', 'overview', 'tagline']

    frame = pd.read_csv(file_path)

    # Blank out missing text so every row can be concatenated
    for column in text_columns:
        frame[column] = frame[column].fillna('')

    # genres + ' ' + overview + ' ' + tagline, in that order
    genres, overview, tagline = (frame[column] for column in text_columns)
    frame['content'] = genres + ' ' + overview + ' ' + tagline

    return frame


def compute_cosine_similarity(movies_df):
    """
    Build the pairwise similarity matrix from each movie's 'content' text.

    Args:
    - movies_df (pd.DataFrame): DataFrame with the 'content' column.

    Returns:
    - numpy.ndarray: Square matrix in which entry [i, j] is the similarity
      between the content of row i and row j of movies_df.
    """
    # TF-IDF representation of the content, ignoring common English words
    vectorizer = TfidfVectorizer(stop_words='english')
    content_vectors = vectorizer.fit_transform(movies_df['content'])

    # Dot product of every row with every row; equals cosine similarity under
    # the vectorizer's default L2 row normalisation
    return linear_kernel(content_vectors, content_vectors)


def get_movie_recommendations(title, cosine_sim, movies_df, top_n=10):
    """
    Get the top-N movie recommendations based on content similarity.

    Args:
    - title (str): Title of the movie for which recommendations are needed.
      Matched exactly against movies_df['title']; if several rows share the
      title, the first one is used.
    - cosine_sim (numpy.ndarray): Precomputed square similarity matrix whose
      row/column order follows the row order of movies_df.
    - movies_df (pd.DataFrame): Movie dataset with a 'title' column.
    - top_n (int): Number of top similar movies to return. Values below 1
      yield an empty list.

    Returns:
    - list: Up to top_n recommended movie titles, most similar first. If the
      title is not in the dataset, the single-element list
      ["Movie not found in database."].
    """
    # Boolean mask over rows; also serves as the "does the title exist" check
    title_matches = (movies_df['title'] == title).to_numpy()
    if not title_matches.any():
        return ["Movie not found in database."]

    # Row *position* (not index label) of the first match, so it is valid for
    # both cosine_sim[...] and .iloc even when movies_df has a non-default
    # index (e.g. after filtering without reset_index)
    idx = int(title_matches.argmax())

    # Similarity of the requested movie with every movie in the dataset
    scores = cosine_sim[idx]

    # Drop the movie itself by position instead of assuming it sorts first:
    # a duplicate entry or an all-zero similarity row would break that
    # assumption and let the movie recommend itself.
    candidates = (pos for pos in range(len(scores)) if pos != idx)

    # Stable sort: equally similar movies keep their dataset order
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    # Clamp so a negative top_n cannot turn into a "drop from the end" slice
    keep = max(top_n, 0)

    # Return the top-N most similar movie titles
    return movies_df['title'].iloc[ranked[:keep]].tolist()


def main():
    """
    Run the recommender end to end for one example title and print the result.
    """
    # Load the CSV and build the combined 'content' column
    dataset = load_and_preprocess_data('movie_dataset.csv')  # Replace with your dataset path

    # Similarity of every movie's content with every other movie's content
    similarity = compute_cosine_similarity(dataset)

    # Look up recommendations for the example title
    query_title = "Spectre"  # Replace with the movie title of your choice
    suggestions = get_movie_recommendations(query_title, similarity, dataset)

    # Header line followed by one bulleted title per line
    print(f"Recommendations for '{query_title}':")
    for suggestion in suggestions:
        print(f"- {suggestion}")


# Entry-point guard: run the pipeline when this code is executed directly
# (including as a notebook cell, where __name__ is "__main__"), but not when
# the definitions are imported from another module.
if __name__ == "__main__":
    main()


Recommendations for 'Spectre':
- Never Say Never Again
- From Russia with Love
- Thunderball
- Quantum of Solace
- Octopussy
- Safe Haven
- Live and Let Die
- The Man with the Golden Gun
- Dr. No
- Skyfall
