<a href="https://colab.research.google.com/github/reshma0209/Machine-learning/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
# No install step is needed: pandas and scikit-learn come preinstalled in Colab

# Importing necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the IMDb Top 1000 dataset (upload the dataset to Colab first or mount Google Drive)
from google.colab import files
uploaded = files.upload()  # Upload the imdb_top_1000.csv here

# Load dataset into a pandas DataFrame.
# NOTE: the file name is hard-coded and the value returned by files.upload()
# is not consulted, so the uploaded file must be named exactly
# 'imdb_top_1000.csv'.
movies = pd.read_csv('imdb_top_1000.csv')

# Inspect the first few rows of the dataset.
# NOTE: a notebook only auto-displays the value of the last expression in a
# cell, so this line shows output only when it ends its own cell.
movies.head()

# Selecting relevant features for the recommendation system:
# each movie is described by its genre(s), director and four lead actors.
feature_columns = ['Genre', 'Director', 'Star1', 'Star2', 'Star3', 'Star4']

# Fill missing values with '' BEFORE joining. Concatenating with a NaN yields
# NaN for the whole row, and TfidfVectorizer refuses NaN documents, so a single
# blank cell would otherwise abort the whole pipeline.
features = movies[feature_columns].fillna('')
movies['combined_features'] = features['Genre'].str.cat(
    features[feature_columns[1:]], sep=' '
)

# Convert the combined features to lowercase for uniformity
movies['combined_features'] = movies['combined_features'].str.lower()

# Vectorizing the combined features using TF-IDF (Term Frequency-Inverse Document Frequency)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined_features'])

# Compute the cosine similarity matrix between all movies.
# Row/column i of the result corresponds to row position i of `movies`.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to recommend movies based on a selected movie title
def recommend_movies(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Movie title to look up in ``movies['Series_Title']``. The
            comparison is case-insensitive; if several rows match, the first
            one is used.
        cosine_sim: Similarity matrix in which row/column ``i`` corresponds to
            row position ``i`` of ``movies``. The default value is bound when
            this function is defined, so re-run this definition after
            recomputing the matrix.
        top_n: Maximum number of recommendations to return (default 10).
            Non-positive values yield an empty result.

    Returns:
        A pandas Series of ``Series_Title`` values ordered from most to least
        similar, or the string "Movie not found. Please check the spelling."
        when no title matches.
    """
    # Find the movie by row POSITION rather than by index label: the position
    # is what both cosine_sim and .iloc below expect, and the two only coincide
    # when `movies` has a default 0..n-1 index.
    is_match = (
        (movies['Series_Title'].str.lower() == title.lower())
        .fillna(False)
        .to_numpy(dtype=bool)
    )
    if not is_match.any():
        return "Movie not found. Please check the spelling."
    idx = int(is_match.argmax())  # argmax on booleans -> first True position

    # Rank every OTHER movie by similarity, highest first. The query movie is
    # removed explicitly by position instead of assuming it sorts first: with
    # tied scores (identical features, or an all-zero vector) it may not.
    # sorted() is stable, so tied movies keep their dataset order.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Keep the positions of the top_n best matches
    movie_indices = [pos for pos, _ in ranked[:max(top_n, 0)]]

    # Return the titles of the most similar movies
    return movies['Series_Title'].iloc[movie_indices]

# Smoke-test the recommender: print a header, then the titles it returns
# for a single well-known query movie.
query_title = 'The Godfather'
print(f"Movies similar to '{query_title}':")
print(recommend_movies(query_title))


Movies similar to 'The Godfather':
3        The Godfather: Part II
974     The Godfather: Part III
74               Apocalypse Now
649                 The Insider
693            The Conversation
305           On the Waterfront
823         Glengarry Glen Ross
447    A Streetcar Named Desire
398            Scent of a Woman
484                The Irishman
Name: Series_Title, dtype: object
