<a href="https://colab.research.google.com/github/lakshmi-durga05/bharatintern-task2-movierecommendations/blob/main/movierecommendations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load dataset with specified encoding
movies_data = pd.read_csv('/content/drive/MyDrive/bharatintern/IMDb Movies India.csv', encoding='latin-1')  # Assuming you have a IMDb Movies India.csv file

# Text columns that are merged into one descriptive string per movie
feature_columns = ['Name', 'Director', 'Actor 1', 'Actor 2', 'Actor 3']

# Combine relevant attributes into a single column.
# Missing values are replaced per column (na_rep='') while joining: with plain
# `+`, a single NaN in any column turns the whole concatenation into NaN, which
# would wipe out the features of every movie that lacks e.g. a third actor.
movies_data['features'] = movies_data[feature_columns[0]].str.cat(
    movies_data[feature_columns[1:]], sep=' ', na_rep=''
)

# Initialize TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Fit and transform the features
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_data['features'])

# Compute the cosine similarity matrix.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel equals the cosine similarity.
# NOTE: the result is a dense (n_movies x n_movies) array, so memory use grows
# quadratically with the number of rows in the dataset.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to recommend similar movies
def recommend_movies(movie_title, n=5):
    """Print the ``n`` movies most similar to ``movie_title``.

    Uses the module-level ``movies_data`` frame and the precomputed
    ``cosine_sim`` matrix; row ``i`` of ``cosine_sim`` is expected to
    correspond to row ``i`` (by position) of ``movies_data``.

    Args:
        movie_title: Title to look up. Matching against the ``Name`` column
            is case-insensitive; the first matching row is used as the query.
        n: Maximum number of recommendations to print.

    Returns:
        None. Results are printed, or a "not found" message is printed when
        no row matches ``movie_title``.
    """
    title_key = movie_title.lower()

    # One vectorised, case-insensitive comparison over the whole column.
    # Rows with a missing name simply compare as False instead of raising.
    is_query = (movies_data['Name'].str.lower() == title_key).tolist()

    try:
        # Position (not index label) of the first matching row, because
        # cosine_sim is addressed by position.
        idx = is_query.index(True)
    except ValueError:
        print(f"Movie '{movie_title}' not found in the dataset.")
        return

    # Pair every other movie with its similarity to the query, leaving out
    # all rows that carry the queried title itself.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if not is_query[position]
    ]

    # sorted() is stable, so ties keep their dataset order.
    top_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:n]

    # Print the recommended movies with cosine similarity scores
    print(f"Recommended Movies for '{movie_title}':")
    for rank, (position, score) in enumerate(top_matches, 1):
        row = movies_data.iloc[position]
        year = row['Year']
        # Year values may already be wrapped in parentheses (e.g. "(2015)")
        # or be missing; normalise so the output never shows "((2015))" or
        # "(nan)".
        year_text = str(year).strip('()') if pd.notna(year) else 'N/A'
        print(f"{rank}. {row['Name']} ({year_text}), Similarity Score: {score}")

# Demo: print recommendations for one sample title
movie_title = '10ml Love'
recommend_movies(movie_title=movie_title)


Recommended Movies for '10ml Love':
1. N.H 10 ((2015)), Similarity Score: 0.23575586003607304
2. Sui Dhaaga ((2018)), Similarity Score: 0.23047633737104783
3. Anaam (nan), Similarity Score: 0.22812235911840104
4. Kaalakaandi ((2018)), Similarity Score: 0.22774626020857708
5. Doordarshan ((2020)), Similarity Score: 0.22766203129603305
