<a href="https://colab.research.google.com/github/roshni33/100-days-of-AI/blob/main/movie_recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
# Latest version: accepts "ironman" or "iron man" as input; the output includes Iron Man, Iron Man 2 and Iron Man 3

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib

# Read the movie table and drop every column whose name starts with "Unnamed"
df = pd.read_csv('/content/movies.csv', low_memory=False)
unnamed_columns = df.columns.str.startswith('Unnamed')
df = df.loc[:, ~unnamed_columns]

# Text columns that are merged into one description per movie
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']

# Missing values become empty strings so the columns can be concatenated
df[selected_features] = df[selected_features].fillna('')

# Join the selected columns left to right, separated by single spaces
combined_features = df[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + df[feature]

# TF-IDF vector for each movie's combined text
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

# Pairwise cosine similarity between all movie vectors (row i <-> movie i)
similarity = cosine_similarity(feature_vectors)

# Missing titles become empty strings so the string operations below are safe
df['title'] = df['title'].fillna('')

# Matching key for every row: the title lower-cased with all spaces removed
df['normalized_title'] = df['title'].str.lower().str.replace(' ', '', regex=False)

# Normalized titles in row order, used as the candidate pool for fuzzy matching
list_of_all_titles = df['normalized_title'].to_list()

# Ask for a movie name and normalize it the same way as the titles
movie_name = input("Enter movie name - ").lower()
normalized_movie_name = ''.join(movie_name.split(' '))

# Up to three candidates with a similarity ratio of at least 0.6
# (these are difflib's default limits, written out explicitly)
find_close_match = difflib.get_close_matches(
    normalized_movie_name,
    list_of_all_titles,
    n=3,
    cutoff=0.6,
)

# Build and print up to 10 recommendations for the closest-matching title.
# Uses names set earlier in the script: find_close_match, movie_name,
# normalized_movie_name, list_of_all_titles, similarity and df.
if not find_close_match:
    print(f"No match found for the movie {movie_name}")
else:
    # difflib returns candidates most-similar first, so take the first one
    close_match_title = find_close_match[0]

    # list_of_all_titles is in row order, so .index() yields a positional row
    # number. That is what both `similarity` rows and the .iloc lookups below
    # expect, and it stays correct even if df has a non-default index.
    movie_index = list_of_all_titles.index(close_match_title)

    # (row position, similarity score) pairs, highest score first
    similarity_score = list(enumerate(similarity[movie_index]))
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    # Substring used to detect the movie's series (e.g. Iron Man, Iron Man 2).
    # Prefer what the user typed; if that text occurs in no title (a typo that
    # difflib resolved) or is empty, fall back to the matched title instead.
    series_key = normalized_movie_name
    if not series_key or not any(series_key in t for t in list_of_all_titles):
        series_key = close_match_title

    # Collect the series titles in dataset order. Each row contributes its own
    # title, and repeated titles are skipped so the output has no duplicates.
    user_movie_series = []
    seen_titles = set()
    if series_key:  # an empty key would match every title
        for position, normalized in enumerate(list_of_all_titles):
            if series_key in normalized:
                series_title = df['title'].iloc[position]
                if series_title not in seen_titles:
                    seen_titles.add(series_title)
                    user_movie_series.append(series_title)

    # The series goes first, then the most similar movies fill the remaining slots
    recommended_movies = list(user_movie_series)
    for index, _score in sorted_similar_movies:
        # Stop when the top 10 recommendations are collected
        if len(recommended_movies) >= 10:
            break
        title_from_index = df['title'].iloc[index]

        # Avoid adding duplicates from the user movie series
        if title_from_index not in seen_titles:
            seen_titles.add(title_from_index)
            recommended_movies.append(title_from_index)

    # Display top 10 recommended movies
    print("Top 10 suggested movies are -")
    for i, title in enumerate(recommended_movies[:10], 1):
        print(f"{i}. {title}")


Enter movie name - ironman
Top 10 suggested movies are -
1. Iron Man 3
2. Iron Man
3. Iron Man 2
4. Avengers: Age of Ultron
5. The Avengers
6. Captain America: Civil War
7. Captain America: The Winter Soldier
8. Ant-Man
9. X-Men
10. Made
