In [49]:
# !pip install fuzzywuzzy[speedup]

In [50]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore') # To clean up output

from fuzzywuzzy import process

In [51]:
# Column names assigned to the two raw files; because `names=` is supplied,
# pandas treats every row of each file as data rather than as a header.
rating_columns = ['user_id', 'movie_id', 'rating', 'timestamp']
movie_columns = [
    'movie_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',
    'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# Ratings: tab-separated file, one row per (user, movie) rating.
ratings = pd.read_csv('u.data', sep='\t', names=rating_columns)

# Movie metadata: pipe-separated file decoded as latin-1.
movies = pd.read_csv('u.item', sep='|', encoding='latin-1', names=movie_columns)

# Attach the movie title to every rating row, joining on `movie_id`.
data = ratings.merge(movies[['movie_id', 'title']], on='movie_id')

In [52]:
# Build the user x movie rating matrix (rows: user_id, columns: title), then
# prune sparse entries in two passes:
#   1. keep only movie columns holding at least 50 non-missing ratings;
#   2. keep only user rows holding at least 10 non-missing ratings among the
#      movies that survived the first pass.
movie_matrix = (
    data
    .pivot_table(index='user_id', columns='title', values='rating')
    .dropna(axis='columns', thresh=50)
    .dropna(axis='index', thresh=10)
)

In [53]:
def get_closest_movie_title(user_input, all_titles):
    """Return the title in ``all_titles`` that best fuzzy-matches ``user_input``.

    Args:
        user_input: Free-text movie name to look up.
        all_titles: Collection of candidate titles, passed unchanged to
            ``process.extractOne``.

    Returns:
        A ``(match, score)`` tuple built from the first two items of the
        ``process.extractOne`` result, or ``(None, 0)`` when that call
        returns ``None`` (for example, when there are no candidates).
    """
    best = process.extractOne(user_input, all_titles)
    if best is None:
        # No match available: report zero confidence instead of failing on
        # tuple unpacking, so callers can apply their usual score threshold.
        return None, 0
    # Index instead of unpacking so a result with more than two items does not
    # raise (fuzzywuzzy appears to append the key for dict-like choices --
    # TODO confirm against the library documentation).
    return best[0], best[1]

In [54]:
# Ask which movie the recommendations should be based on.
user_input = input("Enter a movie name (e.g., Toy Story): ")

# Resolve the free-text input to one of the titles present in the rating matrix.
matched_title, confidence = get_closest_movie_title(user_input, movie_matrix.columns)

print(f"\nBest match: {matched_title} (Confidence: {confidence}%)")

# Recommend only when the fuzzy-match score reaches 60.
if confidence >= 60:
    user_movie = matched_title

    # Correlate every movie's rating column with the chosen movie's column.
    similar_movies = movie_matrix.corrwith(movie_matrix[user_movie])

    # Number of ratings each title received in the merged ratings data.
    rating_counts = data.groupby('title')['rating'].count()

    # One row per title: its correlation with the chosen movie (titles with an
    # undefined correlation are dropped) plus its rating count.
    corr_df = (
        similar_movies
        .to_frame('Correlation')
        .dropna()
        .join(rating_counts)
        .rename(columns={'rating': 'Rating Count'})
    )

    # Keep titles with more than 50 ratings, excluding the chosen movie itself,
    # ordered from most to least correlated.
    is_candidate = (corr_df['Rating Count'] > 50) & (corr_df.index != user_movie)
    recommendations = corr_df[is_candidate].sort_values('Correlation', ascending=False)

    # Display top 10
    print(f"\n Top 10 movie recommendations for: {user_movie}\n")
    print(recommendations.head(10))
else:
    print("Could not confidently match your input. Please try again.")

Enter a movie name (e.g., Toy Story): Star Wars

Best match: Star Wars (1977) (Confidence: 90%)

 Top 10 movie recommendations for: Star Wars (1977)

                                                    Correlation  Rating Count
title                                                                        
Empire Strikes Back, The (1980)                        0.747981           367
Return of the Jedi (1983)                              0.672556           507
Raiders of the Lost Ark (1981)                         0.536117           420
Giant (1956)                                           0.488093            51
Life Less Ordinary, A (1997)                           0.411638            53
Austin Powers: International Man of Mystery (1997)     0.377433           130
Sting, The (1973)                                      0.367538           241
Indiana Jones and the Last Crusade (1989)              0.350107           331
Pinocchio (1940)                                       0.347868       