# **Movie Recommendation System**


In [None]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
!pip install fuzzywuzzy
from fuzzywuzzy import process




In [None]:
# Mount Google Drive at /content/drive (Colab only); the CSV files read in
# the following cells live under its MyDrive folder.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the movie catalogue, keeping only the id and title columns.
movies = pd.read_csv(
    'drive/MyDrive/AIML project/moviesdata.csv',
    usecols=['movieId', 'title'],
)
movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [None]:
# Load the user ratings, keeping only the user id, movie id and rating columns.
ratings = pd.read_csv(
    'drive/MyDrive/AIML project/ratings.csv',
    usecols=['userId', 'movieId', 'rating'],
)
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [None]:
# (rows, columns) of the ratings table: one row per individual rating.
ratings.shape

(100836, 3)

In [None]:
# (rows, columns) of the movie catalogue: one row per movie.
movies.shape

(9742, 2)

In [None]:
# Build a movie x user matrix: one row per movieId, one column per userId,
# with each cell holding that user's rating (NaN where the user has not rated the movie).

ratings.pivot(index='movieId',columns='userId',values='rating')

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,,,4.0,,4.5,,,,...,4.0,,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,,,,,,4.0,,4.0,,,...,,4.0,,5.0,3.5,,,2.0,,
3,4.0,,,,,5.0,,,,,...,,,,,,,,2.0,,
4,,,,,,3.0,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,3.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,,,,,,,,,,,...,,,,,,,,,,
193583,,,,,,,,,,,...,,,,,,,,,,
193585,,,,,,,,,,,...,,,,,,,,,,
193587,,,,,,,,,,,...,,,,,,,,,,


In [None]:
# Reshape the long ratings table into a movie x user grid and replace the
# NaN cells (movie not rated by that user) with 0.
movies_users = (
    ratings
    .pivot(index='movieId', columns='userId', values='rating')
    .fillna(0)
)
movies_users.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Keep only the non-zero ratings by storing the grid in Compressed Sparse Row
# form; row order is the same as movies_users.index.
mat_movies = csr_matrix(movies_users.to_numpy())
mat_movies

<9724x610 sparse matrix of type '<class 'numpy.float64'>'
	with 100836 stored elements in Compressed Sparse Row format>

In [None]:
# metric='cosine'   -> neighbours are ranked by cosine distance between rating vectors
# algorithm='brute' -> brute-force search: the query is compared against every fitted row
# n_neighbors=20    -> default number of neighbours returned when a query does not specify one
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(
    metric='cosine',
    algorithm='brute',
    n_neighbors=20,
)
model.fit(mat_movies)


In [None]:
def recommender(movie_name, data, n=10):
    """Print and return up to ``n`` movies whose rating patterns resemble ``movie_name``.

    The title is fuzzy-matched against ``movies['title']``, so a partial or
    misspelled name still selects the closest catalogue entry.

    Parameters
    ----------
    movie_name : str
        Free-text movie title to look up.
    data : sparse matrix
        Movie x user rating matrix. Its rows must be in the same order as
        ``movies_users.index`` (this holds for ``mat_movies``).
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list of str
        Recommended titles, nearest first. Empty when no title matches or
        the selected movie has no ratings.
    """
    match = process.extractOne(movie_name, movies['title'])
    if match is None:
        print('No matching movie found.')
        return []
    idx = match[2]  # row label of the matched title in `movies`
    print('Movie Selected:', movies['title'][idx], 'Index:', idx)
    print()
    print('Searching for recommendations...')

    # `idx` is a position in the movie catalogue, NOT in the rating matrix:
    # the matrix only has rows for rated movies, ordered by movieId. Translate
    # through the movieId so the correct row is queried.
    movie_id = movies['movieId'][idx]
    if movie_id not in movies_users.index:
        print('No ratings available for this movie; cannot recommend.')
        return []
    row = movies_users.index.get_loc(movie_id)

    # The query movie is its own nearest neighbour, so ask for one extra
    # (never more than the number of rows available).
    k = min(n + 1, data.shape[0])
    _, indices = model.kneighbors(data[row], n_neighbors=k)

    # Map matrix rows back to movieIds, dropping the query movie itself.
    neighbour_ids = [movies_users.index[i] for i in indices[0] if i != row][:n]
    title_by_id = movies.drop_duplicates('movieId').set_index('movieId')['title']

    recommendations = []
    for neighbour_id in neighbour_ids:
        title = title_by_id.get(neighbour_id)
        if title is None:
            # Rated movie that is missing from the catalogue: nothing to show.
            continue
        recommendations.append(title)
        print(title)
    return recommendations

In [None]:
def calculate_accuracy(movie_name, n=10, threshold=4.0):
    """Score the recommendations for ``movie_name`` against co-rater preferences.

    A recommended title counts as "relevant" when at least one user who rated
    the selected movie also gave that title a rating of ``threshold`` or more.
    The score is the percentage of recommended titles that are relevant.

    NOTE: this is a lenient co-rating overlap measure, not accuracy against
    held-out data; a single co-rater is enough to make a title relevant.

    Parameters
    ----------
    movie_name : str
        Free-text movie title; fuzzy-matched against ``movies['title']``.
    n : int, default 10
        Passed to ``recommender`` as the number of neighbours to look up.
    threshold : float, default 4.0
        Minimum rating for a movie to count as liked by a user.

    Returns
    -------
    float
        Percentage (0-100) of recommended titles that are relevant; 0 when
        there are no recommendations.
    """
    idx = process.extractOne(movie_name, movies['title'])[2]
    recommendations = recommender(movie_name, mat_movies, n)

    # Users who rated the selected movie (any rating).
    target_movie_id = movies['movieId'][idx]
    target_users = ratings.loc[ratings['movieId'] == target_movie_id, 'userId'].unique()

    # Every movie that one of those users rated at or above the threshold.
    # A single filter suffices: users without any such rating contribute no rows.
    liked_by_target_users = ratings['userId'].isin(target_users) & (ratings['rating'] >= threshold)
    relevant_movies = ratings.loc[liked_by_target_users, 'movieId'].unique()

    # A set gives constant-time membership checks for the titles below.
    relevant_movie_titles = set(movies.loc[movies['movieId'].isin(relevant_movies), 'title'])

    relevant_count = sum(1 for movie in recommendations if movie in relevant_movie_titles)
    accuracy_percentage = (relevant_count / len(recommendations) * 100) if recommendations else 0

    print(f"\nAccuracy: {accuracy_percentage:.2f}%")
    return accuracy_percentage


In [None]:
if __name__ == "__main__":
    # Ask for a title; it does not have to be exact because the lookup is fuzzy.
    movie_name = input("Enter a movie name for recommendations: ").strip()
    if movie_name:
        accuracy_percentage = calculate_accuracy(movie_name)
    else:
        # A blank query gives the fuzzy matcher nothing to compare against,
        # so whatever it selected would be arbitrary; skip the lookup.
        accuracy_percentage = None
        print("No movie name entered; nothing to recommend.")

Enter a movie name for recommendations: jumanji
Movie Selected: Jumanji (1995) Index: 1

Searching for recommendations...
Lion King, The (1994)
Mrs. Doubtfire (1993)
Mask, The (1994)
Jurassic Park (1993)
Home Alone (1990)
Nightmare Before Christmas, The (1993)
Aladdin (1992)
Beauty and the Beast (1991)
Ace Ventura: When Nature Calls (1995)

Accuracy: 100.00%
