In [1]:
import pandas as pd

movies = pd.read_csv("E:\Python Project\Movie Recommendation System With Python And Pandas Data Project\movie\movies.csv")

In [2]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


# Cleaning Movie Titles With Regex

In [3]:
import re

def clean_title(title):
    return re.sub("[^a-zA0-9 ]", "",title)
    

In [4]:
movies["clean_title"] = movies["title"].apply(clean_title)

In [5]:
movies

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,oy tory 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,umanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,rumpier ld en 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,aiting to xhale 1995
4,5,Father of the Bride Part II (1995),Comedy,ather of the ride art 1995
...,...,...,...,...
62418,209157,We (2018),Drama,e 2018
62419,209159,Window of the Soul (2001),Documentary,indow of the oul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,ad oems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A irl hing 2001


# Creating a TFIDF Matrix

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(ngram_range=(1,2))

tfidf = vectorizer.fit_transform(movies["clean_title"])

# Creating a Search Function

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title):
   # title = "Toy Story 1995"
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf). flatten()
    indices = np.argpartition(similarity, -5)[-5:]
    result = movies.iloc[indices][::-1]
    return result

# Building an Interactive Search box with Jupyter 

In [8]:
import ipywidgets as widgets
from IPython.display import display

movie_input = widgets.Text(
    value='Toy Story',
    description='Movie Title:',
    disabled=False
)
movie_list = widgets.Output()

def on_type(data):
    with movie_list:
        movie_list.clear_output()
        title = data['new']
        if len(title) > 5:
            display(search(title))

movie_input.observe(on_type, names='value')

display(movie_input,movie_list)

Text(value='Toy Story', description='Movie Title:')

Output()

# Reading In Movie Ratings Data

In [None]:
import pandas as pd
ratings = pd.read_csv("E:\\Python Project\\Movie Recommendation System With Python And Pandas Data Project\\movie\\ratings.csv")

In [None]:
ratings

In [None]:
ratings.head(11)     # top 10 ratings movies

In [None]:
ratings.dtypes

# Finding Users who Liked the same movie

In [None]:
movie_id = 1

In [None]:
similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()

In [None]:
similar_users 

In [None]:
similar_users_recs = ratings[(ratings["userId"].isin(similar_users)) & (ratings["rating"] >4)]

In [None]:
similar_users_recs

In [None]:
similar_users_recs = ratings[(ratings["userId"].isin(similar_users)) & (ratings["rating"] >4)] ["movieId"]

In [None]:
similar_users_recs

In [None]:
similar_users_recs = similar_users_recs.value_counts() / len(similar_users)

similar_user_recs = similar_users_recs[similar_users_recs >.1]

In [None]:
similar_users_recs

# Finding How Much All user like Movies

In [None]:
all_users = ratings[(ratings["movieId"].isin(similar_users_recs.index)) & (ratings["rating"] > 4 )]

In [None]:
all_users_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())

In [None]:
all_users_recs

# Recommendation Score

In [None]:
rec_percentages = pd.concat([similar_users_recs, all_users_recs], axis=1)
rec_percentages.columns = ["similar","all"]

In [None]:
rec_percentages

In [None]:
rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

In [None]:
rec_percentages =  rec_percentages.sort_values("score", ascending =False)

In [None]:
rec_percentages

In [None]:
rec_percentages.head(11).merge(movies, left_index=True, right_on="movieId")

 # Build Recommendation Function

In [None]:
def find_similar_movies(movie_id):
    similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()
    similar_user_recs = ratings[(ratings["userId"].isin(similar_users)) & (ratings["rating"] > 4)]["movieId"]
    
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > 0.10]
    
    all_users = ratings[(ratings["movieId"].isin(similar_user_recs.index)) & (ratings["rating"] > 4)]
    all_users_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())
    
    rec_percentages = pd.concat([similar_user_recs, all_users_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]
    
    rec_percentages["score"] = rec_percentages["similar"] / len(rec_percentages["all"])
    
    rec_percentages = rec_percentages.sort_values("score", ascending=False)
    return rec_percentages.head(11).merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]


# Interaction Recommendation  Widget

In [None]:
movie_name_input = widgets.Text(
    value="Toy Story",
    description="Movie Title:",
    disabled=False,

)

recommendation_list = widgets.Output()

def on_type(data):
    with recommendation_list:
        recommendation_list.clear_output()
        title = data ["new"]
        if len(title) > 5:
            results = search(title)
            movie_id = results.iloc[0]["movieId"]
            display(find_similar_movies(movie_id))

movie_name_input.observe(on_type , names="value")

((display(movie_name_input , recommendation_list)))