# Movie Recommendations

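## This notebook builds a TF-IDF title search and a user-based movie recommender on the MovieLens 25M dataset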

In [1]:
import pandas as pd

# Dataset: MovieLens 25M — https://files.grouplens.org/datasets/movielens/ml-25m.zip
# Load the movie catalogue with pandas; movies.csv is read from the current
# working directory (presumably extracted from the archive above).
movies = pd.read_csv('movies.csv')

In [2]:
# Preview the first rows to check the loaded columns.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
import re

def clean_title(title):
    """Strip everything except ASCII letters, digits and spaces from ``title``.

    Removed characters are dropped, not replaced, so "Toy Story (1995)"
    becomes "Toy Story 1995" and hyphenated words are joined together
    ("Spider-Man" -> "SpiderMan").
    """
    disallowed = re.compile("[^a-zA-Z0-9 ]")
    return disallowed.sub("", title)

In [4]:
# Add a normalised copy of every title; the TF-IDF search index is fitted on this column.
movies["clean_title"] = movies["title"].apply(clean_title)

In [5]:
# Show the frame to confirm the new clean_title column (the display is truncated to the first and last rows).
movies

Unnamed: 0,movieId,title,genres,clean_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji 1995
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II 1995
...,...,...,...,...
62418,209157,We (2018),Drama,We 2018
62419,209159,Window of the Soul (2001),Documentary,Window of the Soul 2001
62420,209163,Bad Poems (2018),Comedy|Drama,Bad Poems 2018
62421,209169,A Girl Thing (2001),(no genres listed),A Girl Thing 2001


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Use single words and adjacent word pairs (ngram_range=(1,2)) as TF-IDF features.
vectorizer = TfidfVectorizer(ngram_range=(1,2))

# Learn the vocabulary from the cleaned titles and build the TF-IDF matrix that search() compares queries against.
tfidf = vectorizer.fit_transform(movies["clean_title"])

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title, top_n=5):
    """Return the movies whose titles best match ``title``, best match first.

    The query is normalised with ``clean_title``, transformed with the fitted
    ``vectorizer`` and compared against every row of ``tfidf`` by cosine
    similarity.

    Parameters
    ----------
    title : str
        Free-text movie title typed by the user.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``movies`` ordered by decreasing similarity,
        so ``.iloc[0]`` is the closest match.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Never ask for more rows than exist: argpartition raises when kth is out of range.
    k = min(top_n, similarity.shape[0])
    if k <= 0:
        return movies.iloc[0:0]

    # argpartition only guarantees that the k largest scores occupy the last k
    # slots, in no particular order, so that small slice is ranked explicitly.
    # Sorting the candidate positions first makes ties resolve deterministically
    # (lower row position wins).
    candidates = np.sort(np.argpartition(similarity, -k)[-k:])
    order = np.argsort(-similarity[candidates], kind="stable")
    return movies.iloc[candidates[order]]

In [8]:
# Install the widget library used by the interactive text boxes in the later cells.
%pip install ipywidgets
# Shell command (not run here); older JupyterLab versions may need it to render widgets — verify for your setup:
#   jupyter labextension install @jupyter-widgets/jupyterlab-manager

Note: you may need to restart the kernel to use updated packages.


In [9]:
import ipywidgets as widgets
from IPython.display import display

# Text box the user types a movie title into, pre-filled with an example query.
movie_input = widgets.Text(
    value='Toy Story',
    description='Movie Title:',
    disabled=False
)
# Output area that on_type renders the search results into.
movie_list = widgets.Output()

def on_type(data):
    """Refresh the search results when the text box value changes.

    ``data`` is the change notification handed over by ``observe``; its
    ``"new"`` entry holds the current text. The output area is always
    cleared first; results are shown only for text longer than 5 characters.
    """
    with movie_list:
        movie_list.clear_output()
        query = data["new"]
        if len(query) <= 5:
            # Too short to search: leave the output area empty.
            return
        display(search(query))

# Call on_type every time the text box's `value` changes.
movie_input.observe(on_type, names='value')


# Render the text box followed by the results area.
display(movie_input, movie_list)

Text(value='Toy Story', description='Movie Title:')

Output()

In [88]:
# Ask for the movieId to build recommendations for (blocks until a value is typed).
movie_id = int(input("digit a id"))
# The following cells walk through the recommendation steps one at a time;
# the same steps are wrapped into find_similar_movies(movie_id) further down.
# Catalogue row(s) for the chosen id (not used by the later cells shown here).
movie = movies[movies["movieId"] == movie_id]

In [61]:
# Load the MovieLens ratings table.
# Prefer a ratings.csv in the working directory (where movies.csv is read from)
# so the notebook is not tied to one machine; fall back to the original
# author's download location when no local copy exists.
from pathlib import Path

ratings_path = Path("ratings.csv")
if not ratings_path.exists():
    ratings_path = Path("C:/Users/Pichau/Downloads/ratings.csv")
ratings = pd.read_csv(ratings_path)

In [89]:
# Check the column types of the ratings table before filtering on them.
ratings.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

In [90]:
# Users who rated the chosen movie above 4; they are treated as having similar taste.
similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()

In [91]:
# movieId of every rating above 4 given by those users (one entry per rating).
similar_user_recs = ratings[(ratings["userId"].isin(similar_users)) & (ratings["rating"] > 4)]["movieId"]

In [92]:
# Fraction of the similar users who rated each movie above 4.
similar_user_recs = similar_user_recs.value_counts() / len(similar_users)

# Keep only movies that more than 10% of the similar users liked.
similar_user_recs = similar_user_recs[similar_user_recs > .10]

In [93]:
# Every rating above 4, from any user, for the shortlisted movies.
all_users = ratings[(ratings["movieId"].isin(similar_user_recs.index)) & (ratings["rating"] > 4)]

In [94]:
# Fraction of the distinct users in all_users who rated each shortlisted movie above 4 (baseline popularity).
all_user_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())

In [95]:
# Put the two fractions side by side, aligned on the movieId index.
rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
rec_percentages.columns = ["similar", "all"]

In [96]:
# Inspect the per-movie fractions before scoring.
rec_percentages

Unnamed: 0_level_0,similar,all
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
106072,1.000000,0.005270
89745,0.793409,0.040318
59315,0.683143,0.054740
86332,0.638783,0.010106
112852,0.631179,0.043317
...,...,...
2858,0.101394,0.169088
44199,0.101394,0.016946
104211,0.101394,0.004789
31685,0.100127,0.008537


In [97]:
# Ratio of the two fractions: values above 1 mean the similar users like the movie more often than the baseline.
rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

In [98]:
# Highest score first.
rec_percentages = rec_percentages.sort_values("score", ascending=False)

In [99]:
# Attach title and genres to the ten highest-scoring movies by joining the movieId index to movies.movieId.
rec_percentages.head(10).merge(movies, left_index=True, right_on="movieId")


Unnamed: 0,similar,all,score,movieId,title,genres,clean_title
20513,1.0,0.00527,189.745247,106072,Thor: The Dark World (2013),Action|Adventure|Fantasy|IMAX,Thor The Dark World 2013
21454,0.155894,0.001991,78.317669,110553,The Amazing Spider-Man 2 (2014),Action|Sci-Fi|IMAX,The Amazing SpiderMan 2 2014
20018,0.17237,0.002612,65.998347,103772,"Wolverine, The (2013)",Action|Adventure|Fantasy|Sci-Fi,Wolverine The 2013
16312,0.638783,0.010106,63.206612,86332,Thor (2011),Action|Adventure|Drama|Fantasy|IMAX,Thor 2011
25073,0.1673,0.002752,60.792167,122924,X-Men: Apocalypse (2016),Action|Adventure|Fantasy|Sci-Fi,XMen Apocalypse 2016
16595,0.114068,0.001964,58.08528,87520,Transformers: Dark of the Moon (2011),Action|Adventure|Sci-Fi|War|IMAX,Transformers Dark of the Moon 2011
17595,0.105196,0.002051,51.299204,91974,Underworld: Awakening (2012),Action|Fantasy|Horror|IMAX,Underworld Awakening 2012
19841,0.197719,0.003888,50.859551,103042,Man of Steel (2013),Action|Adventure|Fantasy|Sci-Fi|IMAX,Man of Steel 2013
17178,0.126743,0.002672,47.436312,90249,Real Steel (2011),Action|Drama|Sci-Fi|IMAX,Real Steel 2011
17872,0.106464,0.002244,47.436312,93363,John Carter (2012),Action|Adventure|Sci-Fi|IMAX,John Carter 2012


In [100]:
def find_similar_movies(movie_id, min_rating=4, min_share=0.10, top_n=10):
    """Recommend movies that fans of ``movie_id`` like unusually often.

    Users who rated ``movie_id`` above ``min_rating`` are treated as "similar
    users". Movies liked by more than ``min_share`` of them are shortlisted,
    and each shortlisted movie is scored as

        share of similar users who liked it / share of all shortlist raters who liked it

    so a high score marks a movie that is specific to this audience rather
    than simply popular.

    Parameters
    ----------
    movie_id : int
        ``movieId`` of the reference movie.
    min_rating : float, default 4
        A rating counts as a "like" only when strictly greater than this.
    min_share : float, default 0.10
        Minimum fraction (exclusive) of similar users that must like a movie
        for it to be shortlisted.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``score``, ``title`` and ``genres`` for at most ``top_n``
        movies, highest score first.
    """
    # Filter the ratings table once; every later step only looks at ratings
    # above the threshold, so the mask need not be rebuilt for each of them.
    liked = ratings[ratings["rating"] > min_rating]

    similar_users = liked[liked["movieId"] == movie_id]["userId"].unique()

    # Fraction of similar users who liked each movie, restricted to the shortlist.
    similar_user_recs = liked[liked["userId"].isin(similar_users)]["movieId"]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # Baseline: fraction of everyone who liked any shortlisted movie.
    all_users = liked[liked["movieId"].isin(similar_user_recs.index)]
    all_user_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())

    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    top = rec_percentages.head(top_n)
    return top.merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]

In [106]:
import ipywidgets as widgets
from IPython.display import display

# Text box for the movie title to base recommendations on, pre-filled with an example.
movie_name_input = widgets.Text(
    value='Toy Story',
    description='Movie Title:',
    disabled=False
)
# Output area that on_type renders the recommendation table into.
recommendation_list = widgets.Output()

def on_type(data):
    """Refresh the recommendations when the text box value changes.

    ``data`` is the change notification handed over by ``observe``; its
    ``"new"`` entry holds the current text. The output area is always
    cleared first. For text longer than 5 characters the title is looked up
    with ``search`` and the ``movieId`` of the first returned row is passed
    to ``find_similar_movies``.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        typed = data["new"]
        if len(typed) <= 5:
            # Too short to search: leave the output area empty.
            return
        matches = search(typed)
        first_id = matches.iloc[0]["movieId"]
        display(find_similar_movies(first_id))



In [46]:
# Call on_type on every change of the text box value, then render the box followed by its results area.
movie_name_input.observe(on_type, names='value')
display(movie_name_input, recommendation_list)

Text(value='Thor: The Dark World', description='Movie Title:')

Output()

In [103]:
# Spot-check the recommender on a fixed movieId, independent of the widget.
display(find_similar_movies(89745))

Unnamed: 0,score,title,genres
17067,24.716368,"Avengers, The (2012)",Action|Adventure|Sci-Fi|IMAX
20513,19.610199,Thor: The Dark World (2013),Action|Adventure|Fantasy|IMAX
25058,19.49177,Avengers: Age of Ultron (2015),Action|Adventure|Sci-Fi
19678,17.867419,Iron Man 3 (2013),Action|Sci-Fi|Thriller|IMAX
16725,17.843074,Captain America: The First Avenger (2011),Action|Adventure|Sci-Fi|Thriller|War
16312,17.299824,Thor (2011),Action|Adventure|Drama|Fantasy|IMAX
21348,17.183667,Captain America: The Winter Soldier (2014),Action|Adventure|Sci-Fi|IMAX
25071,16.649399,Captain America: Civil War (2016),Action|Sci-Fi|Thriller
25061,15.865628,Ant-Man (2015),Action|Adventure|Sci-Fi
14628,15.651921,Iron Man 2 (2010),Action|Adventure|Sci-Fi|Thriller|IMAX


In [104]:
# Spot-check the title search on a fixed query, independent of the widget.
display(search("Toy Story"))

Unnamed: 0,movieId,title,genres,clean_title
3021,3114,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 2 1999
14813,78499,Toy Story 3 (2010),Adventure|Animation|Children|Comedy|Fantasy|IMAX,Toy Story 3 2010
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story 1995
59767,201588,Toy Story 4 (2019),Adventure|Animation|Children|Comedy,Toy Story 4 2019
20497,106022,Toy Story of Terror (2013),Animation|Children|Comedy,Toy Story of Terror 2013
