# Hybrid Recommender System
# Data Preparation

In [1]:
import pandas as pd

In [134]:
movie = pd.read_csv("datasets/movie.csv")
rating = pd.read_csv("datasets/rating.csv")

Read datasets from csv files.

In [169]:
df = movie.merge(rating, how="left", on="movieId")

In [4]:
df.to_parquet("datasets/dataset.parquet")

Merged movie data to rating data on movieId. Saved file to parquet for continue with Apache Spark and compression. Finaly delete csv files.

In [1]:
from pyspark.sql import SparkSession, functions as F

import findspark

findspark.init("C:\Program Files\Spark\spark-3.3.1-bin-hadoop3")

In [2]:
spark = SparkSession.builder \
    .master("local[2]") \
    .appName("Hybrid Recommender System") \
    .config("spark.sql.adaptive.enabled", True) \
    .getOrCreate()

In [3]:
path = "file:///Users/talha/OneDrive/Masaüstü/Talha Nebi Kumru/Data Science/Miuul/Recommendation Systems/Hybrid_Recommender_System/datasets/dataset.parquet"

df = spark.read \
    .format("parquet") \
    .option("header", True) \
    .option("inferSchema", True) \
    .load(path)

In [4]:
df.show(5)

+-------+----------------+--------------------+------+------+-------------------+-----------------+
|movieId|           title|              genres|userId|rating|          timestamp|__index_level_0__|
+-------+----------------+--------------------+------+------+-------------------+-----------------+
|      1|Toy Story (1995)|Adventure|Animati...|   3.0|   4.0|1999-12-11 13:36:47|                0|
|      1|Toy Story (1995)|Adventure|Animati...|   6.0|   5.0|1997-03-13 17:50:52|                1|
|      1|Toy Story (1995)|Adventure|Animati...|   8.0|   4.0|1996-06-05 13:37:51|                2|
|      1|Toy Story (1995)|Adventure|Animati...|  10.0|   4.0|1999-11-25 02:44:47|                3|
|      1|Toy Story (1995)|Adventure|Animati...|  11.0|   4.5|2009-01-02 01:13:41|                4|
+-------+----------------+--------------------+------+------+-------------------+-----------------+
only showing top 5 rows



In [5]:
df.printSchema()

root
 |-- movieId: long (nullable = true)
 |-- title: string (nullable = true)
 |-- genres: string (nullable = true)
 |-- userId: double (nullable = true)
 |-- rating: double (nullable = true)
 |-- timestamp: string (nullable = true)
 |-- __index_level_0__: long (nullable = true)



In [4]:
df = df.drop("__index_level_0__")

In [5]:
df = df.withColumn("timestamp", F.to_timestamp(F.col("timestamp")))

In [8]:
df.groupby("title").count().show(5)

+--------------------+-----+
|               title|count|
+--------------------+-----+
|    Fair Game (1995)| 1295|
| If Lucy Fell (1996)| 1136|
| Three Wishes (1995)|  365|
|Heavenly Creature...| 7681|
|Paris, France (1993)|   47|
+--------------------+-----+
only showing top 5 rows



In [6]:
df.createOrReplaceTempView("Movies")

In [7]:
rare_movies = spark.sql("""
          SELECT title as rare FROM Movies
          GROUP BY title
          HAVING COUNT(title) >= 1000
""")

In [11]:
rare_movies.show(5)

+--------------------+
|                rare|
+--------------------+
|    Fair Game (1995)|
| If Lucy Fell (1996)|
|Heavenly Creature...|
|Snow White and th...|
|Night of the Livi...|
+--------------------+
only showing top 5 rows



In [8]:
rare_movies.createOrReplaceTempView("Rare_Movies")

In [9]:
common_movies = spark.sql("""
    SELECT * FROM Movies
    INNER JOIN Rare_Movies ON
    Movies.title = Rare_Movies.rare
    ORDER BY RAND()
""")

In [14]:
common_movies.show(5)

+-------+--------------------+--------------------+-------+------+-------------------+--------------------+
|movieId|               title|              genres| userId|rating|          timestamp|                rare|
+-------+--------------------+--------------------+-------+------+-------------------+--------------------+
|   5025|Orange County (2002)|              Comedy|97260.0|   3.5|2015-03-03 18:00:42|Orange County (2002)|
|   4848|Mulholland Drive ...|Crime|Drama|Film-...|43883.0|   4.0|2003-03-09 17:17:49|Mulholland Drive ...|
|  37729| Corpse Bride (2005)|Animation|Comedy|...|28890.0|   3.5|2006-12-20 03:09:47| Corpse Bride (2005)|
|   1584|      Contact (1997)|        Drama|Sci-Fi|35970.0|   3.0|2008-08-27 03:42:02|      Contact (1997)|
|    225|   Disclosure (1994)|      Drama|Thriller|65472.0|   3.0|2008-01-14 08:42:43|   Disclosure (1994)|
+-------+--------------------+--------------------+-------+------+-------------------+--------------------+
only showing top 5 rows



In [10]:
common_movies = common_movies.drop("rare")

In [11]:
user_movies = common_movies.groupby("userId") \
    .pivot("title") \
    .agg(F.first("rating"))

In [22]:
%%capture --no-display
user_movies.limit(5).toPandas()

Unnamed: 0,userId,"'burbs, The (1989)",(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),102 Dalmatians (2000),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),15 Minutes (2001),16 Blocks (2006),17 Again (2009),1984 (Nineteen Eighty-Four) (1984),2 Days in the Valley (1996),"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)","20,000 Leagues Under the Sea (1954)",200 Cigarettes (1999),2001: A Space Odyssey (1968),2010: The Year We Make Contact (1984),2012 (2009),2046 (2004),21 (2008),21 Grams (2003),21 Jump Street (2012),24 Hour Party People (2002),25th Hour (2002),27 Dresses (2008),28 Days (2000),28 Days Later (2002),28 Weeks Later (2007),3 Ninjas (1992),3-Iron (Bin-jip) (2004),30 Days of Night (2007),...,"World According to Garp, The (1982)","World Is Not Enough, The (1999)","World's Fastest Indian, The (2005)",Wreck-It Ralph (2012),"Wrestler, The (2008)",Wristcutters: A Love Story (2006),Wyatt Earp (1994),"X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men Origins: Wolverine (2009),X-Men: Days of Future Past (2014),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),"Year of Living Dangerously, The (1982)",Yellow Submarine (1968),Yes Man (2008),Yojimbo (1961),You Can Count on Me (2000),You Don't Mess with the Zohan (2008),You Only Live Twice (1967),You've Got Mail (1998),"You, Me and Dupree (2006)",Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),Zack and Miri Make a Porno (2008),Zelig (1983),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zulu (1964),[REC] (2007),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
0,7.0,,,,,,,,,,,,,,,,,,,,,,,,3.0,3.0,,,,,,,,,,,,,,,...,,,,,,,,,4.0,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,2.0
1,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,29.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,42.0,,,,,,,,,,3.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,47.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,0.5,,,,,,,,,2.5,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
path = "file:///Users/talha/OneDrive/Masaüstü/Talha Nebi Kumru/Data Science/Miuul/Recommendation Systems/Hybrid_Recommender_System/datasets"
user_movies.write.parquet(path + "/user_movies.parquet")

In [13]:
spark.stop()

## Select Recommendation Random User

In [19]:
import pandas as pd

In [21]:
path = "C:/Users/talha/OneDrive/Masaüstü/Talha Nebi Kumru/Data Science/Miuul/Recommendation Systems/Hybrid_Recommender_System/datasets/user_movies.parquet"
user_movies = pd.read_parquet(path)

In [22]:
user_movies.head()

Unnamed: 0,userId,"'burbs, The (1989)",(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),102 Dalmatians (2000),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),15 Minutes (2001),16 Blocks (2006),17 Again (2009),1984 (Nineteen Eighty-Four) (1984),2 Days in the Valley (1996),"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)","20,000 Leagues Under the Sea (1954)",200 Cigarettes (1999),2001: A Space Odyssey (1968),2010: The Year We Make Contact (1984),2012 (2009),2046 (2004),21 (2008),21 Grams (2003),21 Jump Street (2012),24 Hour Party People (2002),25th Hour (2002),27 Dresses (2008),28 Days (2000),28 Days Later (2002),28 Weeks Later (2007),3 Ninjas (1992),3-Iron (Bin-jip) (2004),30 Days of Night (2007),...,"World According to Garp, The (1982)","World Is Not Enough, The (1999)","World's Fastest Indian, The (2005)",Wreck-It Ralph (2012),"Wrestler, The (2008)",Wristcutters: A Love Story (2006),Wyatt Earp (1994),"X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men Origins: Wolverine (2009),X-Men: Days of Future Past (2014),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),"Year of Living Dangerously, The (1982)",Yellow Submarine (1968),Yes Man (2008),Yojimbo (1961),You Can Count on Me (2000),You Don't Mess with the Zohan (2008),You Only Live Twice (1967),You've Got Mail (1998),"You, Me and Dupree (2006)",Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),Zack and Miri Make a Porno (2008),Zelig (1983),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zulu (1964),[REC] (2007),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
0,7.0,,,,,,,,,,,,,,,,,,,,,,,,3.0,3.0,,,,,,,,,,,,,,,...,,,,,,,,,4.0,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,2.0
1,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,29.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,42.0,,,,,,,,,,3.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,47.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,0.5,,,,,,,,,2.5,,,,,,,,,,,,,,,,,,,,,,,,


In [32]:
random_user = user_movies["userId"].sample(1).item()

In [35]:
random_user

68785.0

In [37]:
random_user_df = user_movies[user_movies.index == random_user]

In [38]:
random_user_df

Unnamed: 0,userId,"'burbs, The (1989)",(500) Days of Summer (2009),*batteries not included (1987),...And Justice for All (1979),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),102 Dalmatians (2000),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),15 Minutes (2001),16 Blocks (2006),17 Again (2009),1984 (Nineteen Eighty-Four) (1984),2 Days in the Valley (1996),"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)","20,000 Leagues Under the Sea (1954)",200 Cigarettes (1999),2001: A Space Odyssey (1968),2010: The Year We Make Contact (1984),2012 (2009),2046 (2004),21 (2008),21 Grams (2003),21 Jump Street (2012),24 Hour Party People (2002),25th Hour (2002),27 Dresses (2008),28 Days (2000),28 Days Later (2002),28 Weeks Later (2007),3 Ninjas (1992),3-Iron (Bin-jip) (2004),30 Days of Night (2007),...,"World According to Garp, The (1982)","World Is Not Enough, The (1999)","World's Fastest Indian, The (2005)",Wreck-It Ralph (2012),"Wrestler, The (2008)",Wristcutters: A Love Story (2006),Wyatt Earp (1994),"X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men Origins: Wolverine (2009),X-Men: Days of Future Past (2014),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),"Year of Living Dangerously, The (1982)",Yellow Submarine (1968),Yes Man (2008),Yojimbo (1961),You Can Count on Me (2000),You Don't Mess with the Zohan (2008),You Only Live Twice (1967),You've Got Mail (1998),"You, Me and Dupree (2006)",Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),Zack and Miri Make a Porno (2008),Zelig (1983),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zulu (1964),[REC] (2007),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
68785,73727.0,,,,,3.5,,,,,,,,,,,,,,,,,,,,,,,,3.5,,,3.5,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,


In [40]:
random_user_movies_watched = random_user_df.columns[random_user_df.notna().any()].tolist()

In [41]:
len(random_user_movies_watched)

202

In [42]:
len(random_user_df.columns)

3160

## Get The Users which Watched Same Movies

In [43]:
movies_watched_df = user_movies[random_user_movies_watched]

In [44]:
movies_watched_df.head()

Unnamed: 0,userId,10 Things I Hate About You (1999),21 Grams (2003),25th Hour (2002),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),8 Mile (2002),"Accidental Tourist, The (1988)","Accused, The (1988)",Adaptation (2002),Almost Famous (2000),Along Came Polly (2004),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",American Beauty (1999),American History X (1998),American Pie (1999),"American President, The (1995)",Anger Management (2003),Antwone Fisher (2002),As Good as It Gets (1997),Austin Powers: International Man of Mystery (1997),Back to the Future (1985),"Beautiful Mind, A (2001)",Beetlejuice (1988),Before Sunrise (1995),Before Sunset (2004),Being John Malkovich (1999),Bend It Like Beckham (2002),Benny & Joon (1993),Best in Show (2000),Bewitched (2005),Big (1988),Big Daddy (1999),Billy Madison (1995),Blow (2001),Bounce (2000),Bowling for Columbine (2002),"Breakfast Club, The (1985)",Broadcast News (1987),Bulworth (1998),...,Sideways (2004),"Silence of the Lambs, The (1991)","Sixth Sense, The (1999)",Sleeping with the Enemy (1991),Sleepless in Seattle (1993),Sliding Doors (1998),Smilla's Sense of Snow (1997),Spanglish (2004),Spellbound (2002),Splash (1984),Stand by Me (1986),State and Main (2000),"Stepford Wives, The (2004)",Stepmom (1998),Super Size Me (2004),"Sweetest Thing, The (2002)","Talented Mr. Ripley, The (1999)",Teaching Mrs. Tingle (1999),"Terminal, The (2004)",The Butterfly Effect (2004),Thelma & Louise (1991),There's Something About Mary (1998),Throw Momma from the Train (1987),"Time to Kill, A (1996)",Titanic (1953),Titanic (1997),Trading Places (1983),Traffic (2000),"Truman Show, The (1998)",Under the Tuscan Sun (2003),Vanilla Sky (2001),"Virgin Suicides, The (1999)",Weird Science (1985),Whale Rider (2002),What's Eating Gilbert Grape (1993),When Harry Met Sally... (1989),While You Were Sleeping (1995),Who Framed Roger Rabbit? (1988),Wonder Boys (2000),You've Got Mail (1998)
0,7.0,,,,,,,2.0,,,,,,3.0,,,4.0,,,,3.0,4.0,3.0,,,,,,3.0,4.0,,,3.0,,,4.0,,,3.0,,...,,,5.0,,4.0,,,,,,3.0,,,,,,2.0,,,,,,,,,5.0,3.0,,2.0,,,,,,,3.0,3.0,4.0,,3.0
1,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,5.0,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.0,,,
2,29.0,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,5.0,,,,,,,...,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,,,
3,42.0,,,,,,,,,,,,,4.5,,,,,,,,,,,,,,,,4.0,,,,,,,,,,,...,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,47.0,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,4.0,2.5,,,,,,,,,,,,,,,,,,,,,,,2.5,,,,,,,,,,,,,,


In [45]:
user_movie_count = movies_watched_df.T.notnull().sum()

In [46]:
user_movie_count.head()

0    44
1    13
2    20
3    13
4    14
dtype: int64

In [51]:
user_movie_count = user_movie_count.reset_index()

In [52]:
user_movie_count.columns = ["userId", "movieCount"]

In [53]:
user_movie_count.head()

Unnamed: 0,userId,movieCount
0,0,44
1,1,13
2,2,20
3,3,13
4,4,14


In [54]:
percentage = len(random_user_movies_watched) * 60 / 100

In [55]:
percentage

121.2

In [57]:
user_same_movies = user_movie_count[user_movie_count.movieCount > percentage]

In [59]:
user_same_movies = user_same_movies.userId.to_list()

In [60]:
user_same_movies[:10]

[120, 157, 166, 794, 1005, 1138, 1150, 1366, 1426, 1753]

## Get The Users which Similariest The Random User

In [77]:
movies_watched_df = movies_watched_df[movies_watched_df.index.isin(user_same_movies)]

In [79]:
movies_watched_df.head()

Unnamed: 0,userId,10 Things I Hate About You (1999),21 Grams (2003),25th Hour (2002),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),8 Mile (2002),"Accidental Tourist, The (1988)","Accused, The (1988)",Adaptation (2002),Almost Famous (2000),Along Came Polly (2004),"Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",American Beauty (1999),American History X (1998),American Pie (1999),"American President, The (1995)",Anger Management (2003),Antwone Fisher (2002),As Good as It Gets (1997),Austin Powers: International Man of Mystery (1997),Back to the Future (1985),"Beautiful Mind, A (2001)",Beetlejuice (1988),Before Sunrise (1995),Before Sunset (2004),Being John Malkovich (1999),Bend It Like Beckham (2002),Benny & Joon (1993),Best in Show (2000),Bewitched (2005),Big (1988),Big Daddy (1999),Billy Madison (1995),Blow (2001),Bounce (2000),Bowling for Columbine (2002),"Breakfast Club, The (1985)",Broadcast News (1987),Bulworth (1998),...,Sideways (2004),"Silence of the Lambs, The (1991)","Sixth Sense, The (1999)",Sleeping with the Enemy (1991),Sleepless in Seattle (1993),Sliding Doors (1998),Smilla's Sense of Snow (1997),Spanglish (2004),Spellbound (2002),Splash (1984),Stand by Me (1986),State and Main (2000),"Stepford Wives, The (2004)",Stepmom (1998),Super Size Me (2004),"Sweetest Thing, The (2002)","Talented Mr. Ripley, The (1999)",Teaching Mrs. Tingle (1999),"Terminal, The (2004)",The Butterfly Effect (2004),Thelma & Louise (1991),There's Something About Mary (1998),Throw Momma from the Train (1987),"Time to Kill, A (1996)",Titanic (1953),Titanic (1997),Trading Places (1983),Traffic (2000),"Truman Show, The (1998)",Under the Tuscan Sun (2003),Vanilla Sky (2001),"Virgin Suicides, The (1999)",Weird Science (1985),Whale Rider (2002),What's Eating Gilbert Grape (1993),When Harry Met Sally... (1989),While You Were Sleeping (1995),Who Framed Roger Rabbit? (1988),Wonder Boys (2000),You've Got Mail (1998)
120,632.0,3.0,3.0,3.0,3.0,,2.5,3.0,3.5,4.0,4.0,2.0,4.0,4.0,3.5,3.0,2.0,2.5,,3.0,3.0,2.0,4.0,3.0,4.0,4.5,2.0,3.5,,4.0,,2.0,3.0,3.0,,1.0,0.5,4.0,2.0,1.0,...,3.5,5.0,4.0,,3.0,3.0,3.5,3.5,,2.5,4.0,1.0,,,,,3.0,,,,2.0,3.5,,3.0,,,3.0,3.0,3.0,,2.0,4.0,2.0,,2.0,2.0,,,3.5,2.0
157,775.0,,,,,,,,4.0,5.0,3.0,,5.0,3.5,2.0,4.5,4.0,,,4.0,4.0,5.0,4.0,4.0,2.0,,5.0,,2.5,4.0,,3.5,,3.5,4.0,1.0,3.5,4.0,3.0,4.0,...,,5.0,4.0,1.0,1.5,2.0,,,4.5,2.5,3.5,,1.0,2.5,,,,,1.5,,3.0,4.5,,3.5,2.0,4.0,3.0,3.0,5.0,,2.0,1.5,4.0,2.5,4.5,2.5,1.0,5.0,3.5,1.0
166,812.0,4.0,,,,,3.0,,,,3.0,2.5,5.0,5.0,4.0,2.0,4.0,,3.0,3.0,4.0,4.0,3.0,4.0,,,3.0,,,2.0,,2.0,3.0,1.0,1.0,1.0,2.0,4.0,,0.5,...,,4.0,5.0,,3.0,,,,,3.0,5.0,,,,,0.5,0.5,1.0,,4.5,3.0,4.0,,3.0,,2.0,2.0,2.0,5.0,,3.0,3.0,3.0,,,4.0,3.0,3.0,3.0,
794,4084.0,,4.0,5.0,4.5,,4.0,,,5.0,5.0,,4.0,5.0,5.0,,3.0,,,5.0,,5.0,4.0,5.0,,4.5,5.0,4.0,,4.0,,3.0,,,3.0,3.0,5.0,4.0,5.0,5.0,...,5.0,5.0,4.0,,4.0,3.0,4.0,,5.0,4.0,5.0,4.0,2.5,3.0,4.5,,3.0,,3.5,,,4.0,,,,3.0,4.0,4.0,3.0,,4.0,4.0,,4.0,5.0,5.0,,4.0,4.0,3.0
1005,5155.0,3.5,,,,3.5,3.5,,,,4.0,2.5,,4.0,4.5,4.0,3.0,3.0,,3.0,4.0,4.5,3.5,4.0,,,,3.5,3.0,4.5,,3.5,3.5,4.0,4.5,,5.0,5.0,,,...,,4.5,4.5,,3.0,,,,,,4.0,,,,,3.0,3.0,,4.0,4.0,3.0,4.0,,4.0,,3.0,,4.0,3.5,,2.5,4.0,3.5,,4.0,,,4.0,,2.5


In [92]:
corr_df = movies_watched_df.T.corr().drop_duplicates()

In [93]:
corr_df

Unnamed: 0,120,157,166,794,1005,1138,1150,1366,1426,1753,1755,1776,1866,1954,2266,2316,2364,2389,2546,2573,3092,3300,3659,3691,4156,4401,4765,4872,4873,4878,5051,5174,5342,5431,5666,5917,6038,6247,6287,6365,...,134304,134349,134445,134463,134906,135094,135289,135327,135434,135454,135495,135510,135851,136109,136124,136185,136353,136371,136382,136425,136435,136541,136558,136965,137101,137232,137240,137255,137301,137429,137658,137872,137955,138004,138005,138065,138137,138171,138314,138331
120,1.000000,0.999840,0.999872,0.999886,0.999917,0.999888,0.999894,0.999898,0.999926,0.999873,0.999886,0.999877,0.999892,0.999910,0.999908,0.999899,0.999899,0.999883,0.999892,0.999897,0.999910,0.999901,0.999912,0.999877,0.999896,0.999910,0.999911,0.999873,0.999906,0.999894,0.999885,0.999896,0.999870,0.999892,0.999893,0.999917,0.999900,0.999898,0.999904,0.999913,...,0.999883,0.999896,0.999874,0.999919,0.999900,0.999907,0.999913,0.999877,0.999908,0.999872,0.999881,0.999908,0.999903,0.999885,0.999907,0.999916,0.999894,0.999916,0.999894,0.999877,0.999875,0.999893,0.999907,0.999903,0.999899,0.999913,0.999912,0.999891,0.999910,0.999884,0.999898,0.999929,0.999912,0.999879,0.999882,0.999894,0.999898,0.999879,0.999901,0.999892
157,0.999840,1.000000,0.999843,0.999888,0.999913,0.999886,0.999904,0.999907,0.999907,0.999857,0.999880,0.999868,0.999880,0.999894,0.999883,0.999904,0.999891,0.999885,0.999882,0.999905,0.999909,0.999918,0.999910,0.999881,0.999894,0.999892,0.999895,0.999872,0.999900,0.999890,0.999896,0.999887,0.999864,0.999889,0.999883,0.999906,0.999869,0.999891,0.999903,0.999922,...,0.999876,0.999901,0.999864,0.999908,0.999895,0.999896,0.999883,0.999877,0.999904,0.999876,0.999896,0.999893,0.999901,0.999885,0.999907,0.999900,0.999907,0.999910,0.999885,0.999871,0.999870,0.999871,0.999867,0.999897,0.999917,0.999893,0.999910,0.999894,0.999884,0.999893,0.999891,0.999900,0.999909,0.999852,0.999883,0.999880,0.999883,0.999862,0.999880,0.999883
166,0.999872,0.999843,1.000000,0.999910,0.999897,0.999903,0.999913,0.999915,0.999930,0.999897,0.999893,0.999887,0.999894,0.999909,0.999906,0.999908,0.999929,0.999893,0.999894,0.999918,0.999914,0.999924,0.999920,0.999885,0.999905,0.999910,0.999913,0.999890,0.999906,0.999901,0.999902,0.999901,0.999879,0.999917,0.999914,0.999921,0.999903,0.999904,0.999908,0.999923,...,0.999913,0.999912,0.999883,0.999914,0.999907,0.999909,0.999899,0.999888,0.999900,0.999887,0.999885,0.999900,0.999918,0.999903,0.999903,0.999922,0.999911,0.999919,0.999894,0.999886,0.999890,0.999901,0.999924,0.999906,0.999907,0.999914,0.999910,0.999916,0.999897,0.999894,0.999904,0.999920,0.999912,0.999876,0.999901,0.999897,0.999901,0.999887,0.999915,0.999906
794,0.999886,0.999888,0.999910,1.000000,0.999998,0.999999,0.999999,0.999998,0.999998,0.999998,0.999999,0.999998,0.999998,0.999999,0.999998,0.999999,0.999999,0.999998,0.999998,0.999998,0.999999,0.999999,0.999999,0.999998,0.999998,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999998,0.999999,0.999998,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,...,0.999998,0.999999,0.999998,0.999999,0.999999,0.999999,0.999999,0.999998,0.999999,0.999998,0.999998,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999997,0.999998,0.999998,0.999999,0.999999,0.999999,0.999999,0.999996,0.999999,0.999999,0.999999,0.999998,0.999998,0.999999,0.999999,0.999999,0.999998,0.999999,0.999998,0.999999,0.999998,0.999999,0.999999
1005,0.999917,0.999913,0.999897,0.999998,1.000000,0.999999,0.999999,0.999998,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,...,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999998,0.999998,0.999999,0.999999,0.999999,0.999999,0.999999,0.999997,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138065,0.999894,0.999880,0.999897,0.999998,0.999999,0.999999,1.000000,0.999999,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.999997,0.999998,1.000000,1.000000,1.000000,1.000000,1.000000,0.999996,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
138137,0.999898,0.999883,0.999901,0.999999,0.999999,0.999999,1.000000,0.999999,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.999997,0.999998,1.000000,1.000000,1.000000,1.000000,1.000000,0.999996,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
138171,0.999879,0.999862,0.999887,0.999998,0.999999,0.999999,0.999999,0.999999,0.999999,0.999999,1.000000,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,0.999999,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.999997,0.999998,1.000000,1.000000,1.000000,1.000000,1.000000,0.999996,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
138314,0.999901,0.999880,0.999915,0.999999,0.999999,0.999999,1.000000,0.999999,0.999999,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.999998,0.999999,1.000000,1.000000,1.000000,1.000000,1.000000,0.999997,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000


In [121]:
top_similariest_user = corr_df[corr_df[random_user] > corr_df[random_user].quantile(0.90)][random_user]

In [125]:
top_similariest_user = top_similariest_user.reset_index()

In [126]:
top_similariest_user.head()

Unnamed: 0,index,68785
0,13354,1.0
1,15814,1.0
2,16135,1.0
3,18585,1.0
4,19585,1.0


In [132]:
top_similariest_user.columns = ["userId", "corr"]

In [133]:
top_similariest_user.head()

Unnamed: 0,userId,corr
0,13354,1.0
1,15814,1.0
2,16135,1.0
3,18585,1.0
4,19585,1.0


In [135]:
top_users_rating = top_similariest_user.merge(rating, how="inner")

In [136]:
top_users_rating.head()

Unnamed: 0,userId,corr,movieId,rating,timestamp
0,13354,1.0,47,5.0,2012-06-27 21:56:37
1,13354,1.0,111,5.0,2012-06-27 21:53:16
2,13354,1.0,175,4.5,2012-06-27 21:38:49
3,13354,1.0,256,0.5,2012-06-27 21:30:24
4,13354,1.0,345,5.0,2012-06-27 21:54:31


In [141]:
top_users_rating.drop("timestamp", axis=1, inplace=True)

In [142]:
top_users_rating["weighted_rating"] = top_users_rating["corr"] * top_users_rating["rating"]

In [160]:
recommendation_df = top_users_rating.groupby("movieId").agg({"weighted_rating": "mean"})

In [161]:
recommendation_df = recommendation_df.sort_values("weighted_rating", ascending=False)

In [162]:
recommendation_df = recommendation_df[recommendation_df.weighted_rating > recommendation_df.weighted_rating.quantile(0.9)]

In [163]:
recommended_movies = recommendation_df.head(5)

In [167]:
recommended_movies.reset_index(inplace=True)

In [168]:
recommended_movies.merge(movie[["movieId", "title"]], how="inner")

Unnamed: 0,movieId,weighted_rating,title
0,6398,5.0,Le Mans (1971)
1,6314,5.0,Undercover Blues (1993)
2,4506,5.0,Frantic (1988)
3,46972,5.0,Night at the Museum (2006)
4,63859,5.0,Bolt (2008)


## Recommendation for Random User Last Watched Movie

In [188]:
random_user_lovest_movie = df[(df["userId"] == random_user) & (df["rating"] == 5)]["title"].item()

In [196]:
random_user_lovest_movie

'Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000)'

In [200]:
recommended_movies = user_movies.corrwith(user_movies[random_user_lovest_movie]).sort_values(ascending=False)

In [201]:
recommended_movies.head()

Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000)    1.000000
Hero (Ying xiong) (2002)                                   0.418605
House of Flying Daggers (Shi mian mai fu) (2004)           0.414195
Drunken Master (Jui kuen) (1978)                           0.358782
Kagemusha (1980)                                           0.343141
dtype: float64