# Source

This notebook follows the recommendation systems tutorial at **https://www.kaggle.com/kanncaa1/recommendation-systems-tutorial**

Dataset from: **https://www.kaggle.com/grouplens/movielens-20m-dataset**

In [1]:
import pandas as pd
import os

# Sanity check: show which files are present in the MovieLens dataset folder.
dataset_files = os.listdir("../../Datasets/movielens")
print(dataset_files)

['genome_scores.csv', 'link.csv', 'rating.csv', 'tag.csv', 'movie.csv', 'genome_tags.csv']


In [2]:
# Load the movie table and inspect which columns it provides.
movie_csv_path = "../../Datasets/movielens/movie.csv"
movie = pd.read_csv(movie_csv_path)
movie.columns

Index(['movieId', 'title', 'genres'], dtype='object')

In [3]:
# Keep only the movie identifier and its title.
movie = movie[["movieId", "title"]]
movie.head()  # first 5 rows (the default)

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [4]:
# Load the rating table and inspect which columns it provides.
rating_csv_path = "../../Datasets/movielens/rating.csv"
rating = pd.read_csv(rating_csv_path)
rating.columns

Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')

In [5]:
# Keep only who rated which movie and the score given.
rating = rating[["userId", "movieId", "rating"]]
rating.head()  # first 5 rows (the default)

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [6]:
# Attach every rating to its movie title.
# The join key and join type are spelled out instead of letting pandas infer
# the key from the column names the two frames happen to share, so the result
# cannot silently change if the frames ever share another column.
data = pd.merge(movie, rating, on="movieId", how="inner")
data.head(5)

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),3,4.0
1,1,Toy Story (1995),6,5.0
2,1,Toy Story (1995),8,4.0
3,1,Toy Story (1995),10,4.0
4,1,Toy Story (1995),11,4.5


In [7]:
# Size of the merged frame as (rows, columns).
data.shape

(20000263, 4)

In [8]:
# Too much data. Keep only the first 2M rows of the merged frame.
data = data.head(2_000_000)
data.shape

(2000000, 4)

# Recommendation System: Item Based Collaborative Filtering

In [9]:
# Reshape the long ratings frame into a user x movie matrix:
#   rows    -> userId
#   columns -> movie title
#   cells   -> rating (NaN where the user has no rating for that title)
pivot_table = pd.pivot_table(
    data,
    values="rating",
    index=["userId"],
    columns=["title"],
)
pivot_table.head(10)

title,Ace Ventura: When Nature Calls (1995),Across the Sea of Time (1995),"Addiction, The (1995)",Amateur (1994),"Amazing Panda Adventure, The (1995)","American President, The (1995)",Angela (1995),Angels and Insects (1995),Anne Frank Remembered (1995),Antonia's Line (Antonia) (1995),...,Waiting to Exhale (1995),"Walk in the Clouds, A (1995)",Waterworld (1995),When Night Is Falling (1995),"White Balloon, The (Badkonake sefid) (1995)",White Man's Burden (1995),White Squall (1996),Wild Bill (1995),Wings of Courage (1995),"Young Poisoner's Handbook, The (1995)"
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,3.0,,,,,,,,,,...,,,,,,,,,,
5,,,,,,5.0,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,4.0,,,,,...,,,,,,,,,,
8,1.0,,,,,,,,,,...,,4.0,3.0,,,,,,,
10,,,,,,4.0,,,,,...,,,,,,,,,,
11,3.5,,,,,,,,,,...,,,4.5,,,,,,,


In [10]:
# Ratings column of the reference movie: one value per user, NaN for users
# who have no rating for it.
movie_watched = pivot_table['Bad Boys (1995)']
# Preview the series that was just selected. The cell previously displayed
# the unrelated `movie` frame instead of `movie_watched`.
movie_watched.head(5)

userId
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
Name: Bad Boys (1995), dtype: float64

In [11]:
# Correlate every movie column of the user x movie matrix with the ratings
# of the reference movie; the result is one coefficient per title.
correlation_bad_boys = pivot_table.corrwith(other=movie_watched, axis=0)
correlation_bad_boys

title
Ace Ventura: When Nature Calls (1995)    0.355291
Across the Sea of Time (1995)            0.450071
Addiction, The (1995)                    0.265729
Amateur (1994)                           0.132713
Amazing Panda Adventure, The (1995)      0.309740
                                           ...   
White Man's Burden (1995)                0.202258
White Squall (1996)                      0.327825
Wild Bill (1995)                         0.201812
Wings of Courage (1995)                 -0.324063
Young Poisoner's Handbook, The (1995)    0.021174
Length: 294, dtype: float64

In [12]:
# Rank the titles from most to least correlated with the reference movie
# and show the ten best matches. The reference movie itself appears in the
# ranking, since it correlates perfectly with itself.
similar_movies = correlation_bad_boys.sort_values(ascending=False)
similar_movies.head(n=10)

title
Bad Boys (1995)                            1.000000
Headless Body in Topless Bar (1995)        0.723747
Last Summer in the Hamptons (1995)         0.607554
Dream Man (1995)                           0.598623
New York Cop (Nyû Yôku no koppu) (1993)    0.522481
Two Bits (1995)                            0.507008
Shadows (Cienie) (1988)                    0.494186
Bushwhacked (1995)                         0.469242
Guardian Angel (1994)                      0.465218
Money Train (1995)                         0.451802
dtype: float64