In [11]:
#import pandas data
import pandas as pd
import numpy as np

import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity

import operator
%matplotlib inline

/kaggle/input/anime-recommendations-database/rating.csv
/kaggle/input/anime-recommendations-database/anime.csv


In [12]:
# Load the anime catalogue (one row per title; the columns are visible in the output below).
anime = pd.read_csv("../input/anime-recommendations-database/anime.csv")
anime


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [13]:
# Load the per-user ratings (columns: user_id, anime_id, rating).
rating = pd.read_csv("../input/anime-recommendations-database/rating.csv")

# The raw file uses -1 as a placeholder for "no rating given". Swap it for NaN so
# that the per-user averages computed later are not distorted by the placeholder.
# Only the rating column is rewritten, so the id columns are never touched.
rating["rating"] = rating["rating"].replace(-1, np.nan)
print(rating["rating"].unique())
rating

[nan 10.  8.  6.  9.  7.  3.  5.  4.  1.  2.]


Unnamed: 0,user_id,anime_id,rating
0,1,20,
1,1,24,
2,1,79,
3,1,226,
4,1,241,
...,...,...,...
7813732,73515,16512,7.0
7813733,73515,17187,9.0
7813734,73515,22145,10.0
7813735,73516,790,9.0


In [14]:
# Keep only the catalogue rows whose type is "Movie".
anime_movies = anime.loc[anime["type"].eq("Movie")]
anime_movies

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
8,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.10,72534
11,28851,Koe no Katachi,"Drama, School, Shounen",Movie,1,9.05,102733
15,199,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",Movie,1,8.93,466254
18,12355,Ookami Kodomo no Ame to Yuki,"Fantasy, Slice of Life",Movie,1,8.84,226193
...,...,...,...,...,...,...,...
11738,3038,Senya Ichiya Monogatari,"Fantasy, Hentai",Movie,1,6.18,1394
11805,2755,Cleopatra,"Hentai, Historical",Movie,1,6.06,1709
12244,11141,Blue Seagull,"Action, Hentai",Movie,1,4.60,337
12258,20007,Hi Gekiga Ukiyoe Senya Ichiya,"Action, Hentai",Movie,1,1.92,129


In [15]:
# Attach the movie metadata to every rating row through the shared anime_id key.
# Both frames carry a "rating" column: the per-user one receives the "_user" suffix
# and is then renamed to user_rating, while the catalogue rating keeps its name.
merged = (
    rating
    .merge(anime_movies, on='anime_id', suffixes=['_user', ''])
    .rename(columns={'rating_user': 'user_rating'})
)
merged.head()

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,rating,members
0,1,442,,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,"Adventure, Comedy, Drama, Historical, Shounen,...",Movie,1,7.17,120571
1,5,442,4.0,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,"Adventure, Comedy, Drama, Historical, Shounen,...",Movie,1,7.17,120571
2,54,442,,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,"Adventure, Comedy, Drama, Historical, Shounen,...",Movie,1,7.17,120571
3,99,442,2.0,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,"Adventure, Comedy, Drama, Historical, Shounen,...",Movie,1,7.17,120571
4,128,442,7.0,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,"Adventure, Comedy, Drama, Historical, Shounen,...",Movie,1,7.17,120571


In [16]:
# Keep just the three columns used downstream, then restrict to user ids up to
# 10000 so that the pivot and similarity computations stay affordable.
merged = merged.loc[:, ['user_id', 'name', 'user_rating']]
merged_sub = merged.loc[merged['user_id'] <= 10000]
merged_sub.head()

Unnamed: 0,user_id,name,user_rating
0,1,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,
1,5,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,4.0
2,54,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,
3,99,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,2.0
4,128,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,7.0


In [17]:
# Collaborative filtering works on a user x movie matrix: one row per user_id,
# one column per movie name, each cell holding that user's rating of that movie
# (NaN where there is none). The similarities between users and between movies
# are computed from this table further down.
piv = pd.pivot_table(
    merged_sub,
    values='user_rating',
    index=['user_id'],
    columns=['name'],
)
piv.head()

name,&quot;Bungaku Shoujo&quot; Movie,.hack//G.U. Trilogy,.hack//The Movie: Sekai no Mukou ni,009 Re:Cyborg,00:08,1000-nen Joou: Queen Millennia,1001 Nights,11-nin Iru!,12-gatsu no Uta,3-tsu no Kumo,...,Yuuyake Dandan,Yuu☆Yuu☆Hakusho (Movie),Yuu☆Yuu☆Hakusho: Meikai Shitou-hen - Honoo no Kizuna,Yu☆Gi☆Oh! (1999),Yu☆Gi☆Oh!: Hikari no Pyramid,Yu☆Gi☆Oh!: The Dark Side of Dimensions,Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai,eX-Driver the Movie,gdgd Fairies Movie: tte Iu Eiga wa Dou kana...?,xxxHOLiC Movie: Manatsu no Yoru no Yume
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
11,,,,,,,,,,,...,,,,,,,,,,
12,,,,,,,,,,,...,,,,,,,,,,


In [18]:
# Normalise each user's ratings: centre them on the user's own mean and scale by
# the user's rating range (max - min). A user with a single rating, or who rated
# everything the same, has a range of 0, so all of their entries become NaN here
# and they end up dropped below.
# The arithmetic is done column-wise on the whole frame instead of calling a Python
# lambda once per user row.
piv_norm = (
    piv
    .sub(piv.mean(axis=1), axis=0)
    .div(piv.max(axis=1) - piv.min(axis=1), axis=0)
    .fillna(0)   # unrated cells (and 0/0 results) are encoded as 0
    .T           # rows become movie names, columns become user ids
)

# Drop every column (user) that contains only zeros, i.e. users left with no usable rating.
piv_norm = piv_norm.loc[:, (piv_norm != 0).any(axis=0)]

piv_norm

user_id,3,5,7,11,14,17,18,19,21,23,...,9982,9983,9986,9988,9989,9990,9993,9995,9997,9998
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Movie,0.0,0.0,0.0,0.0,-0.2,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
.hack//G.U. Trilogy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0
.hack//The Movie: Sekai no Mukou ni,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
009 Re:Cyborg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
00:08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yu☆Gi☆Oh!: The Dark Side of Dimensions,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
eX-Driver the Movie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
gdgd Fairies Movie: tte Iu Eiga wa Dou kana...?,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0


In [19]:
# Most entries of piv_norm are zero, so hold the matrix in SciPy's compressed
# sparse row (CSR) format before handing it to the similarity functions below.
piv_sparse = sp.sparse.csr_matrix(piv_norm.to_numpy())
piv_sparse

<1233x5589 sparse matrix of type '<class 'numpy.float64'>'
	with 109604 stored elements in Compressed Sparse Row format>

In [20]:
# This cell was a verbatim repeat of the previous one. piv_sparse is already built
# there, so only display it here instead of constructing the same CSR matrix again.
piv_sparse

<1233x5589 sparse matrix of type '<class 'numpy.float64'>'
	with 109604 stored elements in Compressed Sparse Row format>

In [21]:
# Pairwise cosine similarity between the rows of piv_sparse. piv_norm was transposed
# before being converted, so each row is one movie: this is movie-to-movie similarity.
item_similarity = cosine_similarity(piv_sparse)
item_similarity

array([[ 1.        , -0.02969979,  0.01734393, ..., -0.00657098,
         0.        , -0.01266947],
       [-0.02969979,  1.        ,  0.22787337, ...,  0.01152839,
         0.        , -0.0056648 ],
       [ 0.01734393,  0.22787337,  1.        , ...,  0.01355427,
         0.        , -0.01828983],
       ...,
       [-0.00657098,  0.01152839,  0.01355427, ...,  1.        ,
         0.        ,  0.04607068],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         1.        ,  0.        ],
       [-0.01266947, -0.0056648 , -0.01828983, ...,  0.04607068,
         0.        ,  1.        ]])

In [22]:
# Transposing puts the users on the rows, so this is user-to-user cosine similarity.
user_similarity = cosine_similarity(piv_sparse.T)
user_similarity

array([[ 1.        ,  0.09223378,  0.        , ...,  0.03760041,
         0.        ,  0.        ],
       [ 0.09223378,  1.        ,  0.        , ...,  0.01720481,
        -0.11757419, -0.07207171],
       [ 0.        ,  0.        ,  1.        , ...,  0.12319008,
         0.        ,  0.        ],
       ...,
       [ 0.03760041,  0.01720481,  0.12319008, ...,  1.        ,
         0.        ,  0.        ],
       [ 0.        , -0.11757419,  0.        , ...,  0.        ,
         1.        ,  0.        ],
       [ 0.        , -0.07207171,  0.        , ...,  0.        ,
         0.        ,  1.        ]])

In [29]:
# Wrap the movie-to-movie similarity matrix in a DataFrame labelled on both axes
# with the movie names (piv_norm.index), so similarities can be looked up by title.

item_sim_df = pd.DataFrame(item_similarity, index = piv_norm.index, columns = piv_norm.index)
item_sim_df

name,&quot;Bungaku Shoujo&quot; Movie,.hack//G.U. Trilogy,.hack//The Movie: Sekai no Mukou ni,009 Re:Cyborg,00:08,1000-nen Joou: Queen Millennia,1001 Nights,11-nin Iru!,12-gatsu no Uta,3-tsu no Kumo,...,Yuuyake Dandan,Yuu☆Yuu☆Hakusho (Movie),Yuu☆Yuu☆Hakusho: Meikai Shitou-hen - Honoo no Kizuna,Yu☆Gi☆Oh! (1999),Yu☆Gi☆Oh!: Hikari no Pyramid,Yu☆Gi☆Oh!: The Dark Side of Dimensions,Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai,eX-Driver the Movie,gdgd Fairies Movie: tte Iu Eiga wa Dou kana...?,xxxHOLiC Movie: Manatsu no Yoru no Yume
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Movie,1.000000,-0.029700,0.017344,0.024627,-0.055151,0.000000,0.015069,-0.008191,0.000000,0.000000,...,0.0,-0.013359,-0.005624,-0.011537,0.006787,0.0,0.014433,-0.006571,0.0,-0.012669
.hack//G.U. Trilogy,-0.029700,1.000000,0.227873,0.002249,0.045528,0.000000,0.008948,0.000000,0.000000,0.000000,...,0.0,0.010138,0.005425,0.023727,-0.003587,0.0,-0.004935,0.011528,0.0,-0.005665
.hack//The Movie: Sekai no Mukou ni,0.017344,0.227873,1.000000,0.021763,0.025553,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.005348,0.000303,0.009905,-0.019676,0.0,-0.014518,0.013554,0.0,-0.018290
009 Re:Cyborg,0.024627,0.002249,0.021763,1.000000,0.000000,0.000000,0.041285,-0.019437,0.000000,0.000000,...,0.0,-0.001712,0.000000,0.010871,0.021701,0.0,0.012925,0.000000,0.0,-0.015710
00:08,-0.055151,0.045528,0.025553,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,-0.019630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yu☆Gi☆Oh!: The Dark Side of Dimensions,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,-0.099536,-0.035585,1.0,0.000000,0.000000,0.0,0.000000
Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai,0.014433,-0.004935,-0.014518,0.012925,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000600,0.000000,0.013634,-0.004683,0.0,1.000000,0.000000,0.0,0.000622
eX-Driver the Movie,-0.006571,0.011528,0.013554,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.007719,0.006489,0.010903,-0.001471,0.0,0.000000,1.000000,0.0,0.046071
gdgd Fairies Movie: tte Iu Eiga wa Dou kana...?,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1.0,0.000000


In [30]:
# Same wrapping for the user-to-user similarity matrix: both axes are labelled with
# the user ids that survived normalisation (piv_norm.columns).
user_sim_df = pd.DataFrame(user_similarity, index = piv_norm.columns, columns = piv_norm.columns)
user_sim_df

user_id,3,5,7,11,14,17,18,19,21,23,...,9982,9983,9986,9988,9989,9990,9993,9995,9997,9998
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,1.000000,0.092234,0.000000,0.161406,0.084020,0.053698,-0.111943,0.067552,0.173663,0.000000,...,0.093870,-0.041789,0.353996,0.071094,0.001417,-0.106199,0.057426,0.037600,0.000000,0.000000
5,0.092234,1.000000,0.000000,0.058902,0.032862,0.128302,-0.037180,0.067825,0.042501,0.004480,...,0.016189,-0.013879,0.117574,0.166428,0.074959,-0.062416,0.019073,0.017205,-0.117574,-0.072072
7,0.000000,0.000000,1.000000,0.000000,0.037268,0.113719,0.000000,0.000000,-0.021082,0.204731,...,0.000000,0.000000,0.000000,0.000000,0.147087,0.000000,0.000000,0.123190,0.000000,0.000000
11,0.161406,0.058902,0.000000,1.000000,0.000000,0.122845,-0.030628,0.136591,0.061044,-0.083921,...,0.049351,-0.025212,0.178806,0.105191,0.000000,0.000000,0.068487,-0.003477,0.000000,0.063087
14,0.084020,0.032862,0.037268,0.000000,1.000000,0.066581,0.000000,0.084659,0.000000,0.091558,...,0.000000,-0.038795,0.000000,0.200000,0.065779,-0.365148,0.000000,0.169684,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9990,-0.106199,-0.062416,0.000000,0.000000,-0.365148,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,-0.273861,0.000000,1.000000,0.000000,-0.156911,0.000000,0.000000
9993,0.057426,0.019073,0.000000,0.068487,0.000000,0.058907,0.166721,0.014466,0.079582,0.000000,...,0.129050,-0.009575,-0.324443,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000
9995,0.037600,0.017205,0.123190,-0.003477,0.169684,0.039822,-0.062979,0.001292,-0.045501,0.000000,...,-0.009979,-0.015103,0.000000,0.088148,0.000000,-0.156911,0.000000,1.000000,0.000000,0.000000
9997,0.000000,-0.117574,0.000000,0.000000,0.000000,-0.042675,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000


In [31]:
# Prints the movies with the highest cosine similarity to a given movie.

def top_animes(anime_name, n=10, sim_df=None):
    """Print the ``n`` movies most similar to ``anime_name``.

    Parameters
    ----------
    anime_name : str
        Title to look up; must be a column label of the similarity table.
    n : int, default 10
        How many similar titles to print.
    sim_df : pandas.DataFrame, optional
        Square movie-by-movie similarity table labelled by title on both axes.
        Defaults to the notebook-level ``item_sim_df``.

    Returns
    -------
    None or str
        ``None`` after printing the ranking, or a "No data available" message
        (mirroring ``top_users`` / ``similar_user_recs``) when the title is unknown.
    """
    if sim_df is None:
        sim_df = item_sim_df

    if anime_name not in sim_df.columns:
        return 'No data available on anime {}'.format(anime_name)

    # Remove the query title by label rather than by assuming it sorts first:
    # another movie can tie with it at similarity 1.0, in which case slicing off
    # position 0 could drop the wrong row and list the query title itself.
    neighbours = (
        sim_df[anime_name]
        .drop(labels=anime_name, errors='ignore')
        .sort_values(ascending=False, kind='mergesort')  # stable => reproducible order
        .head(n)
    )

    print('Similar shows to {} include:\n'.format(anime_name))
    for rank, title in enumerate(neighbours.index, start=1):
        print('No. {}: {}'.format(rank, title))

In [33]:
# Example: list the ten movies most similar to '1001 Nights'.
top_animes('1001 Nights')

Similar shows to 1001 Nights include:

No. 1: Kouchou-sensei to Kujira
No. 2: Tsukumo
No. 3: Umi kara no Shisha
No. 4: Muybridge no Ito
No. 5: Kipling Jr.
No. 6: Kenju Giga
No. 7: Anime Sanjuushi: Aramis no Bouken
No. 8: Birthday Boy
No. 9: Hermes: Ai wa Kaze no Gotoku
No. 10: Alexander Senki Movie


In [35]:
# Prints the users with the highest similarity value to a given user.
def top_users(user, n=5, sim_df=None):
    """Print the ``n`` users most similar to ``user`` with their similarity values.

    Parameters
    ----------
    user : hashable
        User id to look up; must be a column label of the similarity table.
    n : int, default 5
        How many similar users to print.
    sim_df : pandas.DataFrame, optional
        Square user-by-user similarity table labelled by user id on both axes.
        Defaults to the notebook-level ``user_sim_df`` (whose labels are
        ``piv_norm.columns``, the set the original membership check used).

    Returns
    -------
    None or str
        ``None`` after printing, or a "No data available" message when the
        user id is not present in the similarity table.
    """
    if sim_df is None:
        sim_df = user_sim_df

    if user not in sim_df.columns:
        return('No data available on user {}'.format(user))

    # Sort once, and exclude the user by label instead of slicing off position 0:
    # a neighbour tied at similarity 1.0 could otherwise take the user's place.
    neighbours = (
        sim_df[user]
        .drop(labels=user, errors='ignore')
        .sort_values(ascending=False, kind='mergesort')  # stable => reproducible order
        .head(n)
    )

    print('Most Similar Users:\n')
    for other_user, sim in neighbours.items():
        print('User #{0}, Similarity value: {1:.2f}'.format(other_user, sim))

In [43]:
# Example: the five users most similar to user 3.
top_users(3)

Most Similar Users:

User #3353, Similarity value: 0.58
User #3015, Similarity value: 0.52
User #9201, Similarity value: 0.47
User #4839, Similarity value: 0.46
User #4245, Similarity value: 0.45


In [45]:
# Collects the highest-scored movies of each similar user and returns the titles
# together with how many of those users have each title among their top picks.
def similar_user_recs(user, n_users=10, n_recs=5, sim_df=None, ratings_norm=None):
    """Recommend titles that the most similar users scored highest.

    For each of the ``n_users`` nearest neighbours of ``user``, take every title
    whose normalised score equals that neighbour's maximum, then count how often
    each title appears across neighbours.

    Parameters
    ----------
    user : hashable
        User id to recommend for.
    n_users : int, default 10
        Number of most-similar users to consult.
    n_recs : int, default 5
        Maximum number of (title, count) pairs to return.
    sim_df : pandas.DataFrame, optional
        Square user-by-user similarity table. Defaults to ``user_sim_df``.
    ratings_norm : pandas.DataFrame, optional
        Normalised scores with titles on the rows and user ids on the columns.
        Defaults to ``piv_norm``.

    Returns
    -------
    list of (title, count) tuples, or str
        Pairs sorted by descending count (ties keep first-seen order), or a
        "No data available" message when the user is unknown.
    """
    if sim_df is None:
        sim_df = user_sim_df
    if ratings_norm is None:
        ratings_norm = piv_norm

    if user not in ratings_norm.columns:
        return('No data available on user {}'.format(user))

    # Exclude the user by label; slicing off position 0 after sorting is wrong
    # whenever another user ties with them at similarity 1.0.
    neighbours = (
        sim_df[user]
        .drop(labels=user, errors='ignore')
        .sort_values(ascending=False, kind='mergesort')  # stable => reproducible order
        .head(n_users)
        .index
    )

    # NOTE(review): titles the target user has already rated are not filtered out,
    # so they can appear among the recommendations.
    counts = {}
    for neighbour in neighbours:
        scores = ratings_norm.loc[:, neighbour]
        favourites = scores.index[(scores == scores.max()).to_numpy()].tolist()
        for title in favourites:
            counts[title] = counts.get(title, 0) + 1

    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    return ranked[:n_recs]

In [49]:
# Example: titles most often top-scored among the users most similar to user 5.
similar_user_recs(5)

[('Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare', 5),
 ('Byousoku 5 Centimeter', 3),
 ('Stranger: Mukou Hadan', 3),
 ('Steins;Gate Movie: Fuka Ryouiki no Déjà vu', 3),
 ('Summer Wars', 1)]

In [47]:
# Calculates the similarity-weighted average of the ratings given by similar users
# to estimate how an input user would rate an input movie.

def predicted_rating(anime_name, user, k=999, sim_df=None, ratings=None):
    """Estimate ``user``'s rating of ``anime_name`` from similar users' ratings.

    The estimate is ``sum(rating * similarity) / sum(similarity)`` over those of
    the ``k`` most similar users who actually rated the title.

    Parameters
    ----------
    anime_name : str
        Column label of the title in the ratings table.
    user : hashable
        User id to predict for.
    k : int, default 999
        Number of most-similar users to consider (the original code used the
        slice ``[1:1000]``, i.e. 999 neighbours).
    sim_df : pandas.DataFrame, optional
        Square user-by-user similarity table. Defaults to ``user_sim_df``.
    ratings : pandas.DataFrame, optional
        Raw ratings with user ids on the rows and titles on the columns, NaN
        where a user did not rate a title. Defaults to ``piv``.

    Returns
    -------
    float
        The weighted average, or NaN when none of the considered neighbours
        rated the title or their similarities sum to zero.

    Raises
    ------
    KeyError
        If ``user`` is not in the similarity table or ``anime_name`` is not a
        column of the ratings table.
    """
    if sim_df is None:
        sim_df = user_sim_df
    if ratings is None:
        ratings = piv

    # Sort once and exclude the user by label rather than by position, so a
    # neighbour tied at similarity 1.0 cannot cause the user's own rating to be used.
    neighbours = (
        sim_df[user]
        .drop(labels=user, errors='ignore')
        .sort_values(ascending=False, kind='mergesort')  # stable => reproducible order
        .head(k)
    )

    # Ratings of this title aligned on the neighbours' ids; NaN means "not rated".
    neighbour_ratings = ratings[anime_name].reindex(neighbours.index)
    rated = neighbour_ratings.notna()
    weights = neighbours[rated]

    # NOTE(review): similarities can be negative, so the weights may partly cancel
    # and push the estimate outside the rating scale; the formula is kept as-is.
    weight_sum = weights.sum()
    if weight_sum == 0:
        # Nobody rated the title (or the weights cancel): there is no estimate,
        # where the original raised ZeroDivisionError / divided by zero.
        return float('nan')

    return float((neighbour_ratings[rated] * weights).sum() / weight_sum)

In [48]:
# Example: similarity-weighted estimate of how user 3 would rate '1001 Nights'.
predicted_rating('1001 Nights', 3)

7.000000000000001

In [53]:
# List every title that user 5 has rated, i.e. every column of piv whose entry
# in user 5's row is positive (unrated titles are NaN and compare as False).
watched = piv.columns[(piv.loc[5, :] > 0).to_numpy()].tolist()
watched

['Akira',
 'Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie',
 'Aura: Maryuuin Kouga Saigo no Tatakai',
 'Byousoku 5 Centimeter',
 'Cardcaptor Sakura Movie 1',
 'Digimon Adventure 02 Movies',
 'Digimon Adventure 02: Diablomon no Gyakushuu',
 'Digimon Adventure Movie',
 'Digimon Adventure: Bokura no War Game!',
 'Dragon Ball Z Movie 03: Chikyuu Marugoto Choukessen',
 'Dragon Ball Z Movie 05: Tobikkiri no Saikyou tai Saikyou',
 'Dragon Ball Z Movie 10: Kiken na Futari! Super Senshi wa Nemurenai',
 'Dragon Ball Z Movie 12: Fukkatsu no Fusion!! Gokuu to Vegeta',
 'Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare',
 'Kotonoha no Niwa',
 'Little Witch Academia',
 'Mangchi',
 'Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shinobu Houjou Dattebayo!',
 'Naruto Movie 2: Dai Gekitotsu! Maboroshi no Chiteiiseki Dattebayo!',
 'Naruto Movie 3: Dai Koufun! Mikazuki Jima no Animaru Panikku Dattebayo!',
 'Naruto: Shippuuden Movie 1',
 'Naruto: Shippuuden Movie 2 - Kizuna',
 'Naruto: Shipp