In [1]:
import pandas as pd
import numpy as np
import os
import requests
import zipfile

import warnings
#warnings.simplefilter('ignore')

In [2]:
url = "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
filename = url.split('/')[-1]
r = requests.get(url, stream=True)
with open(filename, 'wb') as f:
    for chunk in r.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)
            f.flush()

In [3]:
zipfile_path = os.getcwd() + "/"

In [4]:
zipfile_name = 'ml-latest-small'
zipfile = zipfile.ZipFile(zipfile_path+ zipfile_name + ".zip")
zipfile.extractall(zipfile_path)

In [5]:
movies_csv = pd.read_csv(zipfile_path + zipfile_name + "/movies.csv")
movies_csv

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [6]:
ratings_csv = pd.read_csv(zipfile_path + zipfile_name + "/ratings.csv")
ratings_csv

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [7]:
# join movies_csv and rating_csv through movieId
rating_dataset = pd.merge(ratings_csv, movies_csv, on="movieId")
rating_dataset = rating_dataset[["userId", "title", "rating"]]
rating_dataset

Unnamed: 0,userId,title,rating
0,1,Toy Story (1995),4.0
1,5,Toy Story (1995),4.0
2,7,Toy Story (1995),4.5
3,15,Toy Story (1995),2.5
4,17,Toy Story (1995),4.5
...,...,...,...
100831,610,Bloodmoon (1997),2.5
100832,610,Sympathy for the Underdog (1971),4.5
100833,610,Hazard (2005),3.0
100834,610,Blair Witch (2016),3.5


In [8]:
rating_pivot_table = rating_dataset.pivot_table(index='title', columns='userId', values='rating', dropna='true')

rating_pivot_table.columns.name = 'pivot table person\'s rating score to each movie title'

rating_pivot_table

pivot table person's rating score to each movie title,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),,,,,,,,,,,...,,,,,,,,,,4.0
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,
'Salem's Lot (2004),,,,,,,,,,,...,,,,,,,,,,
'Til There Was You (1997),,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
eXistenZ (1999),,,,,,,,,,,...,,,5.0,,,,,4.5,,
xXx (2002),,,,,,,,,1.0,,...,,,,,,,,3.5,,2.0
xXx: State of the Union (2005),,,,,,,,,,,...,,,,,,,,,,1.5
¡Three Amigos! (1986),4.0,,,,,,,,,,...,,,,,,,,,,


In [9]:
def similarity_indicator(person1, person2, dataset):

    movie1 = dataset[dataset[person1].notnull()][person1].index
    movie2 = dataset[dataset[person2].notnull()][person2].index

    # create intersection set
    set_movie1 = set(movie1)
    set_movie2 = set(movie2)
    intersection_set = set_movie1.intersection(set_movie2)

    if len(intersection_set) == 0: 
        return 0

    distance_index = pow(dataset.loc[list(intersection_set)][person1] - dataset.loc[list(intersection_set)][person2], 2)
    distance_squared = distance_index.sum()

    return 1/(1+np.sqrt(distance_squared)) 

In [10]:
def recommend_indicator(person, top_N, dataset):

    totals_recommendation_score = {}
    sum_similarity_score = {}
    rankings_result = []

    list_all_persons = list(dataset)
    # remove self from all persons
    list_all_persons.remove(person)
    list_others = list_all_persons

    for other in list_others:

        self_reviewed_movie = dataset[dataset[person].notnull()][person].index        
        set_self_reviewed = set(self_reviewed_movie)         
        
        other_reviewed_movie = dataset[dataset[other].notnull()][other].index
        set_others_reviewed = set(other_reviewed_movie) 

        set_self_not_reviewed = set_others_reviewed.difference(set_self_reviewed)

        #define similarity score
        similarity_score = similarity_indicator(person, other, dataset)

        for movie in set_self_not_reviewed:

            totals_recommendation_score.setdefault(movie,0)
            totals_recommendation_score[movie] += dataset[other][movie] * similarity_score 

            sum_similarity_score.setdefault(movie,0)
            sum_similarity_score[movie] += similarity_score 

    rankings = [(total/sum_similarity_score[movie],movie) for movie, total in totals_recommendation_score.items()]
    rankings.sort()
    rankings.reverse()
    
    #rankings_result = rankings_result.append(rankings)

    return [i[1] for i in rankings][:top_N]

In [11]:
recommended_movie = recommend_indicator(12, 10, rating_pivot_table)
recommended_movie

#rankings()
#rankings_list = list(rankings)

#pd.DataFrame(rankings_result)

#rankings_result



['World of Glory (1991)',
 "Won't You Be My Neighbor? (2018)",
 'Vampire in Venice (Nosferatu a Venezia) (Nosferatu in Venice) (1986)',
 'Vagabond (Sans toit ni loi) (1985)',
 'Unfaithfully Yours (1948)',
 'Tokyo Tribe (2014)',
 'Superman/Batman: Public Enemies (2009)',
 'Strictly Sexual (2008)',
 'Sandpiper, The (1965)',
 'Red Sorghum (Hong gao liang) (1987)']

In [12]:
pd.DataFrame(recommended_movie, columns=["Recommended Movie"])

Unnamed: 0,Recommended Movie
0,World of Glory (1991)
1,Won't You Be My Neighbor? (2018)
2,Vampire in Venice (Nosferatu a Venezia) (Nosfe...
3,Vagabond (Sans toit ni loi) (1985)
4,Unfaithfully Yours (1948)
5,Tokyo Tribe (2014)
6,Superman/Batman: Public Enemies (2009)
7,Strictly Sexual (2008)
8,"Sandpiper, The (1965)"
9,Red Sorghum (Hong gao liang) (1987)
