In [9]:
# %load Recommenders.py
import numpy as np
import pandas

#Class for Popularity based Recommender System model
class popularity_recommender_py():
    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.popularity_recommendations = None
        
    #Create the popularity based recommender system model
    def create(self, train_data, user_id, item_id):
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

        #Get a count of user_ids for each unique movie as recommendation score
        train_data_grouped = train_data.groupby([self.item_id]).agg({self.user_id: 'count'}).reset_index()
        train_data_grouped.rename(columns = {'user_id': 'score'},inplace=True)
    
        #Sort the movies based upon recommendation score
        train_data_sort = train_data_grouped.sort_values(['score', self.item_id], ascending = [0,1])
    
        #Generate a recommendation rank based upon score
        train_data_sort['Rank'] = train_data_sort['score'].rank(ascending=0, method='first')
        
        #Get the top 10 recommendations
        self.popularity_recommendations = train_data_sort.head(10)

    #Use the popularity based recommender system model to
    #make recommendations
    def recommend(self, user_id):    
        user_recommendations = self.popularity_recommendations
        
        #Add user_id column for which the recommendations are being generated
        user_recommendations['user_id'] = user_id
    
        #Bring user_id column to the front
        cols = user_recommendations.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        user_recommendations = user_recommendations[cols]
        
        return user_recommendations
    

#Class for Item similarity based Recommender System model
class item_similarity_recommender_py():
    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.cooccurence_matrix = None
        self.movies_dict = None
        self.rev_movies_dict = None
        self.item_similarity_recommendations = None
        
    #Get unique items (movies) corresponding to a given user
    def get_user_items(self, user):
        user_data = self.train_data[self.train_data[self.user_id] == user]
        user_items = list(user_data[self.item_id].unique())
        
        return user_items
        
    #Get unique users for a given item (movie)
    def get_item_users(self, item):
        item_data = self.train_data[self.train_data[self.item_id] == item]
        item_users = set(item_data[self.user_id].unique())
            
        return item_users
        
    #Get unique items (movies) in the training data
    def get_all_items_train_data(self):
        all_items = list(self.train_data[self.item_id].unique())
            
        return all_items
        
    #Construct cooccurence matrix
    def construct_cooccurence_matrix(self, user_movies, all_movies):
            
        ####################################
        #Get users for all movies in user_movies.
        ####################################
        user_movies_users = []        
        for i in range(0, len(user_movies)):
            user_movies_users.append(self.get_item_users(user_movies[i]))
            
        ###############################################
        #Initialize the item cooccurence matrix of size 
        #len(user_movies) X len(movies)
        ###############################################
        cooccurence_matrix = np.matrix(np.zeros(shape=(len(user_movies), len(all_movies))), float)
           
        #############################################################
        #Calculate similarity between user movies and all unique movies
        #in the training data
        #############################################################
        for i in range(0,len(all_movies)):
            #Calculate unique listeners (users) of movie (item) i
            movies_i_data = self.train_data[self.train_data[self.item_id] == all_movies[i]]
            users_i = set(movies_i_data[self.user_id].unique())
            
            for j in range(0,len(user_movies)):       
                    
                #Get unique viewers (users) of movie (item) j
                users_j = user_movies_users[j]
                    
                #Calculate intersection of viewers of movies i and j
                users_intersection = users_i.intersection(users_j)
                
                #Calculate cooccurence_matrix[i,j] as Jaccard Index
                if len(users_intersection) != 0:
                    #Calculate union of listeners of movies i and j
                    users_union = users_i.union(users_j)
                    
                    cooccurence_matrix[j,i] = float(len(users_intersection))/float(len(users_union))
                else:
                    cooccurence_matrix[j,i] = 0
                    
        
        return cooccurence_matrix

    
    #Use the cooccurence matrix to make top recommendations
    def generate_top_recommendations(self, user, cooccurence_matrix, all_movies, user_movies):
        print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))
        
        #Calculate a weighted average of the scores in cooccurence matrix for all user movies.
        user_sim_scores = cooccurence_matrix.sum(axis=0)/float(cooccurence_matrix.shape[0])
        user_sim_scores = np.array(user_sim_scores)[0].tolist()
 
        #Sort the indices of user_sim_scores based upon their value
        #Also maintain the corresponding score
        sort_index = sorted(((e,i) for i,e in enumerate(list(user_sim_scores))), reverse=True)
    
        #Create a dataframe from the following
        columns = ['user_id', 'movie', 'score', 'rank']
        #index = np.arange(1) # array of numbers for the number of samples
        df = pandas.DataFrame(columns=columns)
         
        #Fill the dataframe with top 10 item based recommendations
        rank = 1 
        for i in range(0,len(sort_index)):
            if ~np.isnan(sort_index[i][0]) and all_movies[sort_index[i][1]] not in user_movies and rank <= 10:
                df.loc[len(df)]=[user,all_movies[sort_index[i][1]],sort_index[i][0],rank]
                rank = rank+1
        
        #Handle the case where there are no recommendations
        if df.shape[0] == 0:
            print("The current user has no movies for training the item similarity based recommendation model.")
            return -1
        else:
            return df
 
    #Create the item similarity based recommender system model
    def create(self, train_data, user_id, item_id):
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

    #Use the item similarity based recommender system model to
    #make recommendations
    def recommend(self, user):
        
        ########################################
        #A. Get all unique movies for this user
        ########################################
        user_movies = self.get_user_items(user)    
            
        print("No. of unique movies for the user: %d" % len(user_movies))
        
        ######################################################
        #B. Get all unique items (movies) in the training data
        ######################################################
        all_movies = self.get_all_items_train_data()
        
        print("no. of unique movies in the training set: %d" % len(all_movies))
         
        ###############################################
        #C. Construct item cooccurence matrix of size 
        #len(user_movies) X len(movies)
        ###############################################
        cooccurence_matrix = self.construct_cooccurence_matrix(user_movies, all_movies)
        
        #######################################################
        #D. Use the cooccurence matrix to make recommendations
        #######################################################
        df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_movies, user_movies)
                
        return df_recommendations
    
    #Get similar items to given items
    def get_similar_items(self, item_list):
        
        user_movies = item_list
        
        ######################################################
        #B. Get all unique items (movies) in the training data
        ######################################################
        all_movies = self.get_all_items_train_data()
        
        print("no. of unique movies in the training set: %d" % len(all_movies))
         
        ###############################################
        #C. Construct item cooccurence matrix of size 
        #len(user_movies) X len(movies)
        ###############################################
        cooccurence_matrix = self.construct_cooccurence_matrix(user_movies, all_movies)
        
        #######################################################
        #D. Use the cooccurence matrix to make recommendations
        #######################################################
        user = ""
        df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_movies, user_movies)
         
        return df_recommendations

In [10]:
import pandas
import random
from sklearn.cross_validation import train_test_split
import numpy as np
import time
from sklearn.externals import joblib
import Recommenders as Recommenders
#import Evaluation as Evaluation

#Read userid-songid-listen_count triplets
#This step might take time to download data from external sources
triplets_file = 'copy_of_r.txt'
movies_metadata_file = 'movies.csv'

movie_df_1 = pandas.read_table(triplets_file,header=None)
movie_df_1.columns = ['user_id', 'movieId', 'rating','timestamp']

#song_df_1.head()

movie_df_2 =  pandas.read_csv(movies_metadata_file)
movie_df = pandas.merge(movie_df_1, movie_df_2.drop_duplicates(['movieId']), on="movieId", how="left")

movie_df.head(20)

#movie_df.to_csv('final.csv', index=False, header=False)


Unnamed: 0,user_id,movieId,rating,timestamp,title,genres
0,1,31,2.5,1260759144,Dangerous Minds (1995),Drama
1,1,1029,3.0,1260759179,Dumbo (1941),Animation|Children|Drama|Musical
2,1,1061,3.0,1260759182,Sleepers (1996),Thriller
3,1,1129,2.0,1260759185,Escape from New York (1981),Action|Adventure|Sci-Fi|Thriller
4,1,1172,4.0,1260759205,Cinema Paradiso (Nuovo cinema Paradiso) (1989),Drama
5,1,1263,2.0,1260759151,"Deer Hunter, The (1978)",Drama|War
6,1,1287,2.0,1260759187,Ben-Hur (1959),Action|Adventure|Drama
7,1,1293,2.0,1260759148,Gandhi (1982),Drama
8,1,1339,3.5,1260759125,Dracula (Bram Stoker's Dracula) (1992),Fantasy|Horror|Romance|Thriller
9,1,1343,2.0,1260759131,Cape Fear (1991),Thriller


In [11]:
#Merge song title and artist_name columns to make a merged column

#%save movie_recommender_python.ipynb Out[14]

movie_df['movie'] = movie_df['title'].map(str) + " - " + movie_df['genres']

movie_grouped = movie_df.groupby(['movie']).agg({'rating': 'count'}).reset_index()
grouped_sum = movie_grouped['rating'].sum()
movie_grouped['percentage']  = movie_grouped['rating'].div(grouped_sum)*100
movie_grouped.sort_values(['rating', 'movie'], ascending = [0,1])

Unnamed: 0,movie,rating,percentage
2933,Forrest Gump (1994) - Comedy|Drama|Romance|War,341,0.340986
6392,Pulp Fiction (1994) - Comedy|Crime|Drama|Thriller,324,0.323987
7085,"Shawshank Redemption, The (1994) - Crime|Drama",311,0.310988
7167,"Silence of the Lambs, The (1991) - Crime|Horro...",304,0.303988
7489,Star Wars: Episode IV - A New Hope (1977) - Ac...,291,0.290988
4308,Jurassic Park (1993) - Action|Adventure|Sci-Fi...,274,0.273989
5103,"Matrix, The (1999) - Action|Sci-Fi|Thriller",259,0.258990
8249,Toy Story (1995) - Adventure|Animation|Childre...,247,0.246990
6919,Schindler's List (1993) - Drama|War,244,0.243990
7857,Terminator 2: Judgment Day (1991) - Action|Sci-Fi,237,0.236991


In [12]:
users = movie_df['user_id'].unique()
len(users)

671

In [13]:
movie_grouped = movie_df.groupby(['rating']).agg({'movie':'count'}).reset_index()
grouped_sum = movie_grouped['rating'].sum()

sum_movie = movie_grouped['movie'].sum()
print sum_movie

movie_grouped['percentage']  = movie_grouped['rating'].div(grouped_sum)*100
movie_grouped.sort_values(['rating', 'movie'], ascending = [0,1])


100004


Unnamed: 0,rating,movie,percentage
9,5.0,15095,18.181818
8,4.5,7723,16.363636
7,4.0,28750,14.545455
6,3.5,10538,12.727273
5,3.0,20064,10.909091
4,2.5,4449,9.090909
3,2.0,7271,7.272727
2,1.5,1687,5.454545
1,1.0,3326,3.636364
0,0.5,1101,1.818182


In [14]:
train_data, test_data = train_test_split(movie_df, test_size = 0.20, random_state=0)
print(train_data.head(5))

pm = Recommenders.popularity_recommender_py()
pm.create(train_data, 'user_id', 'movie')

#user_id = users[5]
#pm.recommend(user_id)

user_id = users[8]
pm.recommend(user_id)


       user_id  movieId  rating   timestamp  \
73174      509     7323     3.5  1093277659   
30938      220     1961     4.0   970504853   
79289      547     3467     4.5  1086010787   
81338      553    35836     4.0  1423010653   
2893        17     4027     1.5  1127469852   

                                   title                  genres  \
73174            Good bye, Lenin! (2003)            Comedy|Drama   
30938                    Rain Man (1988)                   Drama   
79289                         Hud (1963)           Drama|Western   
81338     40-Year-Old Virgin, The (2005)          Comedy|Romance   
2893   O Brother, Where Art Thou? (2000)  Adventure|Comedy|Crime   

                                                   movie  
73174             Good bye, Lenin! (2003) - Comedy|Drama  
30938                            Rain Man (1988) - Drama  
79289                         Hud (1963) - Drama|Western  
81338    40-Year-Old Virgin, The (2005) - Comedy|Romance  
2893   O Brot

Unnamed: 0,user_id,movie,score,Rank
2737,9,Forrest Gump (1994) - Comedy|Drama|Romance|War,267,1.0
5941,9,Pulp Fiction (1994) - Comedy|Crime|Drama|Thriller,267,2.0
6579,9,"Shawshank Redemption, The (1994) - Crime|Drama",249,3.0
6657,9,"Silence of the Lambs, The (1991) - Crime|Horro...",233,4.0
6959,9,Star Wars: Episode IV - A New Hope (1977) - Ac...,230,5.0
4020,9,Jurassic Park (1993) - Action|Adventure|Sci-Fi...,215,6.0
4748,9,"Matrix, The (1999) - Action|Sci-Fi|Thriller",213,7.0
7642,9,Toy Story (1995) - Adventure|Animation|Childre...,209,8.0
621,9,Back to the Future (1985) - Adventure|Comedy|S...,194,9.0
2540,9,Fargo (1996) - Comedy|Crime|Drama|Thriller,192,10.0


In [15]:
import csv
import random
import math
import operator
import pandas as pd

recommender = pandas.read_csv("C:/MP 3/temp1.csv")

df = pd.DataFrame(recommender)
stack = df.stack()
df.replace({'Action': 1.01}, inplace='True')
df.replace({'Adventure': 1.02}, inplace='True')
df.replace({'Animation': 1.03}, inplace='True')
df.replace({'Comedy': 1.05}, inplace='True')
df.replace({'Crime': 1.06}, inplace='True')
df.replace({'Drama': 1.08}, inplace='True')
df.replace({'Fantasy': 1.09}, inplace='True')
df.replace({'Horror': 1.10}, inplace='True')
df.replace({'IMAX': 1.11}, inplace='True')
df.replace({'Mystery': 1.13}, inplace='True')
df.replace({'Romance': 1.14}, inplace='True')
df.replace({'Sci-Fi': 1.15}, inplace='True')
df.replace({'Thriller': 1.16}, inplace='True')
df.replace({'Western': 1.18}, inplace='True')
df.replace({'Children': 1.04}, inplace='True')
df.replace({'Musical': 1.12}, inplace='True')
df.replace({'Documentary': 1.07}, inplace='True')
df.replace({'War': 1.17}, inplace='True')
print df

# with open("temp56.csv", "w") as output:
#     writer = csv.writer(output, lineterminator='\n')
#     for x in range(len(df['genre'])):
#         writer.writerow([x])

df.to_csv(r'C:/MP 3/temp56.csv', header=None, index=None, sep=',', mode='w')

     Uid   Mid  Ratings  Genre
0      1    31      2.5   1.08
1      1  1029      3.0   1.03
2      1  1061      3.0   1.16
3      1  1129      2.0   1.01
4      1  1172      4.0   1.08
5      1  1263      2.0   1.08
6      1  1287      2.0   1.01
7      1  1293      2.0   1.08
8      1  1339      3.5   1.09
9      1  1343      2.0   1.16
10     1  1371      2.5   1.02
11     1  1405      1.0   1.02
12     1  1953      4.0   1.01
13     1  2105      4.0   1.01
14     1  2150      3.0   1.02
15     1  2193      2.0   1.01
16     1  2294      2.0   1.02
17     1  2455      2.5   1.08
18     1  2968      1.0   1.02
19     1  3671      3.0   1.05
20     2    10      4.0   1.01
21     2    17      5.0   1.14
22     2    39      5.0   1.05
23     2    47      4.0   1.13
24     2    50      4.0   1.06
25     2    52      3.0   1.05
26     2    62      3.0   1.08
27     2   110      4.0   1.01
28     2   144      3.0   1.05
29     2   150      5.0   1.11
..   ...   ...      ...    ...
470    6

In [20]:
import csv
import random
import math
import operator
 
def loadDataset(filename, split, trainingSet=[] , testSet=[]):
    with open(filename, 'rt') as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
        #print(dataset)
        print(len(dataset))
        for x in range(len(dataset)-1):
            for y in range(2):
                dataset[x][y] = (float(dataset[x][y]))
            if random.random() < split:
                trainingSet.append(dataset[x])
            else:
                testSet.append(dataset[x])

def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((float(instance1[x]) - float(instance2[x])), 2)
    return math.sqrt(distance)

def getNeighbors(trainingSet, testInstance, k):
    distances = []
    length = len(testInstance)-1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingSet[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

def getResponse(neighbors):
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    #print response
    #print classVotes
    return sortedVotes[0][0]
    
                        
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct/float(len(testSet))) * 100.0

def main():
    # prepare data
    trainingSet=[]
    testSet=[]
    split = 0.5
    predictions=[]
    actual=[]
    loadDataset('final1.csv', split, trainingSet, testSet)
    print ('Train set: ' + repr(len(trainingSet)))
    print ('Test set: ' + repr(len(testSet)))
    # generate predictions
    k = 10
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        #print(neighbours)
        result = getResponse(neighbors)
        predictions.append(result)
        actual.append(testSet[x][-1])
        print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%') 
    
main() 

900
Train set: 421
Test set: 478
> predicted='5', actual='3'
> predicted='5', actual='4'
> predicted='4', actual='2'
> predicted='4', actual='2'
> predicted='5', actual='1'
> predicted='5', actual='4'
> predicted='4', actual='3'
> predicted='4', actual='2'
> predicted='5', actual='1'
> predicted='3', actual='4'
> predicted='5', actual='5'
> predicted='5', actual='4'
> predicted='5', actual='3'
> predicted='4', actual='3'
> predicted='4', actual='5'
> predicted='4', actual='3'
> predicted='4', actual='3'
> predicted='3', actual='3'
> predicted='3', actual='5'
> predicted='3', actual='3'
> predicted='3', actual='4'
> predicted='3', actual='5'
> predicted='3', actual='5'
> predicted='3', actual='4'
> predicted='5', actual='3'
> predicted='4', actual='4'
> predicted='4', actual='3'
> predicted='5', actual='4'
> predicted='5', actual='2'
> predicted='3', actual='4'
> predicted='3', actual='3'
> predicted='3', actual='3'
> predicted='3', actual='3'
> predicted='3', actual='3'
> predicted='3'

In [21]:
import csv
import random
import math
import operator
 
def loadDataset(filename, split, trainingSet=[] , testSet=[]):
    with open(filename, 'rt') as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
        #print(dataset)
        print(len(dataset))
        for x in range(len(dataset)-1):
            for y in range(2):
                dataset[x][y] = (float(dataset[x][y]))
            if random.random() < split:
                trainingSet.append(dataset[x])
            else:
                testSet.append(dataset[x])

def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((float(instance1[x]) - float(instance2[x])), 2)
    return math.sqrt(distance)

def getNeighbors(trainingSet, testInstance, k):
    distances = []
    length = len(testInstance)-1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingSet[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

def getResponse(neighbors):
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]
                        
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct/float(len(testSet))) * 100.0

def main():
    # prepare data
    trainingSet=[]
    testSet=[]
    split = 0.50    
    predictions=[]
    actual=[]
    loadDataset('temp56.csv', split, trainingSet, testSet)
    print ('Train set: ' + repr(len(trainingSet)))
    print ('Test set: ' + repr(len(testSet)))
    # generate predictions
    k = 10
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        #print(neighbours)
        result = getResponse(neighbors)
        predictions.append(result)
        actual.append(testSet[x][-1])
        print('> predicted=' + repr(result) + ', actual=' + repr(testSet[x][-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
    
    with open("final5.csv", "w") as output:
        writer = csv.writer(output, lineterminator='\n')
        for x in actual:
            writer.writerow([x]) 
    
main() 

500
Train set: 230
Test set: 269
> predicted='1.05', actual='1.08'
> predicted='1.02', actual='1.03'
> predicted='1.05', actual='1.01'
> predicted='1.15', actual='1.08'
> predicted='1.05', actual='1.01'
> predicted='1.15', actual='1.09'
> predicted='1.15', actual='1.02'
> predicted='1.02', actual='1.01'
> predicted='1.02', actual='1.02'
> predicted='1.05', actual='1.02'
> predicted='1.08', actual='1.05'
> predicted='1.05', actual='1.01'
> predicted='1.05', actual='1.13'
> predicted='1.05', actual='1.01'
> predicted='1.05', actual='1.05'
> predicted='1.06', actual='1.11'
> predicted='1.06', actual='1.01'
> predicted='1.16', actual='1.16'
> predicted='1.15', actual='1.05'
> predicted='1.15', actual='1.16'
> predicted='1.14', actual='1.05'
> predicted='1.15', actual='1.05'
> predicted='1.15', actual='1.1'
> predicted='1.08', actual='1.08'
> predicted='1.08', actual='1.08'
> predicted='1.05', actual='1.16'
> predicted='1.05', actual='1.05'
> predicted='1.14', actual='1.04'
> predicted='1.1

In [16]:
#PERONALISATION

is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'user_id', 'movie')

user_id = users[5]
user_items = is_model.get_user_items(user_id)
#
print("------------------------------------------------------------------------------------")
print("Training data movies for the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")

for user_item in user_items:
    print(user_item)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

#Recommend movies for the user using personalized model
is_model.recommend(user_id)

------------------------------------------------------------------------------------
Training data movies for the user userid: 6:
------------------------------------------------------------------------------------
Lethal Weapon 2 (1989) - Action|Comedy|Crime|Drama
X-Files: Fight the Future, The (1998) - Action|Crime|Mystery|Sci-Fi|Thriller
Casper (1995) - Adventure|Children
Beetlejuice (1988) - Comedy|Fantasy
Stand by Me (1986) - Adventure|Drama
Office Space (1999) - Comedy|Crime
Matrix, The (1999) - Action|Sci-Fi|Thriller
Bridge on the River Kwai, The (1957) - Adventure|Drama|War
Three Kings (1999) - Action|Adventure|Comedy|Drama|War
Planet of the Apes (1968) - Action|Drama|Sci-Fi
Vertigo (1958) - Drama|Mystery|Romance|Thriller
Dogma (1999) - Adventure|Comedy|Fantasy
Pinocchio (1940) - Animation|Children|Fantasy|Musical
Léon: The Professional (a.k.a. The Professional) (Léon) (1994) - Action|Crime|Drama|Thriller
Lord of the Rings: The Two Towers, The (2002) - Adventure|Fantasy
Deep Im

Unnamed: 0,user_id,movie,score,rank
0,6.0,"O Brother, Where Art Thou? (2000) - Adventure|...",0.169179,1.0
1,6.0,Ferris Bueller's Day Off (1986) - Comedy,0.167204,2.0
2,6.0,Big (1988) - Comedy|Drama|Fantasy|Romance,0.166291,3.0
3,6.0,Spider-Man (2002) - Action|Adventure|Sci-Fi|Th...,0.161991,4.0
4,6.0,"Truman Show, The (1998) - Comedy|Drama|Sci-Fi",0.160019,5.0
5,6.0,Austin Powers: International Man of Mystery (1...,0.15997,6.0
6,6.0,Almost Famous (2000) - Drama,0.159379,7.0
7,6.0,"Crouching Tiger, Hidden Dragon (Wo hu cang lon...",0.159342,8.0
8,6.0,"Terminator, The (1984) - Action|Sci-Fi|Thriller",0.159328,9.0
9,6.0,Signs (2002) - Horror|Sci-Fi|Thriller,0.158082,10.0
