In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [4]:
# Locations of the three '::'-separated data files, relative to the notebook.
rating_file_path = './ratings.dat'
movie_file_path = './movies.dat'
user_file_path = './users.dat'

# A multi-character separator is not supported by pandas' default C parser;
# without engine='python' every call emits a ParserWarning and falls back anyway.
# The files carry no header row, so column names are supplied explicitly.
rating_data = pd.read_csv(rating_file_path, names=['user_id', 'movie_id', 'rating', 'time'],
                          delimiter='::', engine='python')
movie_data = pd.read_csv(movie_file_path, names=['movie_id', 'title', 'genre'],
                         delimiter='::', engine='python')
user_data = pd.read_csv(user_file_path, names=['user_id', 'gender', 'age', 'occupation', 'zipcode'],
                        delimiter='::', engine='python')

In [5]:
# Preview the first five rows of the ratings table as a quick sanity check of the load.
rating_data.head()

Unnamed: 0,user_id,movie_id,rating,time
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [8]:
# Reshape the long (user_id, movie_id, rating) records into a wide table:
# one row per user, one column per movie, NaN where the user has not rated the movie.
rating = (
    rating_data.loc[:, ['user_id', 'movie_id', 'rating']]
    .set_index(['user_id', 'movie_id'])
    .unstack('movie_id')
)
rating.head()

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,5.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,2.0,,,,,...,,,,,,,,,,


In [11]:
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

In [12]:
# Ratings are on a 1-to-5 scale; the Reader tells Surprise about that range.
reader = Reader(rating_scale=(1, 5))

# Hand Surprise the three columns in (user, item, rating) order.
data = Dataset.load_from_df(
    df=rating_data.loc[:, ['user_id', 'movie_id', 'rating']],
    reader=reader,
)

In [13]:
# Use every available rating for training (no hold-out split in this cell).
train_data = data.build_full_trainset()

# SVD initialises its factors randomly; pinning random_state makes the fitted
# model, and every prediction derived from it, reproducible across re-runs.
model = SVD(n_factors=8, n_epochs=20, random_state=42)
model.fit(train_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x137d54090>

In [14]:
# The user we will generate recommendations for.
target_user = 4

# All rating records that belong to that user.
target_user_data = rating_data.loc[rating_data['user_id'].eq(target_user)]
target_user_data.head()

Unnamed: 0,user_id,movie_id,rating,time
233,4,3468,5,978294008
234,4,1210,3,978293924
235,4,2951,4,978294282
236,4,1214,4,978294260
237,4,1036,4,978294282


In [15]:
# Map each movie the target user has already rated to the rating they gave it.
# Built column-wise rather than with iterrows(), which constructs a Series per row
# and leaves loop variables behind in the notebook namespace.
ori_dic = dict(zip(target_user_data['movie_id'], target_user_data['rating']))

print(ori_dic)

{3468: 5, 1210: 3, 2951: 4, 1214: 4, 1036: 4, 260: 5, 2028: 5, 480: 4, 1196: 2, 1198: 5, 1954: 5, 1097: 4, 3418: 4, 3702: 4, 2366: 4, 1387: 5, 3527: 1, 1201: 5, 2692: 5, 2947: 5, 1240: 5}


In [16]:
# Candidate (user, movie, rating) triples for every movie the target user has
# not rated yet. The rating slot is filled with 0 as a placeholder value.
# A comprehension is used so that no loop variable leaks out of the cell: the
# previous top-level `rating = 0` overwrote the `rating` DataFrame built earlier.
test_data = [
    (target_user, movie_id, 0)
    for movie_id in movie_data['movie_id']
    if movie_id not in ori_dic
]

In [19]:
# Run the fitted model over every candidate triple in test_data (movies the
# target user has not rated); the 0 ratings in those triples are placeholders.
target_user_predict = model.test(test_data)

In [None]:
def get_user_predict_ratings(predict, user_id, user_history):
    """Collect predicted ratings for one user's not-yet-seen movies.

    Args:
        predict: Iterable of 5-element records unpacked as
            (user, movie, rating, predicted_rating, extra).
        user_id: Only records whose user field equals this value are kept.
        user_history: Container of movie ids to exclude; only membership
            (``in``) is used.

    Returns:
        dict mapping movie id -> predicted rating. If a movie appears more
        than once in ``predict``, the last matching record wins.
    """
    return {
        movie: estimate
        for uid, movie, _given, estimate, _extra in predict
        if user_id == uid and movie not in user_history
    }
