# 推荐系统 案例分析

In [1]:
import pandas as pd
import numpy as np

## 准备数据

In [2]:
# Location of the ratings CSV, given relative to the working directory.
DATA_PATH = "./data/ratings.csv"
# Column dtypes handed to the CSV reader: 32-bit integer ids, 32-bit float ratings.
dtype = dict(userId=np.int32, movieId=np.int32, rating=np.float32)

In [3]:
# Load only the columns declared in `dtype`, selected by name rather than by
# position so the column selection cannot drift out of sync with the dtype
# mapping if the file's column order ever changes.
df = pd.read_csv(DATA_PATH, dtype=dtype, usecols=list(dtype))

In [4]:
# Preview the first rows of the loaded ratings frame.
df.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


## 构造用户评分矩阵
**用户对物品的评分矩阵**

In [5]:
# Reshape the long ratings table into a user x movie matrix: one row per
# userId, one column per movieId; (user, movie) pairs without a rating are NaN.
rating_matrix = df.pivot_table(
    values="rating",
    index="userId",
    columns="movieId",
)

In [6]:
# Display the user x movie rating matrix (NaN where a user has not rated a movie).
rating_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


## 构造相似度矩阵

In [7]:
# User-user similarity matrix. DataFrame.corr correlates columns pairwise, so
# the rating matrix is transposed first to put one user in each column.
similarity = rating_matrix.transpose().corr(method="pearson")

In [8]:
# Display the user-user similarity matrix (NaN where no correlation could be computed).
similarity

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,,0.079819,0.207983,0.268749,-0.291636,-0.118773,0.469668,0.918559,-0.037987,...,9.157371e-02,-1.597727e-16,-0.061503,-0.407556,-0.164871,0.066378,0.174557,0.268070,-0.175412,-0.032086
2,,1.000000,,,,,-0.991241,,,0.037796,...,-3.873468e-01,,-1.000000,,,0.583333,,-0.125000,,0.623288
3,0.079819,,1.000000,,,,,,,,...,,,0.433200,,,-0.791334,-0.333333,-0.395092,,0.569562
4,0.207983,,,1.000000,-0.336525,0.148498,0.542861,0.117851,,0.485794,...,-2.221127e-01,3.966413e-01,0.090090,-0.080296,0.400124,0.144603,0.116518,-0.170501,-0.277350,-0.043786
5,0.268749,,,-0.336525,1.000000,0.043166,0.158114,0.028347,,-0.777714,...,2.719480e-16,1.533034e-01,0.234743,0.067791,-0.364156,0.244321,0.231080,-0.020546,0.384111,0.040582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.066378,0.583333,-0.791334,0.144603,0.244321,-0.049192,0.137771,0.253582,0.572700,-0.382955,...,2.904896e-01,1.406134e-01,0.318473,0.682949,0.167062,1.000000,0.114191,0.240842,0.533002,0.389185
607,0.174557,,-0.333333,0.116518,0.231080,0.255639,0.402792,0.251280,,-0.241121,...,6.982411e-01,2.172105e-01,0.192787,0.035806,-0.299641,0.114191,1.000000,0.200814,0.190117,0.106605
608,0.268070,-0.125000,-0.395092,-0.170501,-0.020546,0.125428,0.008081,0.434423,0.336625,-0.571043,...,4.739665e-01,2.976461e-01,0.086423,0.053986,-0.075673,0.240842,0.200814,1.000000,0.488929,0.147606
609,-0.175412,,,-0.277350,0.384111,0.193649,0.420288,0.141860,,,...,1.000000e+00,1.885115e-01,0.343303,0.641624,-0.550000,0.533002,0.190117,0.488929,1.000000,-0.521773


In [9]:
# Goal: predict the rating user 1 would give to movie 1.
# Start from user 1's column of the similarity matrix, leaving out user 1
# itself and every user whose similarity to user 1 is NaN.
similar_users = similarity[1].drop(labels=[1]).dropna()

In [10]:
# Keep only the users whose similarity to user 1 is strictly positive.
similar_users = similar_users[similar_users > 0]

In [14]:
# Among the positively correlated users, keep those who have rated movie 1.
# Index.intersection is the explicit set operation; `&` between Index objects
# was deprecated as a set operation and acts as an element-wise logical AND
# in pandas 2.x, so it no longer computes the common user ids.
ids = rating_matrix[1].dropna().index.intersection(similar_users.index)

  


In [20]:
# Similarity scores of just the selected users (label-based lookup by user id).
final_similar_users = similar_users.loc[ids]

In [25]:
# Predict user 1's rating for movie 1 as the similarity-weighted average of
# the ratings that the selected similar users gave to movie 1.
sum_up = 0  # numerator: sum of (similarity * neighbour's rating for movie 1)
sum_down = 0  # denominator: sum of the similarities
# The loop variable is deliberately not named `similarity`: that name holds the
# user-user similarity DataFrame, and rebinding it to a float here would break
# any re-run of the cells that index into that DataFrame.
for sim_uid, sim_score in final_similar_users.items():
    similar_users_rated_movies = rating_matrix.loc[sim_uid].dropna()
    sim_user_rating_for_item = similar_users_rated_movies.loc[1]
    sum_up += sim_score * sim_user_rating_for_item
    sum_down += sim_score

# With no qualifying neighbours the weighted average is undefined, so report NaN
# instead of dividing by zero.
predict_rating = sum_up / sum_down if sum_down else np.nan
predict_rating
    