## Item-Based CF 预测电影评分

- 应用基于用户的协同过滤实现电影评分预测
- 应用基于物品的协同过滤实现电影评分预测

- 加载ratings.csv，转换为用户-电影评分矩阵并计算电影之间的相似度

In [9]:
import pandas as pd
import numpy as np

# Load only the first three columns of the ratings file (user id, movie id,
# rating), requesting 32-bit dtypes for each of them.
data_path = 'data/ratings.csv'
dtype = {
    "userId": np.int32,
    "movieId": np.int32,
    "rating": np.float32,
}
ratings = pd.read_csv(data_path, usecols=range(3), dtype=dtype)
# Preview the first rows (rendered as the cell output in a notebook).
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


## 基于电影的评分预测

In [18]:
# Pivot the long-format ratings into a user x movie matrix: one row per
# userId, one column per movieId, each cell holding the rating (NaN where the
# user has not rated the movie).
rating_matrix = ratings.pivot_table(
    values='rating',
    index=['userId'],
    columns=['movieId'],
)
# DataFrame.corr() correlates the columns pairwise, so this is the
# movie-to-movie (item-item) Pearson similarity matrix.
item_similar = rating_matrix.corr(method='pearson')
item_similar.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.330978,0.487109,1.0,0.310971,0.106465,0.208402,0.968246,0.095913,-0.021409,...,,,,,,,,,,
2,0.330978,1.0,0.419564,,0.562791,0.16351,0.430261,0.415227,0.27735,0.016626,...,,,,,,,,,,
3,0.487109,0.419564,1.0,,0.602266,0.345069,0.554088,0.333333,0.458591,-0.050276,...,,,,,,,,,,
4,1.0,,,1.0,0.654654,,0.203653,,,0.870388,...,,,,,,,,,,
5,0.310971,0.562791,0.602266,0.654654,1.0,0.291302,0.609119,0.555556,0.319173,0.218263,...,,,,,,,,,,


In [19]:
# Preview the first five rows of the user x movie rating matrix.
rating_matrix.head(n=5)

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,


In [17]:
# Similarities between movie 1 and every other movie: remove the movie's own
# entry and every pair whose correlation is undefined (NaN).
movie_sims = item_similar[1]
similar_items = movie_sims.drop([1]).dropna()
# Neighbour selection rule: keep only positively correlated movies.
similar_items = similar_items[similar_items > 0]
# Restrict the neighbours to the movies that user 1 has actually rated.
rated_by_user = set(rating_matrix.loc[1].dropna().index)
ids = rated_by_user & set(similar_items.index)
# Similarity scores of the neighbours that can be used for the prediction.
finally_similar_items = similar_items.loc[list(ids)]
finally_similar_items.head()

movieId
2048    0.254824
1025    0.236713
3       0.487109
1029    0.464971
6       0.106465
Name: 1, dtype: float64

In [24]:
# Predict the rating of user 1 for movie 1 as the similarity-weighted average
# of that user's ratings on the neighbouring movies:
#     prediction = sum(similarity * rating) / sum(similarity)
num = 0  # numerator: running sum of similarity * rating
deno = 0  # denominator: running sum of the similarities
# Series.items() replaces Series.iteritems(), which was deprecated in
# pandas 1.5 and removed in pandas 2.0.
for similar_iid, similarity in finally_similar_items.items():
    # Ratings of the neighbouring movie from the users who rated it.
    sim_item_rated_movies = rating_matrix[similar_iid].dropna()
    # Rating given by user 1 to the neighbouring movie (label lookup on userId).
    sim_item_rating_from_user = sim_item_rated_movies.loc[1]
    num += similarity * sim_item_rating_from_user
    deno += similarity

if deno == 0:
    # No usable neighbour was found; fail with an explicit message instead of
    # the bare "division by zero" the plain division would produce.
    raise ZeroDivisionError(
        "no positively correlated movie rated by the user; cannot predict"
    )

# Predicted rating of the user for the target movie.
predict_rating = num / deno
print("预测出用户<%d>对电影<%d>的评分：%0.2f" % (1, 1, predict_rating))

userId
1      5.0
19     3.0
20     3.5
186    3.0
212    3.5
380    4.0
414    3.0
525    3.5
Name: 2048, dtype: float32
5.0
userId
1      5.0
4      4.0
20     5.0
39     4.0
68     2.5
103    4.0
171    4.0
177    4.0
211    4.0
226    4.0
274    3.5
288    2.0
304    4.0
328    3.5
387    3.0
395    3.0
414    4.0
474    3.0
479    3.0
484    5.0
509    2.5
517    2.5
600    3.5
605    3.0
608    3.5
Name: 1025, dtype: float32
5.0
userId
1      4.0
6      5.0
19     3.0
32     3.0
42     4.0
43     5.0
44     3.0
51     4.0
58     3.0
64     3.5
68     2.0
91     3.0
100    3.5
102    5.0
116    3.5
117    3.0
150    3.0
151    3.0
169    5.0
179    4.0
217    1.0
226    3.5
240    4.0
269    4.0
270    3.0
288    4.0
289    2.5
294    1.0
302    3.0
307    3.5
308    0.5
321    3.0
330    3.0
337    4.0
368    3.0
410    4.0
414    4.0
448    3.0
456    3.0
470    3.0
477    3.0
480    2.5
492    4.0
501    5.0
544    3.0
552    1.0
555    5.0
588    3.0
590    3.0
594    4.0
599 

userId
1      5.0
4      2.0
6      4.0
8      4.0
13     5.0
14     5.0
15     3.5
16     3.5
17     4.0
18     4.5
19     3.0
26     4.0
28     3.0
39     3.0
41     3.5
42     4.0
43     4.0
47     3.0
54     3.0
56     5.0
58     5.0
62     4.5
63     4.0
64     4.5
66     5.0
68     4.0
72     4.5
75     2.0
76     3.5
81     3.0
      ... 
505    5.0
507    3.0
512    5.0
514    4.0
517    2.0
524    5.0
525    3.5
534    4.0
540    4.5
541    1.0
542    5.0
551    4.5
560    4.0
561    4.5
565    4.0
570    3.5
573    5.0
574    5.0
580    5.0
584    5.0
588    3.0
590    3.0
597    4.0
599    4.0
600    4.0
601    4.0
602    5.0
606    3.0
608    4.5
610    5.0
Name: 47, Length: 203, dtype: float32
5.0
userId
1      4.0
20     4.0
45     4.0
68     3.0
91     3.0
103    4.0
177    4.0
217    2.0
230    3.0
232    2.5
274    3.0
288    3.0
292    3.0
380    3.0
414    4.0
514    4.0
517    4.0
525    4.0
554    4.0
579    4.0
599    2.5
600    3.0
Name: 2096, dtype: float32
4.0


userId
1      5.0
19     3.0
51     5.0
57     4.0
64     3.5
68     3.5
69     5.0
73     4.5
76     1.5
91     4.5
103    4.0
104    3.5
115    5.0
122    4.5
127    4.0
137    4.0
167    3.5
182    3.0
199    4.5
203    2.5
216    3.0
219    3.5
220    4.5
221    4.5
226    5.0
255    2.0
274    4.0
288    4.0
290    4.0
305    4.0
      ... 
328    4.5
332    3.0
335    4.5
380    5.0
387    4.0
408    4.0
409    5.0
410    4.0
414    4.0
422    4.0
428    3.0
448    5.0
450    4.0
465    5.0
469    4.0
474    2.0
477    4.5
479    5.0
480    0.5
488    4.0
502    5.0
514    4.0
525    4.0
538    4.5
555    4.0
567    4.5
590    3.0
599    3.5
600    4.0
610    5.0
Name: 3671, Length: 62, dtype: float32
5.0
userId
1      4.0
66     5.0
137    3.5
225    2.0
290    5.0
305    4.5
380    5.0
452    4.0
474    3.5
562    3.5
607    3.0
Name: 2648, dtype: float32
4.0
userId
1      5.0
18     3.5
19     4.0
28     2.0
42     4.0
48     4.5
59     5.0
64     3.0
66     4.5
68     2.0
80 

5.0
userId
1      4.0
19     2.0
57     4.0
66     4.0
68     2.5
91     5.0
135    5.0
182    4.0
186    3.0
202    4.0
207    3.5
274    3.5
288    3.5
307    4.0
313    5.0
368    2.0
380    5.0
414    5.0
428    3.5
448    3.0
465    4.0
483    3.5
514    4.0
539    4.5
561    5.0
599    4.0
603    3.0
610    4.5
Name: 3740, dtype: float32
4.0
userId
1      4.0
19     2.0
20     1.0
82     3.5
139    1.5
155    4.0
182    3.5
249    3.0
274    2.0
292    2.0
293    2.0
352    2.0
428    3.0
438    3.5
448    1.5
453    4.0
600    1.0
603    4.0
610    3.0
Name: 3744, dtype: float32
4.0
userId
1      3.0
18     3.0
19     1.0
27     2.0
44     1.0
55     0.5
57     1.0
76     0.5
111    2.5
132    2.0
135    2.0
141    4.0
151    4.0
160    2.0
177    3.0
191    5.0
217    3.0
219    3.5
222    2.5
232    2.5
234    3.0
240    5.0
249    3.0
274    3.5
276    4.0
277    3.0
294    4.0
305    4.0
313    1.0
314    3.0
318    3.5
350    3.0
355    3.0
380    4.0
381    3.0
385    3.0


Name: 736, Length: 123, dtype: float32
3.0
userId
1      4.0
21     3.5
24     3.5
34     5.0
57     3.0
64     3.0
68     2.5
82     4.0
115    4.0
131    2.5
135    4.0
160    1.0
182    3.5
217    3.0
219    3.5
226    3.5
249    4.0
261    2.5
274    2.5
362    3.5
368    2.0
380    4.0
381    4.0
382    3.5
408    4.0
438    3.0
452    5.0
453    3.0
469    4.0
480    2.5
483    3.0
489    1.5
495    4.5
534    4.0
560    3.5
561    2.5
564    3.0
590    3.0
597    4.0
599    3.0
600    1.0
610    3.0
Name: 2273, dtype: float32
4.0
userId
1      4.0
4      3.0
19     3.0
27     1.0
51     3.0
64     4.0
66     4.0
68     3.5
78     3.0
91     4.0
135    3.0
169    4.0
177    2.0
182    3.5
200    2.0
226    4.5
232    3.5
288    3.0
325    2.0
356    5.0
367    5.0
368    3.0
369    3.0
371    4.0
414    3.0
469    4.0
474    3.0
477    4.0
484    4.0
520    3.0
555    3.0
561    4.0
580    2.5
599    2.5
608    1.5
Name: 3809, dtype: float32
4.0
userId
1      5.0
6      3.0
8    

Name: 2353, Length: 67, dtype: float32
5.0
userId
1      5.0
29     4.5
51     4.0
93     5.0
158    2.5
182    3.5
183    3.0
186    3.0
202    5.0
239    3.5
288    3.0
313    4.0
332    3.5
390    2.0
414    5.0
462    2.5
469    5.0
474    4.0
480    3.0
493    3.0
508    1.5
527    5.0
597    4.0
599    3.5
603    3.0
Name: 2872, dtype: float32
5.0
userId
1      4.0
39     5.0
70     4.5
177    3.0
186    4.0
217    3.0
290    5.0
312    4.0
313    2.0
325    4.0
332    3.5
345    3.5
348    4.0
368    4.0
380    5.0
448    5.0
469    3.0
493    2.0
508    1.5
527    3.0
532    4.0
571    3.0
577    3.0
597    5.0
599    3.0
Name: 2366, dtype: float32
4.0
userId
1      5.0
2      4.0
6      5.0
19     2.0
42     5.0
45     4.0
51     2.0
56     3.0
58     5.0
64     4.5
68     4.0
69     5.0
99     5.0
117    3.0
136    5.0
155    4.0
169    4.5
174    4.0
182    3.5
203    1.0
225    5.0
226    4.0
240    5.0
274    3.5
276    5.0
284    4.0
294    1.0
298    3.0
307    3.0
335  

userId
1      5.0
4      2.0
10     0.5
15     2.5
16     3.5
17     4.5
18     4.5
19     5.0
21     2.0
22     3.5
28     3.5
34     4.0
41     4.0
42     3.0
45     5.0
47     2.0
50     3.0
52     4.5
62     5.0
63     5.0
64     4.5
65     4.5
66     5.0
67     4.5
68     2.5
76     4.5
80     5.0
86     3.5
91     4.5
101    5.0
      ... 
532    5.0
533    5.0
540    4.5
542    5.0
543    5.0
549    5.0
551    4.5
552    4.5
553    5.0
555    4.0
560    4.5
561    4.0
562    4.5
570    4.0
573    5.0
580    5.0
581    4.5
582    4.0
585    5.0
590    3.5
591    2.0
593    4.0
596    4.0
599    5.0
600    4.5
601    5.0
603    4.0
606    5.0
608    5.0
610    5.0
Name: 2959, Length: 218, dtype: float32
5.0
userId
1      5.0
4      5.0
18     4.0
20     5.0
33     5.0
36     1.5
45     3.0
51     2.0
64     4.0
66     3.5
68     3.0
84     5.0
91     3.0
100    4.5
104    3.0
115    4.0
132    3.5
135    5.0
137    5.0
140    4.0
156    4.0
169    5.0
171    4.0
177    5.0
182    

Name: 2005, dtype: float32
5.0
userId
1      5.0
4      4.0
19     2.0
27     2.0
28     3.0
42     4.0
45     4.0
57     3.0
64     3.5
66     3.5
68     3.5
95     4.0
96     1.0
104    4.0
141    4.0
169    4.0
182    4.0
198    3.0
205    4.0
217    2.0
219    4.5
226    5.0
274    3.5
282    4.0
287    4.0
288    4.0
292    2.0
298    3.5
303    5.0
305    3.5
307    3.0
312    3.0
313    2.0
332    3.0
354    3.5
369    4.0
380    5.0
387    3.5
391    3.0
405    3.0
408    4.5
414    4.0
428    2.5
438    3.5
448    4.0
469    2.0
474    3.0
477    3.5
480    2.5
483    4.0
514    2.5
525    4.0
555    3.0
561    4.5
590    4.0
597    3.0
599    4.0
606    4.0
610    3.5
Name: 3033, dtype: float32
5.0
userId
1      5.0
19     3.0
20     5.0
39     3.0
50     3.0
68     3.0
75     4.0
103    4.5
104    4.0
129    4.0
160    4.0
177    4.0
185    3.0
211    4.5
220    5.0
226    4.0
247    3.0
249    4.0
264    2.5
274    3.5
287    1.0
288    3.0
328    2.5
380    3.0
387    3.0


### 封装成方法

In [None]:
# 为每一部电影计算与其相关（相似）的电影
