# 基于用户的协同过滤推荐—实现电影推荐

> [基于用户的协同过滤推荐—实现电影推荐](https://blog.csdn.net/Augster/article/details/53352653)

> [基于用户的协同过滤是怎么推荐电影的？](https://www.jianshu.com/p/fc0f6f00ac7e)

In [2]:
!pip install texttable

Collecting texttable
  Downloading https://files.pythonhosted.org/packages/bd/a8/45aa1c64f0aea4967766d9b46be3c0b03b096d8fb8c3cfe119884cfeed5e/texttable-1.5.0.tar.gz
Building wheels for collected packages: texttable
  Running setup.py bdist_wheel for texttable ... done
  Stored in directory: /home/miaopei/.cache/pip/wheels/5a/6c/5d/76ac3d8f6107535ff4ac20bfe6e3795e5093a8b4607daa3bc6
Successfully built texttable
Installing collected packages: texttable
Successfully installed texttable-1.5.0
You are using pip version 9.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [3]:
import math
from texttable import Texttable
 
def calcSimlaryCosDist(user1, user2):
    """Return a mean-centred cosine (Pearson-style) similarity of two users.

    Args:
        user1: List of ``(movie_id, rating)`` pairs for the first user.
        user2: List of ``(movie_id, rating)`` pairs for the second user.
            Movie ids must be hashable.

    Returns:
        ``0`` when either list is empty or when the co-rated movies
        contribute no covariance; otherwise ``sum_xy / sqrt(sum_x * sum_y)``.

    Note:
        ``sum_x`` is accumulated over every rating in ``user1`` whereas
        ``sum_y`` only covers movies rated by both users, so the result is
        not symmetric in its arguments.
    """
    # An empty rating list has no mean; report "no similarity" instead of
    # failing with ZeroDivisionError.
    if not user1 or not user2:
        return 0

    avg_x = 0.0
    for entry in user1:
        avg_x += entry[1]
    avg_x /= len(user1)

    avg_y = 0.0
    for entry in user2:
        avg_y += entry[1]
    avg_y /= len(user2)

    # Index user2 by movie id once so that finding co-rated movies is a
    # dictionary lookup rather than a full scan of user2 per user1 rating.
    # A list per id keeps repeated ids contributing once per occurrence.
    ratings_by_movie = {}
    for entry in user2:
        ratings_by_movie.setdefault(entry[0], []).append(entry[1])

    sum_x = 0.0
    sum_y = 0.0
    sum_xy = 0.0
    for entry in user1:
        dev_x = entry[1] - avg_x
        for rating_y in ratings_by_movie.get(entry[0], ()):
            dev_y = rating_y - avg_y
            sum_xy += dev_x * dev_y
            sum_y += dev_y * dev_y
        sum_x += dev_x * dev_x

    if sum_xy == 0.0:
        return 0
    sx_sy = math.sqrt(sum_x * sum_y)
    # Defensive: a product that underflows to zero must not divide.
    if sx_sy == 0.0:
        return 0
    return sum_xy / sx_sy
 
def readFile(file_name, encoding='utf-8'):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to
            ``'utf-8'``, the encoding this function always used before the
            parameter existed.

    Returns:
        A list with one string per line, line terminators included.
    """
    # The context manager closes the handle even if reading or decoding
    # raises part-way through.
    with open(file_name, "r", encoding=encoding) as f:
        return f.readlines()
 
def getMoviesList(file_name):
    """Load movie metadata into a dictionary keyed by movie id.

    Each non-blank line of the file is split on ``|``. The first field is
    parsed as the integer movie id and the remaining fields are stored, in
    order, as that movie's information list.

    Args:
        file_name: Path of the ``|``-separated movie file.

    Returns:
        ``{movie_id: [field, ...]}``. The line terminator is removed, so
        the last field does not carry a trailing newline.
    """
    movie_info = {}
    for line in readFile(file_name):
        record = line.rstrip("\r\n")
        # Blank lines (commonly a trailing one) carry no id; int('') would
        # otherwise raise ValueError.
        if not record.strip():
            continue
        arr = record.split("|")
        movie_info[int(arr[0])] = arr[1:]
    return movie_info
def getRatingInformation(ratings):
    """Parse tab-separated rating lines into integer triples.

    Args:
        ratings: Iterable of text lines whose first three tab-separated
            fields are integers (user id, movie id, rating). Any further
            fields, such as a timestamp, are ignored.

    Returns:
        A list of ``[user_id, movie_id, rating]`` lists, one per non-blank
        input line, in input order.
    """
    r = []
    for line in ratings:
        # Skip blank lines (e.g. a trailing empty line) instead of failing
        # on int('').
        if not line.strip():
            continue
        rate = line.split('\t')
        r.append([int(rate[0]), int(rate[1]), int(rate[2])])
    return r
def createUserRankDic(rates):
    """Build the user -> ratings and movie -> users lookup tables.

    Args:
        rates: Rows of the form ``[user_id, movie_id, rating]``; for example
            ``[2, 1, 5]`` means user 2 gave movie 1 a rating of 5.

    Returns:
        A pair ``(user_rate_dict, item_to_user)`` where
        ``user_rate_dict[user_id]`` is a list of ``(movie_id, rating)``
        tuples and ``item_to_user[movie_id]`` is a list of the user ids that
        rated that movie, both in input order.
    """
    ratings_by_user = {}
    users_by_movie = {}
    for record in rates:
        user_id, movie_id, score = record[0], record[1], record[2]
        # Ratings grouped per user.
        ratings_by_user.setdefault(user_id, []).append((movie_id, score))
        # Users grouped per movie.
        users_by_movie.setdefault(movie_id, []).append(user_id)
    return ratings_by_user, users_by_movie
 
def calcNearestNeighbor(userid,user_dict,item_dict):
    """Rank every user who shares at least one rated movie with ``userid``.

    Args:
        userid: Id of the user to find neighbours for; must be a key of
            ``user_dict``.
        user_dict: ``{user_id: [(movie_id, rating), ...]}``.
        item_dict: ``{movie_id: [user_id, ...]}``.

    Returns:
        A list of ``[similarity, neighbor_id]`` lists sorted in descending
        order (by similarity, then by neighbour id).

    Raises:
        KeyError: If ``userid`` is not in ``user_dict`` or one of its movies
            is not in ``item_dict``.
    """
    target_ratings = user_dict[userid]

    # Collect candidate neighbours in first-seen order. The set makes the
    # duplicate check O(1); seeding it with userid excludes the user itself.
    neighbors = []
    seen = {userid}
    for item in target_ratings:
        for neighbor in item_dict[item[0]]:
            if neighbor not in seen:
                seen.add(neighbor)
                neighbors.append(neighbor)

    # Score each candidate against the target user and rank them.
    neighbors_dist = [
        [calcSimlaryCosDist(target_ratings, user_dict[neighbor]), neighbor]
        for neighbor in neighbors
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
 
def recommendationByUserFC(file_name,userid,k=5):
    """Recommend movies to ``userid`` from the ratings of its nearest users.

    Args:
        file_name: Path of the tab-separated ratings file.
        userid: Id of the user to recommend for.
        k: Number of most similar neighbours to take into account.

    Returns:
        A 4-tuple ``(recommended_ids, user_movies, item_to_user, neighbors)``:
        movie ids ordered by descending accumulated neighbour similarity,
        the movie ids ``userid`` has rated, the movie -> users dictionary,
        and the selected ``[similarity, user_id]`` neighbour entries.
        Movies already rated by ``userid`` are not removed from
        ``recommended_ids``.
    """
    # Load the ratings and index them both by user and by movie.
    rating_rows = getRatingInformation(readFile(file_name))
    ratings_by_user, users_by_movie = createUserRankDic(rating_rows)

    # Keep only the k most similar users.
    ranked_neighbors = calcNearestNeighbor(userid, ratings_by_user, users_by_movie)
    neighbors = ranked_neighbors[:k]

    # A movie's score is the sum of the similarities of the neighbours
    # that rated it.
    scores = {}
    for similarity, neighbor_user_id in neighbors:
        for movie_id, _rating in ratings_by_user[neighbor_user_id]:
            if movie_id in scores:
                scores[movie_id] += similarity
            else:
                scores[movie_id] = similarity

    # Rank as [score, movie_id] so ties are ordered by movie id.
    ranked = sorted(([score, movie_id] for movie_id, score in scores.items()), reverse=True)
    recommended_ids = [entry[1] for entry in ranked]

    # Movies the target user has rated.
    user_movies = [pair[0] for pair in ratings_by_user[userid]]
    return recommended_ids, user_movies, users_by_movie, neighbors
 
if __name__ == '__main__':
    # Load the movie catalogue and compute recommendations for user 1
    # using its 80 nearest neighbours.
    movies = getMoviesList('u.item')
    recommend_list, user_movie, items_movie, neighbors = recommendationByUserFC('u.data', 1, 80)
    # Ids of all selected neighbours.
    neighbors_id = [entry[1] for entry in neighbors]

    table = Texttable()
    table.set_deco(Texttable.HEADER)
    # All three columns are rendered as text and left-aligned.
    table.set_cols_dtype(['t', 't', 't'])
    table.set_cols_align(["l", "l", "l"])

    rows = [[u"movie name", u"release", u"from userid"]]
    # Show the top 20 recommendations, each with up to three of the
    # neighbours that rated it.
    for movie_id in recommend_list[:20]:
        from_user = [uid for uid in items_movie[movie_id] if uid in neighbors_id]
        movie_fields = movies[movie_id]
        rows.append([movie_fields[0], movie_fields[1], from_user[:3]])
    table.add_rows(rows)
    print(table.draw())


FileNotFoundError: [Errno 2] No such file or directory: 'u.item'