In [None]:
from scipy.stats import beta
import operator
import numpy as np
from scipy.special import betaln

In [None]:
def load_1_m(path='../ml-1m/ratings.dat', delimiter='::', min_ratings=10):
    """Load a MovieLens-style ratings file and binarise the ratings per user.

    Each usable line holds at least four `delimiter`-separated fields, of
    which the first three are read as integers: user, item, rating. Lines
    with fewer than four fields (e.g. the trailing empty line) are skipped.

    For every user the ratings are replaced by 1 when the rating is greater
    than or equal to that user's own mean rating, and by 0 otherwise. Users
    with fewer than `min_ratings` rated items are then dropped.

    Parameters
    ----------
    path : str
        Ratings file to read. Defaults to the ml-1m file the notebook uses.
    delimiter : str
        Field separator. Pass '\\t' together with the ml-100k `u.data` path
        to load that dataset instead.
    min_ratings : int
        Minimum number of rated items a user must have to be kept.

    Returns
    -------
    dict
        ``{user_id: {item_id: 0 or 1}}``.
    """
    user_item_map = {}
    # `with` closes the handle even if a line fails to parse.
    with open(path, 'r') as ratings_file:
        for line in ratings_file:
            attrs = line.rstrip('\r\n').split(delimiter)
            if len(attrs) < 4:
                continue
            user = int(attrs[0])
            item = int(attrs[1])
            rating = int(attrs[2])
            user_item_map.setdefault(user, {})[item] = rating

    # Binarise against each user's own average. Overwriting the values of
    # existing keys while iterating is safe because the dict size is unchanged.
    for item_ratings in user_item_map.values():
        avg_rating_user = sum(item_ratings.values()) * 1.0 / len(item_ratings)
        for item in item_ratings:
            item_ratings[item] = 1 if item_ratings[item] >= avg_rating_user else 0

    # Collect the users to drop first: deleting keys while iterating over the
    # dict itself raises "dictionary changed size during iteration".
    sparse_users = [user for user in user_item_map
                    if len(user_item_map[user]) < min_ratings]
    for user in sparse_users:
        del user_item_map[user]
    return user_item_map

In [None]:
def form_graph(user_item_map):
    """Turn the user -> items mapping into an undirected bipartite graph.

    Users become nodes labelled ``'u<id>'`` and items nodes labelled
    ``'i<id>'``. Every (user, item) entry of `user_item_map` contributes an
    edge stored in both directions. The rating values themselves are ignored.

    Returns a dict mapping each node label to the set of its neighbours.
    """
    adjacency = {}
    for user_id, rated_items in user_item_map.items():
        user_node = 'u' + str(user_id)
        # Register the user even when it has no items at all.
        adjacency.setdefault(user_node, set())
        for item_id in rated_items:
            item_node = 'i' + str(item_id)
            adjacency.setdefault(item_node, set()).add(user_node)
            adjacency[user_node].add(item_node)
    return adjacency

In [None]:
def clean_graph(graph, min_degree=10):
    """Iteratively prune low-degree nodes from an adjacency map.

    Repeats two steps until a full pass changes nothing:

    1. delete every node that has fewer than `min_degree` neighbours;
    2. strip from the remaining nodes any neighbour that is no longer (or
       never was) a key of `graph`.

    Removing a node can push its neighbours below the threshold, hence the
    loop. Afterwards every neighbour collection is replaced by a sorted list,
    so the result does not depend on set iteration order.

    Parameters
    ----------
    graph : dict
        Node -> collection of neighbour nodes (sets or lists). Modified in
        place.
    min_degree : int
        Minimum number of neighbours a node needs to survive. Defaults to 10.

    Returns
    -------
    dict
        The same `graph` object, with list-valued entries.
    """
    while True:
        changed = False

        # Snapshot the victims before deleting: removing keys while iterating
        # over the dict itself is not allowed.
        sparse_nodes = [node for node in graph if len(graph[node]) < min_degree]
        for node in sparse_nodes:
            del graph[node]
            changed = True

        # Drop references to nodes that are not in the graph. Rebinding the
        # value of an existing key does not resize the dict.
        for node in graph:
            neighbours = graph[node]
            surviving = [other for other in neighbours if other in graph]
            if len(surviving) != len(neighbours):
                changed = True
                graph[node] = surviving

        if not changed:
            break

    for node in graph:
        graph[node] = sorted(graph[node])
    return graph

In [None]:
def get_num_ratings(user_item_map):
    """Tally binarised ratings per item.

    Returns ``{item: [n_nonzero, n_zero]}``. Both counters start at 1, so an
    item that was rated once already has a total of 3. Index 0 is incremented
    for every rating that is not 0, index 1 for every rating equal to 0.
    """
    tallies = {}
    for labelled_items in user_item_map.values():
        for item, label in labelled_items.items():
            counters = tallies.setdefault(item, [1, 1])
            slot = 1 if label == 0 else 0
            counters[slot] += 1
    return tallies

In [None]:
# Load the ratings (by default from '../ml-1m/ratings.dat', relative to the
# working directory) as {user: {item: 0 or 1}}.
user_item_map = load_1_m()

In [None]:
# Per-item [non-zero, zero] rating counters; read as a global by PIS, PPS and PORS.
item_rating_map = get_num_ratings(user_item_map)

In [None]:
# Bipartite adjacency map with 'u<id>' user nodes and 'i<id>' item nodes.
graph = form_graph(user_item_map)

In [None]:
# Prune nodes with fewer than 10 neighbours (repeated until stable); the
# neighbour collections come back as lists. `graph` is also modified in place.
graph = clean_graph(graph)

In [None]:
# Memo caches filled by rank(): each maps a (primary_item, secondary_item)
# pair of node labels to the corresponding PIS / PPS / PORS value.
# Re-running this cell discards everything cached so far.
PIS_map = {}
PPS_map = {}
PORS_map = {}

In [None]:
def PIS(item_pair):
    """Score a pair of items from their rating counters in `item_rating_map`.

    With ``a1, b1 = item_rating_map[item1]`` and
    ``a2, b2 = item_rating_map[item2]`` this evaluates

        sum_{i=0}^{a2-1}  B(a1 + i, b1 + b2)
                          / ((b2 + i) * B(1 + i, b2) * B(a1, b1))

    where B is the Beta function. The terms are computed in log space
    (`betaln`) to avoid overflow and underflow for large counters.

    The expression has the shape of the closed form for P(p2 > p1) when
    p1 ~ Beta(a1, b1) and p2 ~ Beta(a2, b2) are independent.
    NOTE(review): the original cell used an undefined loop variable `i`; the
    summation range 0 .. a2-1 is reconstructed from that closed form --
    confirm it matches the intended definition.

    Parameters
    ----------
    item_pair : sequence of two str
        Item node labels such as ``('i12', 'i345')``; the one-character
        prefix is stripped to obtain the integer keys of `item_rating_map`.

    Returns
    -------
    float
    """
    item1 = int(item_pair[0][1:])
    item2 = int(item_pair[1][1:])
    a1, b1 = item_rating_map[item1][0], item_rating_map[item1][1]
    a2, b2 = item_rating_map[item2][0], item_rating_map[item2][1]

    # All summation indices at once; betaln and np.log broadcast over arrays.
    i = np.arange(a2)
    log_terms = (betaln(a1 + i, b1 + b2)
                 - np.log(b2 + i)
                 - betaln(1 + i, b2)
                 - betaln(a1, b1))
    return float(np.exp(log_terms).sum())

In [None]:
def PPS(item_pair):
    """Multiply the two items' non-zero rating shares.

    For each item of the pair the share is ``c[0] / (c[0] + c[1])`` with
    ``c = item_rating_map[item]``; the result is the product of both shares.
    `item_pair` holds two node labels whose first character is stripped to
    get the integer item ids.
    """
    first_id = int(item_pair[0][1:])
    second_id = int(item_pair[1][1:])

    def share(item_id):
        counters = item_rating_map[item_id]
        # `* 1.0` forces float division under Python 2 as well.
        return counters[0] * 1.0 / (counters[0] + counters[1])

    first_share = share(first_id)
    second_share = share(second_id)
    return first_share * second_share

In [None]:
def PORS(item_pair):
    """Ratio of the second item's odds to the first item's odds.

    The odds of an item are ``c[0] / c[1]`` with ``c = item_rating_map[item]``.
    `item_pair` holds two node labels whose first character is stripped to
    get the integer item ids. Returns ``odds(second) / odds(first)``.
    """
    first_id = int(item_pair[0][1:])
    second_id = int(item_pair[1][1:])

    def odds(item_id):
        counters = item_rating_map[item_id]
        # `* 1.0` forces float division under Python 2 as well.
        return counters[0] * 1.0 / counters[1]

    first_odds = odds(first_id)
    second_odds = odds(second_id)
    return second_odds / first_odds

In [None]:
def _memoized_score(cache, score_fn, item_pair):
    """Return score_fn(item_pair), computing it only on the first request."""
    if item_pair not in cache:
        cache[item_pair] = score_fn(item_pair)
    return cache[item_pair]


def rank(graph, target_user):
    """Accumulate PIS, PPS and PORS scores for every item of `target_user`.

    For each item linked to `target_user` (the primary item), walk to every
    other user linked to that item and from there to every item that user is
    linked to but `target_user` is not (the secondary items). The pairwise
    score of (primary, secondary) is added once per such path, so a secondary
    item reached through several users contributes several times.

    Pairwise values are memoised in the module-level `PIS_map`, `PPS_map`
    and `PORS_map` dicts, which this function fills as a side effect.

    Parameters
    ----------
    graph : dict
        Node label -> iterable of neighbouring node labels.
    target_user : str
        Node label of the user to score, e.g. ``'u1'``.

    Returns
    -------
    tuple of three dicts
        ``(score_map_PIS, score_map_PPS, score_map_PORS)``, each mapping a
        primary item label to its accumulated score.
    """
    # The neighbour collections are lists after clean_graph(); a set keeps the
    # membership test in the innermost loop constant-time.
    target_items = set(graph[target_user])

    score_map_PIS = {}
    score_map_PPS = {}
    score_map_PORS = {}
    for primary_item in graph[target_user]:
        pis_total = 0.0
        pps_total = 0.0
        pors_total = 0.0
        for secondary_user in graph[primary_item]:
            if secondary_user == target_user:
                continue
            for secondary_item in graph[secondary_user]:
                if secondary_item in target_items:
                    continue
                pair = (primary_item, secondary_item)
                pis_total += _memoized_score(PIS_map, PIS, pair)
                pps_total += _memoized_score(PPS_map, PPS, pair)
                pors_total += _memoized_score(PORS_map, PORS, pair)
        score_map_PIS[primary_item] = pis_total
        score_map_PPS[primary_item] = pps_total
        score_map_PORS[primary_item] = pors_total
    return score_map_PIS, score_map_PPS, score_map_PORS

In [None]:
# Score the items linked to user node 'u1'; the result is the tuple
# (PIS scores, PPS scores, PORS scores), each a dict keyed by item label.
ranking = rank(graph, 'u1')

In [None]:
# Split the three score dicts returned by rank() into separate names.
ranking_PIS, ranking_PPS, ranking_PORS = ranking

In [None]:
# Order each score dict's (item, score) pairs by score, ascending.
# NOTE(review): with an ascending sort the slices below show the five
# lowest-scoring items -- confirm that is the intended end of the ranking.
sorted_1 = sorted(ranking_PIS.items(), key=operator.itemgetter(1))
sorted_2 = sorted(ranking_PPS.items(), key=operator.itemgetter(1))
sorted_3 = sorted(ranking_PORS.items(), key=operator.itemgetter(1))

# Call form of print: valid on both Python 2 and Python 3.
print(sorted_1[:5])
print(sorted_2[:5])
print(sorted_3[:5])

In [None]:
# Full PIS score dict for the ranked user (call form of print works on
# both Python 2 and Python 3).
print(ranking_PIS)