In [3]:
from collections import defaultdict
import numpy as np

In [4]:
def read(filename):
    """Parse a user/ad score file into two mirrored adjacency maps.

    File layout, as consumed by this parser:
      * line 1: the number of entries ``N``;
      * the next ``N`` lines: ``user,ad,score`` (integer ids, float score);
      * the following line: ``user,ad`` -- the pair to make predictions for.

    Args:
        filename: Path of the text file to read.

    Returns:
        A 6-tuple ``(users, user_directed_graph, ads, ads_directed_graph,
        predict_user, predict_ad)``:
          * ``users`` / ``ads`` -- sets of every user id / ad id seen;
          * ``user_directed_graph[user][ad]`` -- score of that entry;
          * ``ads_directed_graph[ad][user]`` -- the same score, indexed the
            other way round;
          * ``predict_user`` / ``predict_ad`` -- ids from the final line.
        Both graphs are nested ``defaultdict``s, so indexing a missing key
        silently inserts an empty row / a ``0.0`` score.

    Raises:
        ValueError: If a line is missing, has the wrong number of fields, or
            contains a field that is not numeric.
    """
    users = set()
    user_directed_graph = defaultdict(lambda: defaultdict(float))

    ads = set()
    ads_directed_graph = defaultdict(lambda: defaultdict(float))

    # The context manager guarantees the file is closed, even when a
    # malformed line raises half-way through parsing.
    with open(filename, 'r') as handle:
        num_entries = int(handle.readline())

        for _ in range(num_entries):
            # strip() removes the line ending without eating a real character
            # when the line happens not to end in a newline.
            fields = handle.readline().strip().split(',')
            score = float(fields[-1])
            cur_user, cur_ad = [int(entry) for entry in fields[:-1]]

            users.add(cur_user)
            user_directed_graph[cur_user][cur_ad] = score

            ads.add(cur_ad)
            ads_directed_graph[cur_ad][cur_user] = score

        predict_user, predict_ad = [int(entry) for entry in handle.readline().split(',')]

    return users, user_directed_graph, ads, ads_directed_graph, predict_user, predict_ad



In [5]:
# Load the small sample data set; the names bound here are module-level and
# are what later cells operate on.
(
    users,
    user_directed_graph,
    ads,
    ads_directed_graph,
    predict_user,
    predict_ad,
) = read('sample_input.txt')


In [6]:
def _refresh_pair_similarity(nodes, graph, opposite_nodes, opposite_similarity,
                             similarity, decay):
    """Recompute, in place, the similarity of every unordered pair in ``nodes``.

    ``nodes`` is a list of ids on one side of the bipartite graph,
    ``graph[node]`` holds that node's neighbours on the opposite side, and
    ``opposite_similarity`` is the current similarity table of the opposite
    side. Results are written symmetrically into ``similarity``.
    """
    # Partial sums: partial[n][o] = sum of opposite_similarity[x][o] over the
    # neighbours x of n. Sharing these across pairs is the memoization trick.
    partial = defaultdict(lambda: defaultdict(float))
    for node in nodes:
        neighbours = graph[node]
        for opposite in opposite_nodes:
            total = 0.0
            for neighbour in neighbours:
                total += opposite_similarity[neighbour][opposite]
            partial[node][opposite] = total

    for i in range(len(nodes)):
        first = nodes[i]
        for j in range(i + 1, len(nodes)):
            second = nodes[j]
            total = 0.0
            for neighbour in graph[second]:
                total += partial[first][neighbour]
            degree_product = len(graph[first]) * len(graph[second])
            # A node without neighbours has similarity 0 to every other node;
            # this also avoids dividing by zero.
            score = decay / degree_product * total if degree_product else 0.0
            similarity[first][second] = similarity[second][first] = score


def simple_simrank(users, user_directed_graph, ads, ads_directed_graph,
                   num_iterations=10, C1=0.8, C2=0.8):
    """Run bipartite SimRank with the partial-sums memoization trick.

    Only the structure of the graphs is used (which ads a user is linked to
    and vice versa); the scores stored on the edges are ignored.

    Within one iteration the user similarities are refreshed first, and the
    ad similarities are then computed from those already-refreshed user
    similarities.

    Args:
        users: Iterable of user ids.
        user_directed_graph: Mapping ``user -> {ad: score}``. It is indexed
            directly, so it should be a ``defaultdict`` (as produced by
            ``read``) or contain every id in ``users``.
        ads: Iterable of ad ids.
        ads_directed_graph: Mapping ``ad -> {user: score}``, same requirement
            as ``user_directed_graph``.
        num_iterations: Number of refinement rounds (default 10).
        C1: Decay factor applied to user-user similarity (default 0.8).
        C2: Decay factor applied to ad-ad similarity (default 0.8).

    Returns:
        ``(similarity_user, similarity_ads)`` -- nested ``defaultdict``s with
        ``similarity_user[u1][u2]`` / ``similarity_ads[a1][a2]`` holding the
        symmetric similarity score; each node has similarity 1.0 to itself.
    """
    similarity_user = defaultdict(lambda: defaultdict(float))
    similarity_ads = defaultdict(lambda: defaultdict(float))

    for u in users:
        similarity_user[u][u] = 1.0

    for a in ads:
        similarity_ads[a][a] = 1.0

    # Materialise the orderings once; they do not change between iterations.
    list_users = list(users)
    list_ads = list(ads)

    for _ in range(num_iterations):
        _refresh_pair_similarity(list_users, user_directed_graph, list_ads,
                                 similarity_ads, similarity_user, C1)
        _refresh_pair_similarity(list_ads, ads_directed_graph, list_users,
                                 similarity_user, similarity_ads, C2)

    return similarity_user, similarity_ads
                

In [11]:
def fetch_top3(to_predict, similarity):
    """Return up to three most similar entries for ``to_predict``.

    Args:
        to_predict: Id whose similarity row is inspected.
        similarity: Mapping ``id -> {other_id: score}``. The row is fetched
            with ``similarity[to_predict]``, so a ``defaultdict`` will get an
            empty row inserted when the id is unknown.

    Returns:
        A list of at most three ``(other_id, score)`` tuples, highest score
        first. ``to_predict`` itself is never included.

    Entries sharing exactly the same score are collapsed to the one with the
    smallest id before ranking, so every returned score is distinct.
    NOTE(review): this drops tied runners-up entirely rather than merely
    ordering them by id -- confirm that is the intended tie rule.
    """
    # Single pass with a dict lookup per entry; score -> (smallest id, score).
    best_for_score = {}
    for candidate, score in similarity[to_predict].items():
        if candidate == to_predict:
            continue
        incumbent = best_for_score.get(score)
        if incumbent is None or candidate < incumbent[0]:
            best_for_score[score] = (candidate, score)

    ranked = sorted(
        best_for_score.values(),
        reverse=True,
        key=lambda pair: (pair[1], -pair[0]),
    )
    return ranked[:3]

In [12]:
# Re-load from the larger data set (this rebinds the names set by the sample
# run), compute similarities, and pick the closest users / ads for the
# requested (user, ad) pair.
(
    users,
    user_directed_graph,
    ads,
    ads_directed_graph,
    predict_user,
    predict_ad,
) = read('input_b.txt')

# Alternative call kept for reference (`simrank` is not defined in the visible cells):
# similarity_user, similarity_ads = simrank(users, ads, user_directed_graph, ads_directed_graph, 10, 0.8, 0.8)
similarity_user, similarity_ads = simple_simrank(
    users, user_directed_graph, ads, ads_directed_graph
)

top3_users = fetch_top3(predict_user, similarity_user)
top3_ads = fetch_top3(predict_ad, similarity_ads)

# Last expression of the cell: displayed as the cell output.
top3_users, top3_ads

([(24481, 0.32294552747512184),
  (76584, 0.27223586898312374),
  (5786, 0.25225715544238314)],
 [(0, 0.5345862231972374), (3, 0.4374262162314706), (5, 0.43574833499259097)])

In [20]:
# Empty lookup tables for per-ad and per-user weights; nothing in the visible
# cells fills them yet (presumably normalised weights -- TODO confirm).
ad_nweights = dict()
user_nweights = dict()



0.8674284731834108

In [14]:
# Display the {ad: score} row recorded for user 2. The graph built by `read`
# is a defaultdict, so this lookup inserts an empty row if user 2 has none.
user_directed_graph[2]

defaultdict(float, {20: 2.0, 38: 2.5})