In [16]:
import itertools as it
import pandas as pd
import requests as rq
from IPython.display import display

In [17]:
# Parameters
# Input file with the users to query (read with orient="table" further down).
users = "../data/users.json"
# Host on which every partition's recommendation service is reachable.
host = "localhost"

# Label of the hash function under evaluation; it is also part of the output file names.
hash_function = "murmur2"
# Partition id -> port of the service instance serving that partition.
partition_port = {
    "0": "5000",
    "1": "5001",
}
# Request parameter values; every combination of them is queried.
limits = [10]
walks = [1000]
walk_length = [100]

# When True, the per-partition results are additionally merged per user.
should_merge = True

# Output files (per-partition results and merged results).
output_recommendations = f"../data/{hash_function}_recommendations.json"
output_merge_recommendations = f"../data/{hash_function}_merge_recommendations.json"

In [18]:
# Load the users whose recommendations will be requested
user_df = pd.read_json(users, orient="table")

# One configuration per combination of
# (hash function, (partition, port), walks, walk length, limit)
configurations = [
    combination
    for combination in it.product(
        [hash_function],
        partition_port.items(),
        walks,
        walk_length,
        limits,
    )
]
configurations

[('murmur2', ('0', '5000'), 1000, 100, 10),
 ('murmur2', ('1', '5001'), 1000, 100, 10)]

In [19]:
def get_recommendations(user_df, walks, walk_length, limit, partition_port, timeout=60):
    """Request SALSA recommendations for every user from one partition's service.

    Reads the notebook-level globals ``host`` (service host name) and
    ``hash_function`` (label stored in the result and printed in the log line).

    Args:
        user_df: DataFrame with a ``user_id`` column; one request is sent per row.
        walks: Value of the ``walks`` query parameter.
        walk_length: Value of the ``walk_length`` query parameter.
        limit: Value of the ``limit`` query parameter.
        partition_port: ``(partition, port)`` pair identifying the service instance.
        timeout: Seconds to wait for each HTTP request before giving up, so a
            stalled service cannot block the notebook forever. ``None`` disables
            the timeout.

    Returns:
        DataFrame with columns ``hash_function``, ``partition``, ``user``,
        ``recommendations`` (list of the ``id`` fields of the response) and
        ``hits`` (list of the ``hit`` fields). Users whose request did not return
        HTTP 200 get two empty lists.

    Raises:
        Whatever ``requests`` raises for connection problems or timeouts; these
        are not caught here.
    """
    partition = partition_port[0]
    port = partition_port[1]
    print(f"gathering recommendations from port: {port} and partition {partition} hash_function: {hash_function}")

    rows = []
    # Iterate over the column itself instead of iterrows(): iterrows() builds one
    # Series per row and may upcast integer ids to float when the frame also has
    # float columns, which would corrupt large user ids in the URL and the result.
    for user in user_df["user_id"]:
        url = f"http://{host}:{port}/recommendation/salsa/{user}?walks={walks}&walk_length={walk_length}&limit={limit}"
        response = rq.get(url, timeout=timeout)

        # Best effort: any non-200 answer is recorded as "no recommendations".
        response_body = response.json() if response.status_code == 200 else []
        recommendations = [res['id'] for res in response_body]
        hits = [res['hit'] for res in response_body]
        rows.append((hash_function, partition, user, recommendations, hits))

    return pd.DataFrame(rows, columns=["hash_function", "partition", "user", "recommendations", "hits"])

In [20]:
# Perform API requests
result_dfs = []

# The loop targets carry a `run_` prefix so that iterating does not overwrite the
# notebook-level parameters `hash_function`, `partition_port`, `walks` and
# `walk_length`; otherwise `partition_port` would end up as a tuple and the
# configuration cell could not be re-run. `run_hash_function` is not passed on:
# get_recommendations reads the notebook-level `hash_function`, which is the
# value every configuration was built from.
for (run_hash_function, run_partition_port, run_walks, run_walk_length, limit) in configurations:
    result = get_recommendations(user_df, run_walks, run_walk_length, limit, run_partition_port)
    result_dfs.append(result)

# Stack the per-partition results, show them, and persist them as table-oriented JSON.
result_df = pd.concat(result_dfs)
display(result_df[["user", "recommendations", "hits", "partition"]])
result_df.to_json(output_recommendations, index=False, orient="table")

gathering recommendations from port: 5000 and partition 0 hash_function: murmur2
gathering recommendations from port: 5001 and partition 1 hash_function: murmur2


Unnamed: 0,user,recommendations,hits,partition
0,356515976,"[1367250900041953280, 1367272378766614529, 1362498014565310465, 1361682280742232071, 332282461344124931, 1364270912661553153, 1367205667321171968, 1367295813030875137, 1366448694111973381, 1367254342286401538]","[81, 72, 49, 49, 46, 46, 45, 43, 42, 39]",0
1,998140573008695296,"[1367198091053441027, 1366820931184717824, 1367272378766614529, 1365757552055648256, 332282461344124931, 1367250900041953280, 1367295813030875137, 1366448694111973381, 1367364251233497095, 1364826301027115008]","[85, 84, 82, 67, 60, 55, 52, 50, 50, 49]",0
2,818876014390603776,"[1360287642408132613, 1367578811634704385, 1362133655913644034, 1350587005755944960, 1368951854504247299, 1360579041909219332, 1364943316496826379, 556102338923229184, 1314035151245053958, 1244295717629370369]","[281, 274, 265, 203, 186, 183, 182, 160, 140, 133]",0
3,746247276,"[1367250900041953280, 1367305184628649985, 1367460088252882949, 1367568143552544770, 1367534350724517890, 1364585606685351940, 1367645494445568003, 1364826301027115008, 1367304018217865233, 1367213114228318209]","[96, 62, 60, 60, 57, 56, 52, 51, 48, 45]",0
4,716709528,[],[],0
...,...,...,...,...
495,783719151684685824,"[1368599913320173568, 1367608626110738436, 1367925520042586112, 1270519903460528130, 1367556243871707136, 1367875049995169794, 1305307998596800512, 1273550743308386304, 1360343382556483587, 1358780911370842114]","[213, 197, 192, 180, 171, 167, 154, 146, 143, 142]",1
496,1326584656658132994,[],[],1
497,330262748,"[1354504065993084928, 1355766164450902019, 1355401948606947330, 1363563589378994178, 1354805587897327617, 1359897952223961088, 1365304148187623427, 1362845879900770305, 1364127962765869056, 1356380528451219462]","[201, 193, 185, 180, 179, 170, 169, 163, 161, 157]",1
498,717783559579144193,[],[],1


In [21]:
def merge_sort_rec_hit(row, limit=10):
    """Return the top ``limit`` recommendations of a merged row, ordered by hits.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with the parallel
            sequences ``row["recommendations"]`` and ``row["hits"]``.
        limit: Maximum number of recommendations to keep (default 10).

    Returns:
        Tuple ``(recommendations, hits)`` of two lists, sorted by hit count in
        descending order and truncated to ``limit`` entries. Entries with equal
        hit counts keep their original relative order.

    The row is always sorted. A previous shortcut returned rows with exactly 10
    entries untouched, which left them unsorted whenever those 10 entries were
    the concatenation of several shorter partition results.
    """
    # NOTE(review): if the same recommendation id occurs more than once, the dict
    # keeps the hit count of its last occurrence. Confirm whether the counts
    # should be summed (or the maximum taken) instead.
    hits_by_recommendation = dict(zip(row["recommendations"], row["hits"]))
    top_entries = sorted(
        hits_by_recommendation.items(),
        key=lambda item: item[1],
        reverse=True,
    )[:limit]
    return [rec for rec, _ in top_entries], [hit for _, hit in top_entries]

In [22]:
# Merge partition results
if should_merge:
    # "sum" on the list-valued columns concatenates each user's per-partition lists.
    merge_df = (
        result_df
        .groupby("user", as_index=False)
        .agg({'recommendations': 'sum', 'hits': 'sum'})
    )
    merge_df["hash_function"] = hash_function
    merge_df["partition"] = "merged"
    # Number of concatenated recommendations per user.
    merge_df["len_rec"] = merge_df.apply(lambda merged_row: len(merged_row["recommendations"]), axis=1)
    pd.set_option('display.max_colwidth', None)
    # Replace the concatenated lists by the sorted, truncated ones.
    merge_df[["recommendations", "hits"]] = merge_df.apply(
        merge_sort_rec_hit,
        axis=1,
        result_type="expand",
    )
    # Save results
    merge_df.to_json(output_merge_recommendations, index=False, orient="table")
