In [17]:
import pandas as pd
import requests as rq
from IPython.display import display

In [18]:
# Parameters
# Input: path of the JSON file holding the users to query
# (it is read with pandas' "table" orient).
users = "../data/users.json"

# Address of the recommendation service.
host = "localhost"
port = "5000"  # kept as a string; it is only interpolated into the request URL

# Sampling configuration.
repeat_for_user = 20  # number of requests issued per user; their results are pooled
limit = 10  # sent as the `limit` query parameter and reused as the top-N cut-off
walks = 1000  # sent as the `walks` query parameter
walk_length = 100  # sent as the `walk_length` query parameter

# Output: where the aggregated baseline recommendations are written.
# (Plain string: the previous f-string prefix had no placeholders to fill.)
output_baseline_recommendations = "../data/baseline_recommendations.json"

In [19]:
# Load the user table from the configured path; the file is parsed with
# pandas' "table" JSON orient.
user_df = pd.read_json(
    users,
    orient="table",
)

In [20]:
def _fetch_recommendation_ids(url, max_attempts=10):
    """Request ``url`` and return the ``id`` of every item in the JSON reply.

    The request is issued up to ``max_attempts`` times in total, stopping at
    the first HTTP 200 answer. If no attempt succeeds an empty list is
    returned, so a failing request contributes nothing instead of aborting
    the whole run. Exceptions raised by the HTTP client are not caught.
    """
    response = rq.get(url)
    for retry in range(1, max_attempts):
        if response.status_code == 200:
            break
        print(f"Got response code {response.status_code} trying again {retry}/{max_attempts}...")
        response = rq.get(url)

    if response.status_code != 200:
        return []
    return [res["id"] for res in response.json()]


def get_recommendations(user_df, walks, walk_length, limit, port):
    """Collect the most frequently returned recommendations for every user.

    For each value in ``user_df["user_id"]`` the SALSA recommendation endpoint
    is queried ``repeat_for_user`` times. The ids returned by all of those
    requests are pooled, and the ``limit`` most frequent ids are kept together
    with the number of times each one occurred.

    Besides its arguments, the function reads the notebook-level globals
    ``host`` and ``repeat_for_user``.

    Parameters
    ----------
    user_df : pandas.DataFrame
        Must contain a ``user_id`` column.
    walks, walk_length, limit
        Forwarded as the query parameters of the same names; ``limit`` is also
        the number of top recommendations kept per user.
    port
        Port of the recommendation service, interpolated into the URL.

    Returns
    -------
    pandas.DataFrame
        One row per user with the columns ``hash_function``, ``partition``,
        ``user``, ``recommendations`` (list of ids, most frequent first) and
        ``hits`` (list of the matching occurrence counts).
    """
    hash_function = "Single"
    partition = "0"
    print(f"gathering recommendations from port: {port} and partition {partition} hash_function: {hash_function}")

    rows = []
    # Iterate over the column itself: ``iterrows`` builds one Series per row
    # and may upcast values when the frame has mixed dtypes, which could
    # corrupt large integer ids.
    for user in user_df["user_id"]:
        # The URL does not change between repeats, so build it once per user.
        url = f"http://{host}:{port}/recommendation/salsa/{user}?walks={walks}&walk_length={walk_length}&limit={limit}"

        # Pool ids in a plain list; ``Series.append`` no longer exists in
        # pandas 2.x and an empty ``pd.Series()`` triggers a dtype warning.
        pooled_ids = []
        for _ in range(repeat_for_user):
            pooled_ids.extend(_fetch_recommendation_ids(url))

        if pooled_ids:
            top_counts = pd.Series(pooled_ids).value_counts().nlargest(limit)
            most_frequent_recommendations = top_counts.index.to_list()
            hits = top_counts.to_list()
        else:
            # Every request failed or returned nothing for this user.
            most_frequent_recommendations, hits = [], []

        rows.append((hash_function, partition, user, most_frequent_recommendations, hits))

    return pd.DataFrame(rows, columns=["hash_function", "partition", "user", "recommendations", "hits"])

In [21]:
# Perform API requests
# Query the service for the single baseline configuration and keep the frame
# in a list so it can be concatenated into one result table.
result = get_recommendations(user_df, walks, walk_length, limit, port)
result_dfs = [result]
result_df = pd.concat(result_dfs)

# Show a subset of the columns, then persist the full table.
display(result_df[["user", "recommendations", "hits", "partition"]])
result_df.to_json(
    output_baseline_recommendations,
    orient="table",
    index=False,
)

gathering recommendations from port: 5000 and partition 0 hash_function: Single


  recommendations_series = pd.Series()
  recommendations_series = pd.Series()


Unnamed: 0,user,recommendations,hits,partition
0,1104808931891793920,"[1367572664047591424, 1367538607968432129, 136...","[10, 9, 8, 6, 5, 4, 4, 3, 3, 2]",0
1,742613688448864256,"[1367554394892419078, 1367858453293711370, 136...","[10, 10, 10, 10, 3, 2, 2, 2, 2, 2]",0
2,1345404930094374912,"[1365952122563399680, 1364526878833410048, 136...","[10, 10, 10, 10, 9, 7, 6, 6, 5, 4]",0
3,1063441789590429696,"[1367485887324565506, 1367454413246365700, 136...","[10, 10, 10, 10, 10, 10, 10, 10, 6, 5]",0
4,2232404250,"[1366796462194065411, 1367327244968943617, 136...","[10, 10, 10, 7, 7, 7, 6, 6, 6, 5]",0
...,...,...,...,...
495,740472581581475840,"[1367924006377099272, 1366433266904018948, 136...","[10, 10, 10, 10, 10, 10, 9, 8, 8, 4]",0
496,28675507,"[1365418240940281859, 1355702091377860616, 136...","[10, 10, 10, 9, 8, 8, 7, 6, 5, 5]",0
497,909017503,"[1368891982718832643, 1336727444443832322, 136...","[10, 10, 10, 10, 10, 10, 9, 6, 5, 3]",0
498,1227709406839619584,"[1360431248229847043, 1362098305468035072, 135...","[7, 6, 4, 4, 4, 4, 4, 2, 2, 2]",0
