In [52]:
import pandas as pd
import rbo

In [53]:
# Parameters
# Sources of the two recommendation dumps; each value is handed straight to
# pd.read_json in the data-loading cell, so both must be set before that runs.
# NOTE(review): presumably overridden by a notebook parameterization tool
# (e.g. papermill) at execution time — confirm.
baseline_recommendations = None
sampled_recommendations = None

In [55]:
# Import data
# Read each recommendations dump (configured in the parameters cell) into its
# own DataFrame: baseline first, then sampled.
baseline_df = pd.read_json(path_or_buf=baseline_recommendations)
sampled_df = pd.read_json(path_or_buf=sampled_recommendations)

In [None]:
# Merge datasets
# Inner-join baseline and sampled results on "user"; overlapping column names
# get the "_base" / "_sampled" suffixes used by the metric cells.
# validate="one_to_one" makes pandas raise MergeError when either side holds
# duplicate "user" rows. Without it, duplicates multiply rows in the join and
# can make the length check below pass even though some baseline users are
# missing from the sampled data.
df = pd.merge(
    baseline_df,
    sampled_df,
    on="user",
    how="inner",
    suffixes=("_base", "_sampled"),
    validate="one_to_one",
)

# Check all users are present after merge
assert len(df) == len(baseline_df), (
    "%d baseline users have no match in the sampled data"
    % (len(baseline_df) - len(df))
)

df.head()

In [None]:
# Compute common recommended tweets (without respecting rank order):
def set_overlap(row):
    """Return the share of distinct baseline recommendations also present in the sampled ones.

    Both lists are reduced to sets, so duplicates and ordering are ignored.

    Args:
        row: Mapping-like object exposing "recommendations_base" and
            "recommendations_sampled" iterables.

    Returns:
        len(base ∩ sampled) / len(base) as a float, or the integer 0 when the
        baseline set is empty.
    """
    baseline_items = set(row["recommendations_base"])
    sampled_items = set(row["recommendations_sampled"])
    if not baseline_items:
        # Nothing to compare against; avoids dividing by zero.
        return 0
    shared_items = baseline_items & sampled_items
    return len(shared_items) / len(baseline_items)

# Score every merged row, then summarize only the rows whose
# status_code_base equals 200.
df["set_overlap"] = df.apply(set_overlap, axis=1)
df.loc[df["status_code_base"] == 200, "set_overlap"].describe()

In [None]:
# Compute count of missing recommendations
def missing_recommendations(row):
    """Count distinct baseline recommendations that do not appear in the sampled ones.

    Args:
        row: Mapping-like object exposing "recommendations_base" and
            "recommendations_sampled" iterables.

    Returns:
        Number of unique baseline items absent from the sampled set (int).
    """
    baseline_items = set(row["recommendations_base"])
    sampled_items = set(row["recommendations_sampled"])
    # Set difference keeps exactly the baseline items the sampled run lost.
    return len(baseline_items - sampled_items)

# Count missing items for every merged row, then summarize only the rows
# whose status_code_base equals 200.
df["missing_recommendations"] = df.apply(missing_recommendations, axis=1)
df.loc[df["status_code_base"] == 200, "missing_recommendations"].describe()

In [None]:
# Compute rank biased overlap for similarity between ranked lists
def rank_biased_overlap(row):
    """Return the rank-biased overlap of a row's baseline and sampled recommendation lists.

    The lists are passed in their stored order to rbo.RankingSimilarity and
    scored with its rbo() method using that method's default arguments.

    Args:
        row: Mapping-like object exposing "recommendations_base" and
            "recommendations_sampled" sequences.

    Returns:
        Whatever RankingSimilarity.rbo() yields for the two lists.
    """
    ranking_pair = rbo.RankingSimilarity(
        row["recommendations_base"],
        row["recommendations_sampled"],
    )
    return ranking_pair.rbo()
    
# Score ranking similarity for every merged row, then summarize only the rows
# whose status_code_base equals 200.
df["rank_biased_overlap"] = df.apply(rank_biased_overlap, axis=1)
df.loc[df["status_code_base"] == 200, "rank_biased_overlap"].describe()