In [None]:
%reload_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')
from logging import getLogger
from pathlib import Path
import os
import sys
sys.path.append(os.pardir)

import numpy as np
import pandas as pd 
from tqdm import tqdm
from sklearn.utils import check_random_state

import matplotlib.pyplot as plt
from utils import fix_seed, empty_metrics
from run import run_dynamic_match
from visualization_seed import plot_match_per, plot_number_user_retain, plot_user_retain

from synthetic_data import generate_data, generate_reward_data, train_model
import conf
import os
import pickle
# Notebook-level logger. The working directory is logged because the cells
# below address their inputs and outputs with relative paths.
logger = getLogger(__name__)
logger.info(f"The current working directory is {Path.cwd()}")


In [None]:
import matplotlib.pyplot as plt

# Line colour per method label; the legend-only figures look labels up here.
# NOTE(review): "Optimal" and "MRet (best)" share "tab:blue" -- confirm they
# are never drawn on the same axes.
color_dict = {
    "MRet": "tab:red",
    "Optimal": "tab:blue",
    "MRet (best)": "tab:blue",
    "Uniform": "tab:green",
    "Max Match": "tab:cyan",
    "FairCo": "tab:pink",
    "FairCo (equal exposure)": "blueviolet",
    # other colour names noted by the author: brown, cyan, grey, pink, purple, olive
}

plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 2))

# Empty series draw nothing but still register one legend handle per label.
legend_labels = (
    "Max Match",
    "FairCo",
    "FairCo (equal exposure)",
    "Uniform",
    "MRet",
    "MRet (best)",
)
for label in legend_labels:
    ax.plot(
        [],
        [],
        label=label,
        color=color_dict[label],
        marker='o',
        markersize=13,
        linewidth=5,
    )

# The column count follows conf.show_method_list, not the number of labels above.
ax.legend(loc="center", ncol=len(conf.show_method_list), fontsize=18)
ax.axis('off')
fig.subplots_adjust(left=0.8, right=0.9, top=0.2, bottom=0.1)

# Write the standalone legend image, creating ../fig on first use.
save_path = Path("../fig/legend.png")
save_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(save_path, dpi=300, bbox_inches="tight")
print(f"Figure saved to {save_path}")

In [None]:
import matplotlib.pyplot as plt

# Second legend-only figure (saved as legend_hist.png): two semi-transparent
# line handles, coloured via color_dict.
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 2))

for label in ('FairCo', 'MRet (best)'):
    # No marker is set here, so only the line itself appears in the legend.
    ax.plot(
        [],
        [],
        label=label,
        color=color_dict[label],
        alpha=0.5,
        linewidth=5,
        markersize=13,
    )

# The column count follows conf.show_method_list, not the two labels above.
ax.legend(loc="center", ncol=len(conf.show_method_list), fontsize=18)
ax.axis('off')
fig.subplots_adjust(left=0.8, right=0.9, top=0.2, bottom=0.1)

# Write the image, creating ../fig if it does not exist yet.
save_path = Path("../fig/legend_hist.png")
save_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(save_path, dpi=300, bbox_inches="tight")
print(f"Figure saved to {save_path}")

In [None]:
# How does MRet perform as the timestep $t$ increases?
from visualization_seed import plot_match_and_user_retain

# Aggregated results of the "T" experiment, read from ../result/T/df.
log_path = Path("../result/T")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")

# Display only: save_path=None means nothing is written to disk.
# NOTE(review): n_x / n_y are hard-coded to 1000 here while the other cells
# pass conf.n_x / conf.n_y -- confirm the two agree.
plot_match_and_user_retain(
    all_data,
    side="both",          # one of "x", "y", "both"
    n_x=1000,             # number of users on the x side
    n_y=1000,             # number of users on the y side
    figsize=(18, 6),      # size of the whole figure
    x_log_scale=False,    # linear x-axis
    y_sig_digits=None,
    legend_ncol=8,        # number of legend columns
    save_path=None,
)


In [None]:
# How does FairCo perform under the equal-exposure fairness criterion?
# This cell does not load data itself: it plots whatever `all_data` is
# currently in the kernel (on a top-to-bottom run, the ../result/T frame) and
# saves the figure to ../fig/synthetic_T_exp.png.
save_path = Path("../fig/synthetic_T_exp.png")
plot_match_and_user_retain(
    all_data,
    side="both",
    # Read the population sizes from conf directly. The bare names n_x / n_y
    # are only assigned in a later cell, so they are undefined at this point
    # on a fresh Restart & Run All.
    n_x=conf.n_x,
    n_y=conf.n_y,
    figsize=(18, 6),
    x_log_scale=False,
    y_sig_digits=None,
    legend_ncol=8,
    save_path=save_path,
)

In [None]:
# Why does FairCo underperform in user retention?
from visualization_seed import plot_histogram

# Inputs: the aggregated CSV plus the pickled results object of the "T"
# experiment.
log_path = Path("../result/T")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with (df_path / "results.pkl").open("rb") as f:
    results = pickle.load(f)

# Settings taken from conf. n_x and n_y are also read by later cells.
method_list = conf.method_list
T, n_x, n_y = conf.T, conf.n_x, conf.n_y

# Same two methods and the same timestep argument (T - 1) for both
# histograms; only the metric and the output file differ.
histogram_jobs = (
    ("active_match", Path("../fig/hist.png")),
    ("effective_active_match", Path("../fig/hist_optimal.png")),
)
for metric, save_path in histogram_jobs:
    plot_histogram(
        results,
        method_list=['MRet (best)', 'FairCo (lam=100)'],
        metric=metric,
        T=T - 1,
        xlabel="number of matches",
        save_path=save_path,
        figsize=(8, 6),
    )

In [None]:
from visualization_seed import plot_match_and_user_retain
from visualization_seed_variable import plot_match_and_user_retain_variable


In [None]:
# How does MRet perform when user popularity varies?
# Reads ../result/kappa/df/all_data_results.csv and passes it to
# plot_match_and_user_retain_variable, saving to ../fig/synthetic_kappa.png.
variable = "kappa"
log_path = Path(f"../result/{variable}")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
save_path = Path(f"../fig/synthetic_{variable}.png")

plot_match_and_user_retain_variable(
    all_data=all_data,
    variable=variable,
    n_x=conf.n_x,
    # Fixed: the y-side size was previously passed as conf.n_x.
    n_y=conf.n_y,
    T=conf.T,
    x_log_scale=False,
    figsize=(18, 6),
    save_path=save_path,  # where the figure is written
)

In [None]:
# How does the proposed method perform when varying the number of users?
# Reads ../result/n_xy/df/all_data_results.csv and passes it to
# plot_match_and_user_retain_variable with a log-scaled x-axis, saving to
# ../fig/synthetic_n_xy.png.
variable = "n_xy"
log_path = Path(f"../result/{variable}")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
save_path = Path(f"../fig/synthetic_{variable}.png")

plot_match_and_user_retain_variable(
    all_data=all_data,
    variable=variable,
    n_x=conf.n_x,
    # Fixed: the y-side size was previously passed as conf.n_x.
    n_y=conf.n_y,
    T=conf.T,
    x_log_scale=True,
    figsize=(18, 6),
    save_path=save_path,  # where the figure is written
)

In [None]:
# How does the hyperparameter of FairCo affect its performance?
# Reads ../result/lambda_/df/all_data_results.csv and passes it to
# plot_match_and_user_retain_variable with a log-scaled x-axis, saving to
# ../fig/synthetic_lambda_.png.
variable = "lambda_"
log_path = Path(f"../result/{variable}")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
save_path = Path(f"../fig/synthetic_{variable}.png")

plot_match_and_user_retain_variable(
    all_data=all_data,
    variable=variable,
    n_x=conf.n_x,
    # Fixed: the y-side size was previously passed as conf.n_x.
    n_y=conf.n_y,
    T=conf.T,
    x_log_scale=True,
    figsize=(18, 6),
    save_path=save_path,  # where the figure is written
)

In [None]:
# How does MRet perform under varying noise levels in the match probabilities?
# Reads ../result/rel_noise/df/all_data_results.csv and passes it to
# plot_match_and_user_retain_variable, saving to ../fig/synthetic_rel_noise.png.
variable = "rel_noise"
log_path = Path(f"../result/{variable}")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
save_path = Path(f"../fig/synthetic_{variable}.png")

plot_match_and_user_retain_variable(
    all_data=all_data,
    variable=variable,
    n_x=conf.n_x,
    # Fixed: the y-side size was previously passed as conf.n_x.
    n_y=conf.n_y,
    T=conf.T,
    x_log_scale=False,
    figsize=(18, 6),
    save_path=save_path,  # where the figure is written
)

In [None]:
# How does MRet perform when popularity drifts over time?
# Reads ../result/time_popularity/df/all_data_results.csv and saves the
# figure to ../fig/synthetic_time_popularity.png.
log_path = Path("../result/time_popularity")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
save_path = Path("../fig/synthetic_time_popularity.png")

plot_match_and_user_retain(
    all_data,
    side="both",
    # Read the sizes from conf, as the parameter-sweep cells do, instead of
    # relying on bare n_x / n_y globals assigned as a side effect of the
    # histogram cell.
    n_x=conf.n_x,
    n_y=conf.n_y,
    figsize=(18, 6),
    x_log_scale=False,
    y_sig_digits=None,
    legend_ncol=8,
    save_path=save_path,
)

In [None]:
# How accurate is MRet as an approximation to the Optimal method?
# Reads ../result/T_optimal/df/all_data_results.csv and saves the figure to
# ../fig/synthetic_T_optimal.png.
log_path = Path("../result/T_optimal")
df_path = log_path / "df"
all_data = pd.read_csv(df_path / "all_data_results.csv")
save_path = Path("../fig/synthetic_T_optimal.png")

plot_match_and_user_retain(
    all_data,
    side="both",
    # Read the sizes from conf, as the parameter-sweep cells do, instead of
    # relying on bare n_x / n_y globals assigned as a side effect of the
    # histogram cell.
    n_x=conf.n_x,
    n_y=conf.n_y,
    figsize=(18, 6),
    x_log_scale=False,
    y_sig_digits=None,
    legend_ncol=8,
    save_path=save_path,
)