In [39]:
# Re-import edited modules automatically before each cell execution.
%reload_ext autoreload
%autoreload 2

import warnings
# NOTE: this silences *every* warning for the rest of the session.
warnings.filterwarnings('ignore')
from logging import getLogger
from pathlib import Path
import os
import sys
# Put the parent directory on the import path; presumably that is where the
# project modules imported below (utils, run, synthetic_data, conf) live.
sys.path.append(os.pardir)

import numpy as np
import pandas as pd 
from tqdm import tqdm
from sklearn.utils import check_random_state

import matplotlib.pyplot as plt
from utils import fix_seed, empty_metrics
from run import run_dynamic_match


from synthetic_data import generate_data, generate_reward_data, train_model
# Experiment configuration (T, n_x, n_y, seeds, method/lambda lists, ...).
import conf

In [27]:
# Name of the swept parameter. It is used as the result sub-directory name
# (../result/<variable>) and as the column holding the swept value in the
# results table.
variable = "lambda_"

In [28]:
# Short module-level aliases for the configured horizon and the two side sizes.
T, n_x, n_y = conf.T, conf.n_x, conf.n_y

In [None]:
logger = getLogger(__name__)
logger.info(f"The current working directory is {Path().cwd()}")

# Results of this sweep are written under ../result/<variable>/df.
log_path = Path(f"../result/{variable}")
df_path = log_path / "df"
df_path.mkdir(exist_ok=True, parents=True)

# Metrics that are reported as the mean of their value at time step t.
mean_metric_names = [
    "match_x", "match_y",
    "exposure_x", "exposure_y",
    "fair_x", "fair_y",
    "active_users_x", "active_users_y",
    "user_retain_x", "user_retain_y",
    "true_user_retain_x", "true_user_retain_y",
]

# Explicit column order of the saved table. It reproduces the order the CSV
# had when the frame was grown by repeated concatenation, so existing
# consumers of all_data_results.csv keep seeing the same layout.
result_columns = [
    "seed", variable, "t", "method",
    "match_x", "match_y",
    "active_users_x", "active_users_y",
    "user_retain_x", "user_retain_y",
    "true_user_retain_x", "true_user_retain_y",
    "exposure_x", "exposure_y",
    "fair_x", "fair_y",
    "mse_user_retain_x", "mse_user_retain_y",
]


def _simulate(method, dataset, model, random_state, **extra_kwargs):
    """Run one dynamic-matching simulation and return the metrics of `method`.

    `extra_kwargs` is forwarded to `run_dynamic_match` unchanged; it is used
    to pass `lambda_` only for the method that depends on it, so the other
    methods keep running with `run_dynamic_match`'s own default.
    """
    results = {method: empty_metrics(conf.T, conf.n_x, conf.n_y)}
    run_dynamic_match(
        dataset,
        model=model,
        proportion=conf.proportion,
        reward_type=conf.reward_type,
        ranking_metric=conf.ranking_metric,
        results=results,
        noise=conf.noise,
        random_state=random_state,
        **extra_kwargs,
    )
    return results[method]


def _summarize(metrics, t):
    """Collapse the metrics of a single time step `t` into scalar values."""
    row = {name: metrics[name][t].mean() for name in mean_metric_names}
    for side in ("x", "y"):
        # Compare prediction and ground truth at the SAME time step. The
        # previous version subtracted the whole `user_retain_*` container
        # here (missing `[t]`), which mixed all time steps into every row.
        error = metrics[f"true_user_retain_{side}"][t] - metrics[f"user_retain_{side}"][t]
        row[f"mse_user_retain_{side}"] = (error ** 2).mean()
    return row


# One dict per (seed, lambda, method, t); the DataFrame is built once at the
# end instead of being re-allocated by pd.concat on every iteration.
records = []

for seed in tqdm(range(conf.num_seeds), desc="Processing seeds"):
    seed_state = conf.random_state + 1 + seed
    random_ = check_random_state(seed_state)

    # NOTE(review): the reward data is generated from the base random state,
    # not the per-seed one -- confirm that sharing it across seeds is intended.
    reward_data = generate_reward_data(
        dim=conf.dim,
        T=conf.T,
        alpha_param=conf.alpha_param,
        beta_param=conf.beta_param,
        random_state=conf.random_state)

    model = train_model(
        dim=conf.dim,
        T=conf.T,
        n_train=conf.n_train,
        reward_data=reward_data,
        alpha_param=conf.alpha_param,
        beta_param=conf.beta_param,
        random_state=seed_state)

    dataset = generate_data(
        n_x=conf.n_x,
        n_y=conf.n_y,
        dim=conf.dim,
        rel_noise=conf.rel_noise,
        T=conf.T,
        K=conf.K,
        kappa=conf.kappa,
        reward_data=reward_data,
        alpha_param=conf.alpha_param,
        beta_param=conf.beta_param,
        random_state=seed_state,
        random_=random_,
    )

    # Methods other than FairCo are not given lambda_, so they are simulated
    # once per seed and their metrics are reused for every lambda below.
    static_results_by_method = {
        method: _simulate(method, dataset, model, seed_state)
        for method in conf.method_list
        if method != "FairCo"
    }

    # === Execute for all lambdas ===
    for lambda_ in tqdm(conf.lambda_list, desc="Processing lambda"):
        for method in conf.method_list:
            if method == "FairCo":
                metrics = _simulate(method, dataset, model, seed_state, lambda_=lambda_)
            else:
                metrics = static_results_by_method[method]

            # Record results; time step 0 is skipped, as before.
            for t in range(1, conf.T):
                records.append({
                    "seed": seed,
                    variable: lambda_,
                    "t": t,
                    "method": method,
                    **_summarize(metrics, t),
                })

# DataFrame with the results of all seeds, lambdas, methods and time steps.
all_data = pd.DataFrame(records, columns=result_columns)

# Save
all_data["t"] = pd.to_numeric(all_data["t"], errors="coerce")
all_data.to_csv(df_path / "all_data_results.csv", index=False)


In [30]:
# Load the saved results table back from disk into `all_data`.
all_data = pd.read_csv(df_path.joinpath("all_data_results.csv"))

In [None]:
from visualization_seed_variable import run_visual

# Hand the results table to the project's plotting routine, together with the
# swept variable name and the configured sizes. x_log_scale=True presumably
# puts the swept variable on a logarithmic x axis -- confirm in run_visual.
run_visual(all_data=all_data, variable=variable,
           n_x=conf.n_x, n_y=conf.n_y, T=conf.T,
           x_log_scale=True)