**Results for true gamma experiments**

In [231]:
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [232]:
mlflow.set_tracking_uri('http://localhost:3336')
client = MlflowClient()

model_names = ['Efficient','Baseline_Minimax', 'DR', 'IPW']
experiment_names = ['Synthetic_'+ model_name for model_name in model_names]
experiments = {exp.name: exp.experiment_id for exp in client.search_experiments()}

In [233]:
metric_names = ['regret']
param_names = ['global_seed', 'gamma_data', 'n_samples_train']
data_dict = {}

for experiment_name in experiment_names:
    if experiment_name in experiments:
        experiment_id = experiments[experiment_name]
        # Get all runs for the experiment
        runs = client.search_runs(experiment_ids=[experiment_id])
        # Extract metrics and parameters
        runs_data = [
            {
                **run.data.metrics,
                **run.data.params,
                'run_id': run.info.run_id
            }
            for run in runs
        ]
        combined_df = pd.DataFrame(runs_data)
        
        if not combined_df.empty:
            # Filter the DataFrame to only include the desired metrics and parameters
            columns_to_include = ['run_id'] + metric_names + param_names
            filtered_df = combined_df[columns_to_include]
            data_dict[experiment_name.split("/")[0]] = filtered_df
        else:
            print(f"No runs found for experiment {experiment_name}.")
    else:
        print(f"Experiment {experiment_name} does not exist.")

efficient_estimator = data_dict['Synthetic_Efficient'].drop(columns=['run_id'])
baseline_estimator = data_dict['Synthetic_Baseline_Minimax'].drop(columns=['run_id'])
dr_estimator = data_dict['Synthetic_DR'].drop(columns=['run_id'])
ipw_estimator = data_dict['Synthetic_IPW'].drop(columns=['run_id'])

efficient_estimator['n_samples_train'] = pd.to_numeric(efficient_estimator['n_samples_train'], errors='coerce')
baseline_estimator['n_samples_train'] = pd.to_numeric(baseline_estimator['n_samples_train'], errors='coerce')
dr_estimator['n_samples_train'] = pd.to_numeric(dr_estimator['n_samples_train'], errors='coerce')
ipw_estimator['n_samples_train'] = pd.to_numeric(ipw_estimator['n_samples_train'], errors='coerce')

efficient_estimator['regret'] = pd.to_numeric(efficient_estimator['regret'], errors='coerce')
baseline_estimator['regret'] = pd.to_numeric(baseline_estimator['regret'], errors='coerce')
dr_estimator['regret'] = pd.to_numeric(dr_estimator['regret'], errors='coerce')
ipw_estimator['regret'] = pd.to_numeric(ipw_estimator['regret'], errors='coerce')

efficient_estimator['gamma'] = pd.to_numeric(efficient_estimator['gamma_data'], errors='coerce')
baseline_estimator['gamma'] = pd.to_numeric(baseline_estimator['gamma_data'], errors='coerce')
dr_estimator['gamma'] = pd.to_numeric(dr_estimator['gamma_data'], errors='coerce')
ipw_estimator['gamma'] = pd.to_numeric(ipw_estimator['gamma_data'], errors='coerce')

**Compute mean and standard deviation of regret over randomized policy** 

In [234]:

efficient_mean = efficient_estimator.groupby('gamma')['regret'].mean()
efficient_std = efficient_estimator.groupby('gamma')['regret'].std()

baseline_mean = baseline_estimator.groupby('gamma')['regret'].mean()
baseline_std = baseline_estimator.groupby('gamma')['regret'].std()

dr_mean = dr_estimator.groupby('gamma')['regret'].mean()
dr_std = dr_estimator.groupby('gamma')['regret'].std()

ipw_mean = ipw_estimator.groupby('gamma')['regret'].mean()
ipw_std = ipw_estimator.groupby('gamma')['regret'].std()


In [235]:

combined = pd.DataFrame({
    'baseline_mean': baseline_mean,
    'dr_mean': dr_mean,
    'ipw_mean': ipw_mean,
    'efficient_mean': efficient_mean
})

# Find the minimum regret across baseline_mean, dr_mean, and ipw_mean for each gamma
combined['min_regret'] = combined[['baseline_mean', 'dr_mean', 'ipw_mean']].min(axis=1)

# Calculate relative improvement of efficient_mean over the minimum regret
combined['relative_improvement'] = (combined['min_regret'] - combined['efficient_mean']) / combined['min_regret']

# Select only the relevant columns for output
result = combined[['relative_improvement']]
print(result)


# absolute improvement
combined['absolute_improvement'] = (combined['min_regret'] - combined['efficient_mean'])

# Select only the relevant columns for output
result = combined[['absolute_improvement']]
print(result)


       relative_improvement
gamma                      
1                  0.055312
2                  0.140889
3                  0.058375
4                 -0.410613
5                 -0.629454
6                 -1.230601
7                 -1.796844
8                 -2.007832
9                 -1.868140
10                -3.080515
11                -4.054472
12                -3.136798
13                -3.375028
14                -4.215271
15                -4.656638
16                -2.598421
       absolute_improvement
gamma                      
1                 -0.070960
2                 -0.184353
3                 -0.067273
4                  0.290858
5                  0.370840
6                  0.492651
7                  0.493304
8                  0.442144
9                  0.465468
10                 0.484720
11                 0.577207
12                 0.437396
13                 0.326474
14                 0.403582
15                 0.407692
16                 0