In [27]:
import sys
sys.path.append("../")

import json
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import torch

from source.evaluate_models import evaluate_model
from source.losses import preference_loss_function, preference_loss_function_2 
from source.mlp import MLP
from source.training import train_reward_model

import re

from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier


In [32]:
import torch.nn.functional as F
a = torch.rand(4,1)
b = torch.rand(4,1)

c = torch.cat([a,b], dim=1)
d = F.softmax(c, dim=1)
print(d.size())

torch.Size([4, 2])


In [None]:
# from xgboost import XGBClassifier
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score


In [41]:
# Input paths
synthetic_preferences_path = "D:\\Work\\EleutherAI\\fairness_gym\\ml-fairness-gym\\fixed_gpt_preferences_formatted_all.csv"

# Output paths
## Fair
fair_reward_model_name = f"fair_reward_model"
fair_training_curve_path_prefix = f"../data/fair_reward_model_loss"
fair_model_eval_path = f"../data/fair_reward_model_eval.json"
## Greedy
greedy_reward_model_name = f"greedy_reward_model"
greedy_training_curve_path_prefix = f"../data/greedy_reward_model_loss"
greedy_model_eval_path = f"../data/greedy_reward_model_eval.json"

# Schema
## For the input dataframe for reward modelling
state_action_features = ['Trajectory_A', 'Trajectory_B', 'default_rate-group_1', 'default_rate-group_2', 
                         'acceptance_rate-group_1', 'acceptance_rate-group_2', 
                         'average_credit_score-group_2', 'average_credit_score-group_1',
                         'applicant_credit_group',
                         'applicant_group_membership',
                         'agent_action']

target = "target"


In [44]:
def n_sample_trajectory_df(input_df, n_sample_points = 20):
    trajectory_df = input_df.copy()
    if len(trajectory_df)//2 > n_sample_points:
        sample_step_size = len(trajectory_df) // n_sample_points
        trajectory_df = trajectory_df.reset_index(drop=True)
        sample_index = [i for i in range(len(trajectory_df)) if i%sample_step_size == 0]
        trajectory_df_sampled = trajectory_df[trajectory_df.index.isin(sample_index)]
        return trajectory_df_sampled
    else:
        return trajectory_df

def split_fair_greedy_preferences(synthetic_preferences_df):
    static_cols = ["Trajectory_A", "Trajectory_B"]
    fair_cols = [i for i in synthetic_preferences_df.columns if 'fair' in i.lower()]
    greedy_cols = [i for i in synthetic_preferences_df.columns if 'greedy' in i.lower()]
    # Splitting
    fair_preferences_df = synthetic_preferences_df[static_cols + fair_cols]
    greedy_preferences_df = synthetic_preferences_df[static_cols + greedy_cols]
    # Drop NaN
    fair_preferences_df = fair_preferences_df.dropna(subset=fair_cols)
    greedy_preferences_df = greedy_preferences_df.dropna(subset=greedy_cols)
    # Normalize columns naming conventions
    fair_preferences_df.columns = [i.strip("_fair").lower() for i in fair_preferences_df.columns]
    greedy_preferences_df.columns = [i.strip("_greedy").lower() for i in greedy_preferences_df.columns]
    return fair_preferences_df, greedy_preferences_df
    
def extract_trajectory_data(preferences_df, n_sample_points = 20):

    preferences_df_formatted = pd.DataFrame()
    modelling_df = preferences_df.copy()
    for idx, row in tqdm(modelling_df.iterrows(), total=len(modelling_df)):

        option_a_file, option_b_file = row['trajectory_a'], row['trajectory_b']
        tmp_df_a = pd.read_csv(f"../data/trajectories/{option_a_file}", index_col=[0])
        tmp_df_b = pd.read_csv(f"../data/trajectories/{option_b_file}", index_col=[0])

        tmp_df_a_sampled = n_sample_trajectory_df(tmp_df_a, n_sample_points=n_sample_points)
        tmp_df_b_sampled = n_sample_trajectory_df(tmp_df_b, n_sample_points=n_sample_points)

        tmp_df_a_sampled = tmp_df_a_sampled[state_action_features]
        tmp_df_a_sampled["target"] = 1 if row['decision'] == "a" else 0

        tmp_df_b_sampled = tmp_df_b_sampled[state_action_features]
        tmp_df_b_sampled["target"] = 1 if row['decision'] == "b" else 0

        tmp_df = pd.concat([tmp_df_a_sampled, tmp_df_b_sampled], axis=0)
        preferences_df_formatted = preferences_df_formatted.append(tmp_df)

    preferences_df_formatted = preferences_df_formatted.reset_index(drop=True)    
    return preferences_df_formatted

def get_trajectories_comparison_df(df,
                                  state_action_features,
                                  target,
                                  trajecotry_folder="../data/trajectories", 
                                  n_sample_points=20
                                 ):
    res_df = pd.DataFrame()
    modelling_df = df.copy()
    for idx, row in tqdm(modelling_df.iterrows(), total=len(modelling_df)):
        print(row)
        option_a_file = row['Trajectory_A']
        option_b_file = row['Trajectory_B']
        tmp_df_a = pd.read_csv(f"{trajecotry_folder}/{option_a_file}", index_col=[0])
        tmp_df_b = pd.read_csv(f"{trajecotry_folder}/{option_b_file}", index_col=[0])

        tmp_df_a_sampled = n_sample_trajectory_df(tmp_df_a, n_sample_points=n_sample_points)
        tmp_df_b_sampled = n_sample_trajectory_df(tmp_df_b, n_sample_points=n_sample_points)    
        tmp_df_a_sampled = tmp_df_a_sampled[state_action_features]
        tmp_df_b_sampled = tmp_df_b_sampled[state_action_features]

        tmp_df_a_sampled.columns = [f"{i}_a" for i in tmp_df_a_sampled.columns]
        tmp_df_b_sampled.columns = [f"{i}_b" for i in tmp_df_b_sampled.columns]
        tmp_df = pd.concat([tmp_df_a_sampled, tmp_df_b_sampled], axis=1)

        tmp_df[target] = 0 if row[target] == 'a' else 1  
        res_df = res_df.append(tmp_df)
    res_df = res_df.reset_index(drop=True)
    return res_df
    
def train_reward_model_wrapper(preferences_df_formatted, 
                               state_action_features,
                               target,
                               preference_loss_function,
                               model_hidden_config = [64, 64], 
                               num_epochs=10,
                               reward_model_name="reward_model",
                               kfold=3,
                               model_eval_path="./model_eval_report.json"
                              ):
    target = "target"
    X = preferences_df_formatted[state_action_features]
    y = preferences_df_formatted[target]

    skf = StratifiedKFold(n_splits=kfold)

    metrics_report_history = {}
    losses_history = {}
    for i, (train_index, test_index) in enumerate(skf.split(X, y.apply(str))):
        print(f"Fold {i}")
        train_df = preferences_df_formatted[preferences_df_formatted.index.isin(train_index)]
        test_df = preferences_df_formatted[preferences_df_formatted.index.isin(test_index)]

        # Training
        X_train, y_train = train_df[state_action_features], train_df[target]
        X_test, y_test = test_df[state_action_features], test_df[target]
        
        train_df = pd.concat([X_train, y_train], axis=1)
        test_df = pd.concat([X_test, y_test], axis=1)
        # Attach the trajectories to the file names
        train_trajectories = get_trajectories_comparison_df(train_df,
                                  state_action_features,
                                  target=target,
                                  trajecotry_folder="../data/trajectories", 
                                  n_sample_points=20
                                 )
        test_trajectories = get_trajectories_comparison_df(test_df,
                                  state_action_features,
                                  target=target,
                                  trajecotry_folder="../data/trajectories", 
                                  n_sample_points=20
                                 )
        input_dim = (train_trajectories.shape[1]-1)//2
        reward_model = MLP(name=reward_model_name, 
                           layer_dims=[len(state_action_features)+1] + model_hidden_config + [1],
                           out_act=None)
        y_train = train_trajectories[target].to_numpy()
        X_train = train_trajectories.drop(target, axis=1)
#         X_train = train_trajectories[state_action_features].to_numpy()
        losses = train_reward_model(
                    model,
                    input_dim,
                    X_train,
                    y_train,
                    loss_function=preference_loss_function_2,
                    learning_rate=0.0001,
                    num_epochs=num_epochs,
                    batch_size=256,
                    save_dir="./models/"
                )
        losses_history[i] = losses
    
        # K-Fold testing
        
#         predictions, metrics_report = evaluate_model(reward_model, test_features, test_decisions)
        y_pred = model.predict(X_test)
        # evaluate predictions
        confusion_mat = metrics.confusion_matrix(y_test, y_pred)
        metrics_report = metrics.classification_report(y_test, y_pred, output_dict=True)
        metrics_report["confusion_matrix"] = str(confusion_mat)
        metrics_report_history[i] = metrics_report

    # the json file where the output must be stored
    with open(model_eval_path, "w") as f:
        json.dump(metrics_report_history, f)

    return metrics_report_history, losses_history

def get_accuracy_result(metrics_report_history):
    acc_list = []
    for k, v in metrics_report_history.items():
        acc_list.append(v['accuracy'])

    print("Mean (5-fold): ", np.array(acc_list).mean())
    print("Std (5-fold): ", np.array(acc_list).std())
    return acc_list

def display_kfold_metric_report(metrics_report_history):
    for k, v in metrics_report_history.items():
        print(k)
        display(pd.DataFrame(v))


In [22]:
synthetic_preferences_df = pd.read_csv(synthetic_preferences_path)
fair_preferences_df, greedy_preferences_df = split_fair_greedy_preferences(synthetic_preferences_df)

fair_preferences_df_formatted = extract_trajectory_data(fair_preferences_df, n_sample_points = 20)
greedy_preferences_df_formatted = extract_trajectory_data(greedy_preferences_df, n_sample_points = 20)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4838/4838 [03:48<00:00, 21.16it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4814/4814 [03:46<00:00, 21.24it/s]


In [45]:
fair_metrics_report_history, fair_losses_history = train_reward_model_wrapper(fair_preferences_df_formatted, 
                                                                               state_action_features,
                                                                               target,
                                                                               preference_loss_function,
                                                                               num_epochs=3,
                                                                               model_hidden_config = [32, 32],
                                                                               reward_model_name="reward_model",
                                                                               kfold=2
                                                                              )

KeyError: "['Trajectory_A', 'Trajectory_B'] not in index"

In [12]:
fair_acc_list = get_accuracy_result(fair_metrics_report_history)


Mean (5-fold):  0.5093065316246383
Std (5-fold):  0.0020721372467961685


In [13]:
print(fair_metrics_report_history)

{0: {'0': {'precision': 0.5080645161290323, 'recall': 0.6724137931034483, 'f1-score': 0.5787985865724382, 'support': 48720}, '1': {'precision': 0.5055762081784386, 'recall': 0.33971690258118237, 'f1-score': 0.4063745019920319, 'support': 48040}, 'accuracy': 0.5072343943778421, 'macro avg': {'precision': 0.5068203621537355, 'recall': 0.5060653478423154, 'f1-score': 0.49258654428223503, 'support': 96760}, 'weighted avg': {'precision': 0.5068291056913874, 'recall': 0.5072343943778421, 'f1-score': 0.4931924164273088, 'support': 96760}, 'confusion_matrix': '[[32760 15960]\n [31720 16320]]'}, 1: {'0': {'precision': 0.5113092341741355, 'recall': 0.6686165845648604, 'f1-score': 0.5794768253742362, 'support': 48720}, '1': {'precision': 0.5115125109678981, 'recall': 0.3519150707743547, 'f1-score': 0.41696365811249086, 'support': 48040}, 'accuracy': 0.5113786688714345, 'macro avg': {'precision': 0.5114108725710168, 'recall': 0.5102658276696075, 'f1-score': 0.49822024174336355, 'support': 96760}, 

In [14]:
greedy_metrics_report_history, greedy_losses_history = train_reward_model_wrapper(greedy_preferences_df_formatted, 
                                                                               state_action_features,
                                                                               target,
                                                                               preference_loss_function,
                                                                               num_epochs=3,
                                                                               model_hidden_config = [32, 32],
                                                                               reward_model_name="reward_model",
                                                                               kfold=2
                                                                              )


Fold 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 1


In [15]:
greedy_acc_list = get_accuracy_result(greedy_metrics_report_history)


Mean (5-fold):  0.5205130868300789
Std (5-fold):  0.00929580390527629


In [16]:
print(greedy_metrics_report_history)

{0: {'0': {'precision': 0.5298088907353552, 'recall': 1.0, 'f1-score': 0.6926471586665761, 'support': 51010}, '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 45270}, 'accuracy': 0.5298088907353552, 'macro avg': {'precision': 0.2649044453676776, 'recall': 0.5, 'f1-score': 0.34632357933328806, 'support': 96280}, 'weighted avg': {'precision': 0.28069746070222756, 'recall': 0.5298088907353552, 'f1-score': 0.3669706228041342, 'support': 96280}, 'confusion_matrix': '[[51010     0]\n [45270     0]]'}, 1: {'0': {'precision': 0.5248584015103839, 'recall': 0.8174867673005293, 'f1-score': 0.6392764065613982, 'support': 51010}, '1': {'precision': 0.44682115270350564, 'recall': 0.16611442456372874, 'f1-score': 0.24219001610305962, 'support': 45270}, 'accuracy': 0.5112172829248026, 'macro avg': {'precision': 0.48583977710694476, 'recall': 0.491800595932129, 'f1-score': 0.44073321133222887, 'support': 96280}, 'weighted avg': {'precision': 0.48816598092991675, 'recall': 0.5112172829

***
## End of notebook