In [11]:
import sys
sys.path.append("../")

import os
import logging
import json
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import torch
import torch.nn.functional as F
from source.evaluate_models import evaluate_model
from source.losses import preference_loss_function,preference_loss_function_2
from source.mlp import MLP
from source.training import train_reward_model
from source.datasets import TabularDataset

import re

from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier



In [12]:
# Input paths
synthetic_preferences_path = "D:\\Work\\EleutherAI\\fairness_gym\\ml-fairness-gym\\fixed_gpt_preferences_formatted_all.csv"

# Output paths
## Fair
fair_reward_model_name = f"fair_reward_model"
fair_training_curve_path_prefix = f"../data/fair_reward_model_loss"
fair_model_eval_path = f"../data/fair_reward_model_eval.json"
## Greedy
greedy_reward_model_name = f"greedy_reward_model"
greedy_training_curve_path_prefix = f"../data/greedy_reward_model_loss"
greedy_model_eval_path = f"../data/greedy_reward_model_eval.json"

# Schema
## For the input dataframe for reward modelling
state_action_features = ['default_rate-group_1', 'default_rate-group_2', 
                         'acceptance_rate-group_1', 'acceptance_rate-group_2', 
                         'average_credit_score-group_2', 'average_credit_score-group_1',
                         'applicant_credit_group',
                         'applicant_group_membership',
                         'agent_action']

target = "target"


In [22]:
def n_sample_trajectory_df(input_df, n_sample_points = 20):
    trajectory_df = input_df.copy()
    if len(trajectory_df)//2 > n_sample_points:
        sample_step_size = len(trajectory_df) // n_sample_points
        trajectory_df = trajectory_df.reset_index(drop=True)
        sample_index = [i for i in range(len(trajectory_df)) if i%sample_step_size == 0]
        trajectory_df_sampled = trajectory_df[trajectory_df.index.isin(sample_index)]
        return trajectory_df_sampled
    else:
        return trajectory_df

def split_fair_greedy_preferences(synthetic_preferences_df):
    static_cols = ["Trajectory_A", "Trajectory_B"]
    fair_cols = [i for i in synthetic_preferences_df.columns if 'fair' in i.lower()]
    greedy_cols = [i for i in synthetic_preferences_df.columns if 'greedy' in i.lower()]
    # Splitting
    fair_preferences_df = synthetic_preferences_df[static_cols + fair_cols]
    greedy_preferences_df = synthetic_preferences_df[static_cols + greedy_cols]
    # Drop NaN
    fair_preferences_df = fair_preferences_df.dropna(subset=fair_cols)
    greedy_preferences_df = greedy_preferences_df.dropna(subset=greedy_cols)
    # Normalize columns naming conventions
    fair_preferences_df.columns = [i.strip("_fair").lower() for i in fair_preferences_df.columns]
    greedy_preferences_df.columns = [i.strip("_greedy").lower() for i in greedy_preferences_df.columns]
    return fair_preferences_df, greedy_preferences_df

def extract_trajectory_data(preferences_df, state_action_features, n_sample_points = 20):

    preferences_df_formatted = pd.DataFrame()
    modelling_df = preferences_df.copy()
    for idx, row in tqdm(modelling_df.iterrows(), total=len(modelling_df)):

        option_a_file, option_b_file = row['trajectory_a'], row['trajectory_b']
        tmp_df_a = pd.read_csv(f"../data/trajectories/{option_a_file}", index_col=[0])
        tmp_df_b = pd.read_csv(f"../data/trajectories/{option_b_file}", index_col=[0])

        tmp_df_a_sampled = n_sample_trajectory_df(tmp_df_a, n_sample_points=n_sample_points)
        tmp_df_b_sampled = n_sample_trajectory_df(tmp_df_b, n_sample_points=n_sample_points)

        tmp_df_a_sampled = tmp_df_a_sampled[state_action_features]
        tmp_df_b_sampled = tmp_df_b_sampled[state_action_features]
        
        tmp_df_a_sampled.columns = [f"{i}_a" for i in tmp_df_a_sampled.columns]
        tmp_df_b_sampled.columns = [f"{i}_b" for i in tmp_df_a_sampled.columns]

        tmp_df = pd.concat([tmp_df_a_sampled, tmp_df_b_sampled], axis=1)
        tmp_df["target"] = 1 if row['decision'] == "b" else 0
        preferences_df_formatted = preferences_df_formatted.append(tmp_df)

    preferences_df_formatted = preferences_df_formatted.reset_index(drop=True)    
    state_action_features_extended = list(preferences_df_formatted.columns)[:-1]
    return preferences_df_formatted, state_action_features_extended

def preference_loss_function_3(sum_a, sum_b, decisions):
    '''
    sum_a -> batch_size, 1
    sum_b -> batch_size, 1
    '''
    stacked_tensor = torch.cat([sum_a, sum_b], dim=1)
    stacked_tensor = stacked_tensor.to(torch.float32)
    decisions = decisions.to(torch.float32)
    loss = F.cross_entropy(stacked_tensor, decisions)
    return loss
    
def train_reward_model_wrapper(preferences_df_formatted, 
                               state_action_features,
                               target,
                               preference_loss_function,
                               model_hidden_config = [64, 64], 
                               num_epochs=10,
                               reward_model_name="reward_model",
                               kfold=3,
                               batch_size=256,
                               model_eval_path="data/model_eval_report.json"
                              ):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    target = "target"
    
    X = preferences_df_formatted[state_action_features]
    y = preferences_df_formatted[target]

    skf = StratifiedKFold(n_splits=kfold)

    metrics_report_history = {}
    losses_history = {}
    for i, (train_index, test_index) in enumerate(skf.split(X, y.apply(str))):
        print(f"Fold {i}")
        train_df = preferences_df_formatted[preferences_df_formatted.index.isin(train_index)]
        test_df = preferences_df_formatted[preferences_df_formatted.index.isin(test_index)]
        X_train, y_train = train_df[state_action_features].to_numpy(), train_df[target].to_numpy()
        
        # Get dataloader
        training_dataset = TabularDataset(
            features=X_train, targets=y_train, device=device
            )
        training_loader = torch.utils.data.DataLoader(
                training_dataset, batch_size=batch_size, shuffle=True
            )
        
         
        input_dim = len(state_action_features) // 2
        reward_model = MLP(name=reward_model_name, 
                           layer_dims=[X_train.shape[1]//2] + model_hidden_config + [1], 
                           out_act=None)
        
        losses = train_reward_model(
            reward_model,
            input_dim,
            training_loader,
            loss_function=preference_loss_function_3,
            learning_rate=0.0001,
            num_epochs=num_epochs,
            batch_size=256,
            save_dir="./models/")
        losses_history[i] = losses
    
        # K-Fold testing
        X_test, y_test = test_df[state_action_features].to_numpy(), test_df[target].to_numpy()
        metrics_report, cm_df = evaluate_model(reward_model, X_test, y_test)
        metrics_report_history[i] = metrics_report

    return metrics_report_history, losses_history

def get_accuracy_result(metrics_report_history):
    acc_list = []
    for k, v in metrics_report_history.items():
        acc_list.append(v['accuracy'])

    print("Mean (5-fold): ", np.array(acc_list).mean())
    print("Std (5-fold): ", np.array(acc_list).std())
    return acc_list

def display_kfold_metric_report(metrics_report_history):
    for k, v in metrics_report_history.items():
        print(k)
        display(pd.DataFrame(v))


In [4]:
# synthetic_preferences_df = pd.read_csv(synthetic_preferences_path).head(100)
# fair_preferences_df, greedy_preferences_df = split_fair_greedy_preferences(synthetic_preferences_df)

# # fair_preferences_df_formatted = extract_trajectory_data(fair_preferences_df, n_sample_points = 20)
# # greedy_preferences_df_formatted = extract_trajectory_data(greedy_preferences_df, n_sample_points = 20)

# fair_preferences_df_formatted, state_action_features_extended = extract_trajectory_data(fair_preferences_df, 
#                                                         state_action_features,
#                                                         n_sample_points = 20)



100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:06<00:00, 16.37it/s]


In [16]:
# sum_a = torch.rand(20,1)
# sum_b = torch.rand(20,1)
# decisions = torch.rand(20)
# decisions = F.one_hot(decisions.to(torch.int64), num_classes=2)

# stacked_tensor = torch.cat([sum_a, sum_b], dim=1)
# stacked_tensor = stacked_tensor.to(torch.float32)
# decisions = decisions.to(torch.float32)
# loss = F.cross_entropy(stacked_tensor, decisions)
# print(stacked_tensor.size())

In [23]:
fair_metrics_report_history, fair_losses_history = train_reward_model_wrapper(fair_preferences_df_formatted, 
                                                           state_action_features_extended,
                                                           target,
                                                           preference_loss_function,
                                                           num_epochs=5,
                                                           model_hidden_config = [32, 32],
                                                           reward_model_name="reward_model",
                                                           kfold=5,
                                                           batch_size = 256
                                                          )

Fold 0


 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...


Fold 1


 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...


Fold 2


 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...


Fold 3


 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...


Fold 4


 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...


In [24]:
print(fair_metrics_report_history)

{0:            precision  recall  f1-score  support  accuracy
0               0.28    0.49      0.35    132.0      0.49
1               0.59    0.36      0.45    268.0      0.36
avg/total       0.44    0.42      0.40    400.0      0.42, 1:            precision  recall  f1-score  support  accuracy
0               0.30    0.55      0.39    132.0      0.55
1               0.63    0.37      0.47    268.0      0.37
avg/total       0.46    0.46      0.43    400.0      0.46, 2:            precision  recall  f1-score  support  accuracy
0               0.23    0.45      0.30    132.0      0.45
1               0.47    0.24      0.32    268.0      0.24
avg/total       0.35    0.34      0.31    400.0      0.34, 3:            precision  recall  f1-score  support  accuracy
0               0.33    0.58      0.43    132.0      0.58
1               0.68    0.43      0.53    268.0      0.43
avg/total       0.50    0.50      0.48    400.0      0.50, 4:            precision  recall  f1-score  support  acc

***
## End of notebook