In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import seaborn as sns
import heapq
import csv
import sys
from copy import copy
import os 
module_path = os.path.abspath(os.path.join('../../RL_algorithms'))
sys.path.insert(0, module_path)
from Utils import tsplot, find_filenames_with_extension
from tqdm import tqdm
from scipy.stats import kendalltau
import matplotlib.patches as mpatches
import itertools
import heapq
# Use a 16pt base font for every matplotlib figure drawn after this point.
plt.rcParams['font.size'] = 16

In [6]:
# Directory of the expert user-study files; `all_reward_fns.csv` is read from
# here and `user_overfitting.csv` is written here.
BASE_DIR = "../../User_Studies/Expert-User-Study/user_tests/"
# Root under which each algorithm's saved reward-function performance csvs live.
BASE = "../saved_reward_fn_performances/AAAI_Experiments/"

# Ids of the study participants to process (0 through 29 inclusive).
users = range(30)
# NOTE(review): not referenced by the cells visible here -- confirm before removing.
last_user_id = None

In [4]:
# Result sub-directories (relative to BASE), one per learning algorithm.
DEEP_LOCS = [
    "UserRewardFnReRunWithDefaultParams/PPO",
    "UserRewardFnReRunWithDefaultParams/A2C",
    "UserRewardFnReRunWithDefaultParams/DDQN",
    "UserRewardFnReRunWithDefaultParams/Q_Learn",
]
# The directories actually loaded below; an alias of DEEP_LOCS (same list object).
DIRS_LOCS = DEEP_LOCS

# Maps a result directory to the short label used as its DataFrame column name.
DIR_LABELS = dict([
    ("QLearnAlpha0pt25", "Alpha=0.25"),
    ("QLearnBaseline", "Baseline"),
    ("QLearnGamma0pt8", "Gamma=0.8"),
    ("QLearnGamma0pt5", "Gamma=0.5"),
    ("UserRewardFnReRunWithDefaultParams/PPO", "PPO"),
    ("UserRewardFnReRunWithDefaultParams/A2C", "A2C"),
    ("UserRewardFnReRunWithDefaultParams/DDQN", "DDQN"),
    ("UserRewardFnReRunWithDefaultParams/Q_Learn", "QLearn"),
])

def read_csv_into_heapq(h, csv_filename, line_limit=10):
    """
    Summarise one reward-function results csv and push the summary onto a heapq.

    File layout, as consumed by this function:
      * line 1: hyperparameters (read and discarded),
      * line 2: the reward function; every cell is a Python literal and
        element ``[1]`` of each is kept,
      * every following line: one trial; every cell is a Python literal and
        element ``[0]`` of each is added (minus 1) to that trial's running
        total.

    The running totals are averaged across the trials that were read, and the
    last averaged value is pushed onto ``h`` together with the stringified
    reward function, i.e. ``(final_mean_running_total, str(reward_fn))``.

    heapq documentation: https://docs.python.org/3/library/heapq.html
    :param h: list maintained with heapq; the new entry is pushed onto it in place
    :param csv_filename: string, path of the csv file to read
    :param line_limit: int, the number of trials to read in
    :return: heapq, the same list object that was passed in as ``h``
    :raises IndexError: if no trial rows were read from the file
    """
    performance = []
    # The context manager closes the handle even if parsing fails; this function
    # is called once per results file, so leaked handles would add up.
    with open(csv_filename, "r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader)  # skip the first line (which just records hyperparams)
        reward_fn = next(csv_reader)  # the second line (which just records the reward fn)
        for idx, row in enumerate(csv_reader):
            if idx >= line_limit:
                break  # enough trials read; no need to scan the rest of the file
            running_total = 0
            cumulative = []
            for x in row:
                # NOTE(security): eval executes whatever the csv contains; only
                # use this on trusted, locally generated result files.
                running_total = running_total + eval(x)[0] - 1  # subtract 1 since every ep starts -H
                cumulative.append([running_total])
            performance.append(cumulative)

    if not performance:
        # Previously surfaced as a bare IndexError on data[-1]; keep the
        # exception type but say which file is at fault.
        raise IndexError("no trial rows read from %r" % (csv_filename,))

    data = np.array(performance)
    if len(data.shape) == 3:
        # (trials, episodes, 1) -> (episodes, trials) -> mean over trials
        data = np.concatenate(data, axis=-1)
        data = np.mean(data, axis=-1)

    reward_fn = [eval(elem)[1] for elem in reward_fn]
    heapq.heappush(h, (data[-1], str(reward_fn)))
    return h
    
# Build one DataFrame per algorithm directory, then inner-join them on the
# reward function so each row holds every algorithm's score for that function.
li = []    # per-directory DataFrames, in DIRS_LOCS order
DIRS = []  # the label (column name) of each DataFrame in `li`

print (os.getcwd())  # BASE is a relative path, so show where it resolves from

for dir_loc in DIRS_LOCS:
    label = DIR_LABELS[dir_loc]
    h = []  # filled by read_csv_into_heapq with (score, reward function) tuples

    csv_files = find_filenames_with_extension(BASE + dir_loc, "csv")
    for file in tqdm(csv_files, desc=label):
        read_csv_into_heapq(h, os.path.join(BASE + dir_loc, file))
    DIRS.append(label)
    df = pd.DataFrame(h, columns=[label, 'Reward Function'])
    li.append(df)
    del h


cumulative_df = li[0]
join_cols = ["Reward Function"]
color_cm = plt.get_cmap("viridis")

# DataFrame.merge defaults to an inner join, so a reward function missing from
# any one directory is dropped from the combined table.
for other_df in li[1:]:
    cumulative_df = cumulative_df.merge(other_df, on=join_cols)

# One "<label>_rank" column per loaded directory; ascending=False gives rank 1
# to the highest score. Driven by DIRS so it follows DIRS_LOCS automatically.
for label in DIRS:
    cumulative_df[label + '_rank'] = cumulative_df[label].rank(ascending=False)

/home/serena/AAAI_23_reward_design/Experiments/plotting


100%|████████████████████████████████████████| 107/107 [00:18<00:00,  5.67it/s]
100%|████████████████████████████████████████| 107/107 [00:18<00:00,  5.76it/s]
100%|████████████████████████████████████████| 107/107 [00:19<00:00,  5.57it/s]
100%|████████████████████████████████████████| 107/107 [00:19<00:00,  5.45it/s]


In [7]:
# Write user_overfitting.csv: for every reward function a study participant
# submitted, record whether it appears in cumulative_df and, if so, its rank
# and score under each algorithm.

# Algorithms reported, in output column order; each contributes a
# "<algo> Rank" and a "<algo> Mean" column.
report_algos = ("DDQN", "PPO", "A2C", "QLearn")

# Parse all_reward_fns.csv once and group its rows by user id (column 0),
# keeping file order within each user, instead of re-reading the whole file
# for each of the users.
reward_rows_by_user = {}
with open(os.path.join(BASE_DIR, 'all_reward_fns.csv'), newline='') as csvf:
    csvreader = csv.reader(csvf, delimiter=',', quotechar='"')
    next(csvreader, None)  # skip the header row
    for row in csvreader:
        if not row:
            continue  # csv.reader yields [] for blank lines
        reward_rows_by_user.setdefault(row[0], []).append(row)

with open(os.path.join(BASE_DIR, "user_overfitting.csv"), 'w', newline="") as overfitting_save_csv:
    overfitting_csv_writer = csv.writer(overfitting_save_csv, delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_MINIMAL)
    header = ['User', 'Reward Fn', 'Valid?', 'Selected?']
    for algo in report_algos:
        header += [algo + ' Rank', algo + ' Mean']
    overfitting_csv_writer.writerow(header)

    for user in users:
        print ("User ", user)
        for row in reward_rows_by_user.get(str(user), []):
            user_id = row[0]
            selected_agent = row[7]
            # NOTE(security): eval executes the cell's contents; only run this
            # on a trusted all_reward_fns.csv. Column 2 is expected to hold a
            # dict literal whose values form the reward function.
            reward_fn = list(eval(row[2]).values())
            # cumulative_df keys reward functions by the str() of their value list.
            reward_fn_idx = cumulative_df.loc[cumulative_df['Reward Function'] == str(reward_fn)]
            if reward_fn_idx.empty:
                # Not present in the combined table: short row, no scores.
                overfitting_csv_writer.writerow([user_id,
                                                 reward_fn,
                                                 'FALSE'])
                continue

            out_row = [user_id, reward_fn, 'TRUE', selected_agent]
            for algo in report_algos:
                # int() truncates toward zero, so tied (fractional) ranks and
                # non-integer means lose their fractional part in the output.
                out_row.append(int(reward_fn_idx[algo + "_rank"].values[0]))
                out_row.append(int(reward_fn_idx[algo].values[0]))
            overfitting_csv_writer.writerow(out_row)

User  0
User  1
User  2
User  3
User  4
User  5
User  6
User  7
User  8
User  9
User  10
User  11
User  12
User  13
User  14
User  15
User  16
User  17
User  18
User  19
User  20
User  21
User  22
User  23
User  24
User  25
User  26
User  27
User  28
User  29
