In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import math
import random
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import datetime as dt
import os

from scipy.spatial import distance

In [2]:
import matplotlib as mpl
mpl.use('svg')
new_rc_params = {
    "font.size": 20, #choosing the font size helps latex to place all the labels, ticks etc. in the right place
    "svg.fonttype": 'none',
    'figure.max_open_warning': 0
} 
mpl.rcParams.update(new_rc_params)
#     "font.weight":700
plt.rcParams['axes.facecolor'] = '0.95'

my_font_dict = {
        'size': 20,
        }

In [3]:
saving_dir = "simulations_paper_addition"
if not os.path.exists(saving_dir):
    os.makedirs(saving_dir)

# fix random seed for reproducibility
np.random.seed(7)
# set_random_seed(7)

In [4]:
def convert_to_array(csv_cell_string):
    """
    Gets a string that looks like an array, and returns an actual array
    """
    return np.fromstring(csv_cell_string.replace('[','').replace(']',''), sep=',')

In [5]:
# load simulation results
general_output = pd.read_csv(os.path.join(saving_dir,'general_output_softmax_values_V5.1_paper_addition.csv'),index_col=0)
qlearning_output = pd.read_csv(os.path.join(saving_dir, 'qlearning_output_softmax_values_V5.1_paper_addition.csv'),index_col=0)
no_reward_output = pd.read_csv(os.path.join(saving_dir,'no_reward_output_softmax_values_V5.1_paper_addition.csv'),index_col=0)

# convert to arrays
general_output_df = general_output.applymap(convert_to_array)
qlearning_output_df = qlearning_output.applymap(convert_to_array)
no_reward_output_df = no_reward_output.applymap(convert_to_array)

#### convetion : simulation input will be 1-4 , predictions are actually 0-3 and so converted to 1-4
##### (the model gets 0-3 and outpus 0-3)

In [7]:
def produce_comparison_df(full_df, maybe_full_df, dist_func):
    """
    Distance compare. Gets 2 dataframes (ideally holds the simulation results) and calculate a difference based on a distance function (dist_func).
    The 2 dataframes should be in the same shape.
    """
    softmax_compare = []
    for a_row, b_row in zip(full_df.to_numpy(), maybe_full_df.to_numpy()):
        row_predictions_compare = []
        for a_out, b_out in zip(a_row, b_row):
            row_predictions_compare.append(dist_func(a_out, b_out))
        softmax_compare.append(row_predictions_compare)
    predict_compare_df = pd.DataFrame(np.asmatrix(softmax_compare), index=full_df.index, columns=full_df.columns)
    return predict_compare_df

In [8]:
################## V5.1 FULL #############################
#  Also, not plotting one_different pattern row
##########################################################
# Patterns:
constant_group = ['1 1 1 1', '2 2 2 2', '3 3 3 3', '4 4 4 4']
one_different = ['2 2 2 1', '4 2 2 2', '2 2 2 4', '1 2 2 2', '3 2 2 2', '2 3 3 3', '2 1 1 1']
repeating_two = ['1 2 1 2', '2 1 2 1', '2 3 2 3', '3 2 3 2', '4 3 4 3', '3 4 3 4']
all_different = ['1 2 3 4', '4 3 2 1', '2 3 4 1', '1 4 3 2', '3 4 1 2', '2 1 4 3', '4 1 2 3', '3 2 1 4']

all_patterns_groups = [constant_group, one_different, repeating_two, all_different]


# Rewards:
constant_group_rew = ['10 10 10 10', '15 15 15 15', '20 20 20 20', '25 25 25 25', '30 30 30 30', '35 35 35 35',
                      '40 40 40 40', '45 45 45 45', '50 50 50 50', '55 55 55 55', '60 60 60 60', '65 65 65 65',
                      '70 70 70 70', '75 75 75 75', '80 80 80 80', '85 85 85 85', '90 90 90 90']

ascending_rew = ['10 20 30 40', '15 20 25 30', '10 30 50 70', '10 40 70 90', '30 35 40 45', '45 50 55 65',
                 '50 55 65 75', '55 65 75 85', '65 75 85 95', '75 80 85 90', '10 15 20 25', '25 45 65 85',
                 '40 60 80 90', '60 70 80 90', '20 30 70 80', '40 50 60 70', '50 60 70 80']

descending_rew = ['90 80 70 60', '80 70 60 50', '70 60 50 40', '60 50 40 30', '50 40 30 20', '40 30 20 10',
                  '90 70 50 30', '70 50 30 10', '95 75 55 35', '75 55 35 15', '80 60 40 20', '85 65 45 25',
                  '90 70 40 10', '90 60 30 10', '90 50 30 10', '90 40 30 20', '90 30 20 10', '60 30 20 10']


one_different_rew_good = ['10 10 10 90', '20 20 20 90', '30 30 30 90', '40 40 40 90', '50 50 50 90', '60 60 60 90', 
                          '10 10 90 10', '20 20 90 20', '30 30 90 30', '40 40 90 40', '50 50 90 50', '20 80 20 20',
                          '30 90 30 30', '60 90 60 60', '80 20 20 20', '90 30 30 30', '90 40 40 40', '90 50 50 50',
                          '20 20 20 80', '40 40 40 80', '60 60 60 80', '20 20 20 40', '40 40 40 60', '20 20 20 60',
                          '20 20 80 20', '40 40 80 40', '60 60 80 40', '20 20 40 20', '40 40 60 40', '20 20 60 20',
                          '20 80 20 20', '40 80 40 40', '60 80 60 60', '20 40 20 20', '40 60 40 40', '20 60 20 20',
                          '80 20 20 20', '80 40 40 40', '80 60 60 60', '40 20 20 20', '60 40 40 40', '60 20 20 20']


one_different_rew_bad = ['90 90 90 10', '90 90 90 20', '90 90 90 30', '90 90 90 40', '90 90 90 50', '90 90 90 60',
                         '90 90 10 90', '90 90 20 90', '90 90 30 90', '90 90 40 90', '90 90 50 90', '80 20 80 80',
                         '90 30 90 90', '90 60 90 90', '20 80 80 80', '30 90 90 90', '40 90 90 90', '50 90 90 90',
                         '80 80 80 20', '80 80 80 40', '80 80 80 60', '40 40 40 20', '60 60 60 40', '60 60 60 20',
                         '80 80 20 80', '80 80 40 80', '80 80 60 80', '40 40 20 40', '60 60 40 60', '60 60 20 60',
                         '80 20 80 80', '80 40 80 80', '80 60 80 80', '40 20 40 40', '60 40 60 60', '60 20 60 60',
                         '20 80 80 80', '40 80 80 80', '60 80 80 80', '20 40 40 40', '40 60 60 60', '20 60 60 60']


all_rewards_groups = [constant_group_rew, ascending_rew, descending_rew, one_different_rew_good, one_different_rew_bad]

In [9]:
all_rewards = []
for i in all_rewards_groups:
    for j in i:
        all_rewards.append(j)
        
all_patterns = []
for i in all_patterns_groups:
    for j in i:
        all_patterns.append(j)

In [10]:
def get_res_table(compare_df, reward_groups, pattern_groups, dim=(4,7), remove_one_diff_pattern=True):
    """
    compare_df: output from produce_comparison_df()
    reward_groups, pattern_groups: the input groups for the simulation.
    Return the average of the groups, ready to be plotted
    """
    res = np.zeros(dim)
    for col_ind, col in enumerate(reward_groups):

        for group_ind, group in enumerate(pattern_groups):
            group_avg = []
            for idx in group:
                group_avg.append(np.average(compare_df.loc[idx,col]))

            res[group_ind,col_ind] = np.average(group_avg)

    res = pd.DataFrame(res,index=['constant','one different','repeating two','all different'],
                   columns=['constant reward','ascending reward','descending reward','one different good reward', 'one different bad reward'])
    if remove_one_diff_pattern:
        res.drop(index='one different', inplace=True)

    return res

In [11]:
def produce_consent_table(res_table, title):
#     title = "custom prediction comparison - Generated vs Original data - IN GROUPS V4"
    my_pcolor_font_dict = {
        'size': 36,
        'weight': 'bold',
    }
    xlabel= "Reward group"
    ylabel= "Pattern group"
    data =  np.asarray(res_table)
    text_val = np.asmatrix(res_table)
    plt.clf()
    fig = plt.gcf()
    plt.figure(figsize=(30, 15))
    plt.title(title)
    plt.xlabel(xlabel,fontsize=26)
    plt.ylabel(ylabel,fontsize=26)
    plt.xticks(range(0,res_table.shape[1]),res_table.columns, fontsize=20)
    y_ticks=plt.yticks(range(0,res_table.shape[0]),res_table.index, fontsize=20)
    c = plt.pcolor(data, edgecolors='k', linewidths=4, cmap='RdBu', vmin=0.0, vmax=1.0)

    def show_values(pc, fmt="%.2f", **kw):
        pc.update_scalarmappable()
        ax = pc.axes
        it = np.nditer(text_val[::,:].ravel(), flags=['f_index'])
        for p, color, value in zip(pc.get_paths(), pc.get_facecolors(), pc.get_array()):
            x, y = p.vertices[:-2, :].mean(0)

            if np.all(color[:3] > 0.5):
                color = (0.0, 0.0, 0.0)
            else:
                color = (1.0, 1.0, 1.0)
            ax.text(x, y, format(it[0],'.3f'), ha="center", va="center", color=color,fontsize=26,fontdict=my_pcolor_font_dict, **kw)
            it.iternext()
    show_values(c)

    plt.colorbar(c, aspect=10, pad=0.02)
    
    plt.savefig(os.path.join(saving_dir, title.replace(' ','_')+'.svg'),quality=95,dpi=200, format='svg')

 - General vs reward-oriented
 - General vs no-reward
 - Reward Oriented vs no-reward 

### no reward vs general

In [12]:
# stretch no-reward to be the same shape as general
for col in general_output_df.columns:
    no_reward_output_df[col] = no_reward_output_df['output']
no_reward_output_df.drop(columns=['output'],inplace=True)

  no_reward_output_df[col] = no_reward_output_df['output']


In [13]:
nr_gen_compare_df = produce_comparison_df(general_output_df, no_reward_output_df, dist_func=distance.euclidean)

In [14]:
nr_gen_compare_df.to_csv(os.path.join(saving_dir,'reward_oblivious_vs_general_distance.csv'))

In [15]:
nr_gen_compare_res = get_res_table(nr_gen_compare_df,reward_groups=all_rewards_groups,pattern_groups=all_patterns_groups, dim=(4,5))

In [16]:
nr_gen_compare_res.to_csv(os.path.join(saving_dir,'reward_oblivious_vs_general_distance_group_avg.csv'))

In [17]:
produce_consent_table(nr_gen_compare_res, title="No-Reward VS General model distance")

  plt.savefig(os.path.join(saving_dir, title.replace(' ','_')+'.svg'),quality=95,dpi=200, format='svg')


-------------------------

## qlearning model vs others

In [19]:
gen_qlearn_compare_df = produce_comparison_df(general_output_df, qlearning_output_df, dist_func=distance.euclidean)
gen_qlearn_compare_df.to_csv(os.path.join(saving_dir,'general_vs_reward_oriented_distance.csv'))
gen_qlearn_compare_res = get_res_table(gen_qlearn_compare_df,reward_groups=all_rewards_groups,pattern_groups=all_patterns_groups, dim=(4,5))
gen_qlearn_compare_res.to_csv(os.path.join(saving_dir,'general_vs_reward_oriented_distance_group_avg.csv'))

no_reward_qlearn_compare_df = produce_comparison_df(no_reward_output_df, qlearning_output_df, dist_func=distance.euclidean)
no_reward_qlearn_compare_df.to_csv(os.path.join(saving_dir,'reward_oblivious_vs_reward_oriented_distance_group_avg.csv'))
no_reward_qlearn_compare_res = get_res_table(no_reward_qlearn_compare_df,reward_groups=all_rewards_groups,pattern_groups=all_patterns_groups, dim=(4,5))
# no_reward_qlearn_compare_res.to_csv(os.path.join(saving_dir,'reward_oblivious_vs_reward_oriented_distance.csv'))

produce_consent_table(gen_qlearn_compare_res, title="General VS Qlearning model distance")

produce_consent_table(no_reward_qlearn_compare_res, title="No-Reward VS Qlearning model distance")
