# Data analysis for Chapter "Commitment consistency cases"

In [1]:
# importing libraries
from os import getcwd, path
from pathlib import Path

from pandas import Series, DataFrame
import pandas as pd
import numpy as np
from typing import Set, List

from data_analysis_helper_fun import *


In [2]:
# Relevant dirs and file names

# Parent of the current working directory; all project folders below are
# resolved against it (so the result depends on where the kernel was started).
project_root = Path(getcwd()).parent.absolute()

# relative path to data directory
data_dir = path.join(project_root, "data")
data_set_name = 're_data_20231108_process_results'
data_file_name = data_set_name + '.csv.tar.gz'

# output folders for generated tables and figures
report_dir = path.join(project_root, "report")
tables_output_dir = path.join(report_dir, "tables")
figures_output_dir = path.join(report_dir, "figures")

## Loading data

In [3]:
# Load the result data set from `data_dir`/`data_file_name` into `re_data`.
# NOTE(review): `load_re_data` is not defined in this notebook; presumably it
# comes from the star import of `data_analysis_helper_fun` -- confirm.
re_data = load_re_data(data_dir, data_file_name)

In [4]:
# Label/value pairs for a short overview of the loaded data set.
summary_items = [
    ("   Amount of rows:", len(re_data)),
    ('   Number of dialectical structures:', len(re_data['ds'].unique())),
    ('   Overall number of initial commitments:', len(re_data['init_coms'].unique())),
    ('   Sentence-pools:', re_data['n_sentence_pool'].unique()),
    ('   Models:', re_data['model_name'].unique()),
]

# Heading wrapped in ANSI escape codes (bold on / reset).
print('\033[1m' + 'General information: ' + '\033[0m')
for label, value in summary_items:
    print(label, value)
print('\n')

[1mGeneral information: [0m
   Amount of rows: 309
   Number of dialectical structures: 4
   Overall number of initial commitments: 4
   Sentence-pools: [6 7]
   Models: ['StandardGlobalReflectiveEquilibrium'
 'StandardGlobalReflectiveEquilibriumLinearG'
 'StandardLocalReflectiveEquilibrium'
 'StandardLocalReflectiveEquilibriumLinearG']




## Calculating cases

In [5]:
# the original helper functions
# labelling cases    
def case_names(x, go_or_fp):
    """Label every endpoint of a row with a commitment-consistency case.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row passed by ``apply(..., axis=1)``)
        Must provide ``'init_coms_dia_consistent'`` and, depending on
        ``go_or_fp``, either ``'n_fixed_points'`` and ``'fp_coms_consistent'``
        or ``'n_global_optima'`` and ``'go_coms_consistent'``. The
        ``*_coms_consistent`` entry is indexed by position ``0 .. n-1``.
    go_or_fp : str
        ``'fp'`` to label fixed points, ``'go'`` to label global optima.

    Returns
    -------
    list of str
        One label per endpoint, derived from the pair
        (initial-commitments flag, endpoint flag):

        * (True,  False) -> ``'very_bad'``
        * (True,  True)  -> ``'good'``
        * (False, False) -> ``'bad'``
        * (False, True)  -> ``'very_good'``

    Raises
    ------
    ValueError
        If ``go_or_fp`` is neither ``'fp'`` nor ``'go'``.
    """
    # global optima or fixed points?
    if go_or_fp == 'fp':
        column = 'fp_coms_consistent'
        n = x['n_fixed_points']
    elif go_or_fp == 'go':
        column = 'go_coms_consistent'
        n = x['n_global_optima']
    else:
        # Without this guard an unsupported value fails later with an opaque
        # UnboundLocalError on `column` / `n`.
        raise ValueError(f"go_or_fp must be 'fp' or 'go', got {go_or_fp!r}")

    # (initial commitments consistent?, endpoint commitments consistent?) -> label
    labels = {
        (True, False): 'very_bad',
        (True, True): 'good',
        (False, False): 'bad',
        (False, True): 'very_good',
    }

    init_consistent = bool(x['init_coms_dia_consistent'])
    endpoint_flags = x[column]

    return [labels[(init_consistent, bool(endpoint_flags[i]))] for i in range(n)]

def count_case(x, go_or_fp, case_name):
    """Count how often ``case_name`` occurs among a row's case labels.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide ``'fp_cases'`` or ``'go_cases'`` (depending on
        ``go_or_fp``) holding an object with a ``count`` method, such as the
        list of labels returned by ``case_names``.
    go_or_fp : str
        ``'fp'`` to count among fixed-point cases, ``'go'`` to count among
        global-optima cases.
    case_name : str
        The label to count.

    Returns
    -------
    int
        Number of occurrences of ``case_name`` in the selected column entry.

    Raises
    ------
    ValueError
        If ``go_or_fp`` is neither ``'fp'`` nor ``'go'``.
    """
    # global optima or fixed points?
    if go_or_fp == 'fp':
        column = 'fp_cases'
    elif go_or_fp == 'go':
        column = 'go_cases'
    else:
        # Without this guard an unsupported value fails with an opaque
        # UnboundLocalError on `column`.
        raise ValueError(f"go_or_fp must be 'fp' or 'go', got {go_or_fp!r}")

    return x[column].count(case_name)

# Per-row lists of case labels, one label per global optimum / fixed point.
re_data['go_cases'] = re_data.apply(case_names, axis=1, args=('go',))
re_data['fp_cases'] = re_data.apply(case_names, axis=1, args=('fp',))

# alternative suggestion:
# one boolean flag column per case, named like counts because the flags are
# summed per group later on ("group-by.sum()").
# Each predicate receives (initial commitments flag, fixed-point commitments flag).
fp_case_flags = {
    'fp_n_very_good_cases': lambda init_ok, fp_ok: (not init_ok) and fp_ok,
    'fp_n_good_cases': lambda init_ok, fp_ok: init_ok and fp_ok,
    'fp_n_bad_cases': lambda init_ok, fp_ok: (not init_ok) and (not fp_ok),
    'fp_n_very_bad_cases': lambda init_ok, fp_ok: init_ok and (not fp_ok),
}

for flag_col, is_case in fp_case_flags.items():
    re_data[flag_col] = re_data.apply(
        lambda row: is_case(row['init_coms_dia_consistent'],
                            row['fixed_point_coms_consistent']),
        axis=1,
    )

# Preview of the inputs next to the derived case columns.
preview_cols = [
    'fixed_point_coms', 'global_optima', 'go_coms_consistent',
    'fp_coms_consistent', 'go_cases', 'fp_cases',
    'fp_n_very_good_cases', 'fp_n_good_cases', 'fp_n_bad_cases', 'fp_n_very_bad_cases',
]
display(re_data.head(6)[preview_cols])

Unnamed: 0,fixed_point_coms,global_optima,go_coms_consistent,fp_coms_consistent,go_cases,fp_cases,fp_n_very_good_cases,fp_n_good_cases,fp_n_bad_cases,fp_n_very_bad_cases
0,"{1, 2, 3, 6, -5, -4}","[({2, -4}, {1, 2, 3, 6, -5, -4})]",[False],[False],[bad],[bad],False,False,True,False
1,"{4, 5, 6, -3, -1}","[({6}, {4, 5, 6, -3, -1})]",[False],[False],[bad],[bad],False,False,True,False
2,"{1, 2, 3, -6, -5, -4}","[({-4}, {1, 2, 3, -6, -5, -4}), ({-4}, {1, 3, ...","[True, False]","[True, False]","[very_good, bad]","[very_good, bad]",True,False,False,False
3,"{1, 2, 3, 6, -5, -4}","[({-4}, {1, 2, 3, -6, -5, -4}), ({-4}, {1, 3, ...","[True, False]","[True, False]","[very_good, bad]","[very_good, bad]",False,False,True,False
4,"{4, 5, 6, -3, -1}","[({6}, {4, 5, 6, -3, -1}), ({6}, {1, 4, 5, 6, ...","[False, True]","[False, True]","[bad, very_good]","[bad, very_good]",False,False,True,False
5,"{1, 4, 5, 6, -3}","[({6}, {4, 5, 6, -3, -1}), ({6}, {1, 4, 5, 6, ...","[False, True]","[False, True]","[bad, very_good]","[bad, very_good]",True,False,False,False


In [6]:
# Boolean per-row case flags; summing them per model yields case counts.
fp_case_cols = ['fp_n_very_good_cases', 'fp_n_good_cases', 'fp_n_bad_cases', 'fp_n_very_bad_cases']

result_df = (
    re_data[['model_name'] + fp_case_cols]
    .groupby(['model_name'])
    .sum()
)
# Row total over the four case counts.
result_df['n_branches_calc'] = result_df.sum(axis=1)
display(result_df)


Unnamed: 0_level_0,fp_n_very_good_cases,fp_n_good_cases,fp_n_bad_cases,fp_n_very_bad_cases,n_branches_calc
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
StandardGlobalReflectiveEquilibrium,29,48,24,0,101
StandardGlobalReflectiveEquilibriumLinearG,25,22,32,0,79
StandardLocalReflectiveEquilibrium,14,26,34,0,74
StandardLocalReflectiveEquilibriumLinearG,20,20,15,0,55


In [7]:
# Sum 'n_branches' and 'n_fixed_points' per model, counting every
# combination of the grouping columns below only once.
cols_group_branches = ['model_name','ds','init_coms','weight_account', 'weight_systematicity', 'weight_faithfulness'] 
relevant_cols = ['model_name','n_branches', 'n_fixed_points']

# Keep the first row of every combination of `cols_group_branches`, with all
# other columns retained. `drop_duplicates(subset=...)` does this directly and
# does not rely on the index labels of `re_data` being unique (an index
# round-trip via `.loc` would return every row sharing a kept label).
# NOTE(review): this assumes 'n_branches' and 'n_fixed_points' are identical
# on all rows of one combination -- confirm.
re_data_collapsed_branches = re_data.drop_duplicates(subset=cols_group_branches)

re_data_collapsed_branches[relevant_cols].groupby('model_name').sum()

Unnamed: 0_level_0,n_branches,n_fixed_points
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1
StandardGlobalReflectiveEquilibrium,101,85
StandardGlobalReflectiveEquilibriumLinearG,79,79
StandardLocalReflectiveEquilibrium,74,58
StandardLocalReflectiveEquilibriumLinearG,55,32
