# Data Generation for "Technical Report - Assessing a Formal Model of Reflective Equilibrium"

In [1]:
# imports 

from rethon import GlobalREEnsembleGenerator, ReflectiveEquilibrium, REState, StandardGlobalReflectiveEquilibrium
from rethon.util import standard_model_params_varied_alphas
from tau.util import create_random_argument_list, tau_dump,tau_load, random_positions

from tau import (
    StandardPosition, 
    DAGDialecticalStructure, 
    DialecticalStructure, 
    BitarrayPosition, 
    BDDDialecticalStructure
)
from tau.util import inferential_density

from os import getcwd, path
from pathlib import Path

from typing import List

import pandas as pd
from pandas import Series

from ast import literal_eval
import tarfile

from math import floor
from datetime import datetime

In [2]:
# Data directory: the folder "data" located in the parent of the current working directory
parent_of_cwd = Path(getcwd()).parent.absolute()
data_dir = path.join(parent_of_cwd, "data")
print(data_dir)
#output_directory = path.join(path.dirname(getcwd()), "data")

/home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data


## Creating dialectical structures and initial commitments

In [3]:
# All relevant parameters for generating the data set.
#
# Two parameter sets are kept side by side. `use_test_set` makes the choice
# explicit, so the first set is no longer silently overwritten by the test
# values further down the cell. With the default (True) the effective values
# are the ones of the TEST SET.
use_test_set = True

if use_test_set:
    # TEST SET
    ### Dialectical structures
    sentence_pool_sizes = [6, 7]
    n_tau_per_sentence_pool = 4
    max_n_premises = 1
    ### Initial commitments
    # number of initial commitments (for each sentence pool)
    n_init_coms = 10
else:
    ### Dialectical structures
    sentence_pool_sizes = [6, 7, 8, 9]
    n_tau_per_sentence_pool = 2
    max_n_premises = 1
    ### Initial commitments
    # number of initial commitments (for each sentence pool)
    n_init_coms = 2

# The data set name carries the date of generation (YYYYMMDD); all files written
# below are prefixed with it.
data_set_name = f"re_data_{datetime.now().strftime('%Y%m%d')}"


In [4]:
# Creation of random dialectical structures: for every sentence pool size we
# draw random argument lists until `n_tau_per_sentence_pool` of them pass the checks.
tau_dict = {}
i = 1
for n_sentences in sentence_pool_sizes:
    # number of structures accepted so far for this sentence pool size
    n_accepted = 0
    while n_accepted < n_tau_per_sentence_pool:

        arguments = create_random_argument_list(n_arguments_min=n_sentences-2,
                                                n_arguments_max=n_sentences+1,
                                                n_sentences=n_sentences,
                                                n_premises_max=max_n_premises)

        tau = BDDDialecticalStructure(n_sentences, arguments)

        # sentences (irrespective of their sign) that occur in at least one argument
        used_sentences = {abs(s) for arg in arguments for s in arg}
        density_in_range = 0.15 <= inferential_density(tau) <= 0.5

        # reject candidates with an unsuitable inferential density or unused sentences
        if not density_in_range or len(used_sentences) != n_sentences:
            continue

        tau_name = f'{data_set_name}_tau_{i:02d}'
        tau.set_name(tau_name)
        tau_dict[tau_name] = tau
        n_accepted += 1
        i += 1

print(tau_dict)

{'re_data_20231108_tau_01': <tau.core.BDDDialecticalStructure object at 0x7ff04a032fa0>, 're_data_20231108_tau_02': <tau.core.BDDDialecticalStructure object at 0x7ff04853aa30>, 're_data_20231108_tau_03': <tau.core.BDDDialecticalStructure object at 0x7ff0486857f0>, 're_data_20231108_tau_04': <tau.core.BDDDialecticalStructure object at 0x7ff04853f9a0>, 're_data_20231108_tau_05': <tau.core.BDDDialecticalStructure object at 0x7ff04853f250>, 're_data_20231108_tau_06': <tau.core.BDDDialecticalStructure object at 0x7ff04853adf0>, 're_data_20231108_tau_07': <tau.core.BDDDialecticalStructure object at 0x7ff04853f460>, 're_data_20231108_tau_08': <tau.core.BDDDialecticalStructure object at 0x7ff0484d95e0>}


In [5]:
# Store all generated dialectical structures in one JSON file.
# `overwrite` controls whether an already existing file may be replaced.
overwrite = True
output_file_path = path.join(data_dir, f'{data_set_name}_taus.json')
file_already_exists = path.exists(output_file_path)
if file_already_exists and not overwrite:
    raise RuntimeError(f'Datafile {output_file_path} already exists. Remove file or set `overwrite` to true.')
print(f'Saving data set {data_set_name} into {output_file_path}.')
with open(output_file_path, 'w') as output_file:
    tau_dump(tau_dict, output_file, indent=4, serialize_implementation=True)

Saving data set re_data_20231108 into /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/re_data_20231108_taus.json.


In [6]:
# Generating random initial commitments:
# `n_init_coms` positions are drawn for each sentence pool size.
init_coms_dict = {}
for n_sentence_pool in sentence_pool_sizes:
    init_coms_dict[n_sentence_pool] = random_positions(n_sentences=n_sentence_pool,
                                                       k=n_init_coms)

print(init_coms_dict)


{6: [{1, 2, 3, 5, -6}, {3, 4, -1, -2}, {1, 3, 5, -6, -2}, {1, 5, -6, -3, -2}, {2, 5, -6, -4, -1}, {2, 5, 4, -3}, {1, -2, 6}, {2, 3, 6, -5, -1}, {1, 2, 3, -5}, {1, -5, 4}], 7: [{3, 7, -2, -6, -5, -1}, {5, -7, -6, -3, -1}, {2, -7, -6, -4, -3}, {2, -6, -5, -4, -1}, {1, 6, 7, -5, -4}, {5, 4, -3, 7}, {1, -5, -4, -7}, {5, -1, 6}, {-7, -3, -1, 6}, {1, 3, -2, 6}]}


In [7]:
# Serializing the initial commitments into a JSON file.
# The `overwrite` flag is the one set in the cell that saves the dialectical
# structures; the error message now names that flag correctly.
output_file_path = path.join(data_dir, f'{data_set_name}_coms.json')
if path.exists(output_file_path) and not overwrite:
    raise RuntimeError(f'Datafile {output_file_path} already exists. Remove file or set `overwrite` to true.')
print(f'Saving data set {data_set_name} into {output_file_path}.')

with open(file=output_file_path, mode='w') as output_file:
    tau_dump(init_coms_dict, output_file, indent=4, serialize_implementation=True)

Saving data set re_data_20231108 into /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/re_data_20231108_coms.json.


## RE ensemble runs

### Loading data

In [3]:
## Load the dialectical structures of a previously generated data set
data_set_name = 're_data_20231108'
tau_file_path = path.join(data_dir, f'{data_set_name}_taus.json')
input_file_path = tau_file_path

print(f'Loading taus from: {tau_file_path}')
with open(input_file_path, 'r') as input_file:
    tau_dict = tau_load(input_file, use_json_specified_type=True)

# distinct sentence pool sizes that occur in the loaded structures
s_pools = {tau.sentence_pool().size() for tau in tau_dict.values()}
# equivalence classes of dialectical structures according to sentence pool size
taus_by_n = {
    n_sentence_pool: [tau for tau in tau_dict.values()
                      if tau.sentence_pool().size() == n_sentence_pool]
    for n_sentence_pool in s_pools
}

print(f'Number of taus in file: {len(tau_dict.values())}')
print(f'Sentence pools: {s_pools}')
#print(f'Tau names (keys): {list(tau_dict.keys())}')
#print(taus_by_n)

Loading taus from: /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/re_data_20231108_taus.json
Number of taus in file: 8
Sentence pools: {6, 7}


In [4]:
# Load the initial commitments that belong to the same data set.
# As the printed result shows, the keys of the loaded dictionary are strings
# and the positions are plain lists.
coms_file_path = path.join(data_dir, f'{data_set_name}_coms.json')
input_file_path = coms_file_path

print(f'Loading init coms from: {coms_file_path}')
with open(input_file_path, 'r') as input_file:
    coms_dict = tau_load(input_file)
print(coms_dict)

Loading init coms from: /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/re_data_20231108_coms.json
{'6': [[1, 2, 3, 5, -6], [3, 4, -1, -2], [1, 3, 5, -6, -2], [1, 5, -6, -3, -2], [2, 5, -6, -4, -1], [2, 5, 4, -3], [1, -2, 6], [2, 3, 6, -5, -1], [1, 2, 3, -5], [1, -5, 4]], '7': [[3, 7, -2, -6, -5, -1], [5, -7, -6, -3, -1], [2, -7, -6, -4, -3], [2, -6, -5, -4, -1], [1, 6, 7, -5, -4], [5, 4, -3, 7], [1, -5, -4, -7], [5, -1, 6], [-7, -3, -1, 6], [1, 3, -2, 6]]}


### Process parameters and used models

In [5]:
# 
# data_set_name = 're_data_20231105'
# name of the csv file that collects the results of all ensemble runs
output_file_name = f'{data_set_name}_process_results.csv'

# RE process parameters
track_branches = True

# RE weights
# weight resolution: 0.10
#for model_parameters in standard_model_params_varied_alphas(9):
# testing
model_parameters_list = []
for model_parameters in standard_model_params_varied_alphas(3):
    # ToDo: Discuss with Andreas
    # round every weight to two decimals; the weights mapping is changed in place
    weights = model_parameters["weights"]
    for weight_name, weight in list(weights.items()):
        weights[weight_name] = round(weight, 2)

    model_parameters_list.append(model_parameters)

#model_parameters_list

# model variants
#
# Every variant uses the module `tau` with `StandardPosition` and an RE class
# from `rethon`; the variants differ only in the dialectical structure class
# and the reflective equilibrium class.
_variant_classes = [
    # globally searching algorithm with quadratic G-function
    ('DAGDialecticalStructure', 'StandardGlobalReflectiveEquilibrium'),
    # globally searching algorithm with linear G-function
    ('DAGDialecticalStructure', 'StandardGlobalReflectiveEquilibriumLinearG'),
    # locally searching algorithm with quadratic G-function
    ('BDDDialecticalStructure', 'StandardLocalReflectiveEquilibriumWithGO'),
    # locally searching algorithm with linear G-function
    ('BDDDialecticalStructure', 'StandardLocalReflectiveEquilibriumLinearGWithGO'),
]

implementations = [
    {'tau_module_name': 'tau',
     'position_class_name': 'StandardPosition',
     'dialectical_structure_class_name': structure_class_name,
     'reflective_equilibrium_module_name': 'rethon',
     'reflective_equilibrium_class_name': re_class_name}
    for structure_class_name, re_class_name in _variant_classes
]


In [6]:
# ToDiscuss: I guess we only need a custom generator if we want to vary/save parameters from the LocalREModel (like search depth)

class CustomEnsembleGenerator(GlobalREEnsembleGenerator):
    """Ensemble generator used for the runs in this notebook.

    It currently adds nothing to `GlobalREEnsembleGenerator`.
    """

### Running ensembles and saving results

In [7]:

for n_sentences in s_pools:

    # arguments of all loaded dialectical structures with this sentence pool size
    pool_taus = taus_by_n[n_sentences]
    arguments_list = [pool_tau.get_arguments() for pool_tau in pool_taus]
    # the keys of `coms_dict` are strings, hence the conversion
    coms_list = coms_dict[str(n_sentences)]
    print(coms_list)

    generator_settings = {
        'arguments_list': arguments_list,
        'n_sentence_pool': n_sentences,
        'initial_commitments_list': coms_list,
        'model_parameters_list': model_parameters_list,
        'implementations': implementations,
        'create_branches': track_branches,
        'max_branches': 150,
    }
    ensemble_gen = CustomEnsembleGenerator(**generator_settings)

    # results of all sentence pool sizes are appended to the same output file
    ensemble_gen.ensemble_items_to_csv(
        output_file_name=output_file_name,
        output_dir_name=data_dir,
        archive=True,  # save the csv as archived tar.gz
        save_preliminary_results=True,  # will create preliminary csv-data sets
        preliminary_results_interval=100,
        append=True,
    )

[[1, 2, 3, 5, -6], [3, 4, -1, -2], [1, 3, 5, -6, -2], [1, 5, -6, -3, -2], [2, 5, -6, -4, -1], [2, 5, 4, -3], [1, -2, 6], [2, 3, 6, -5, -1], [1, 2, 3, -5], [1, -5, 4]]
2023-11-08 10:08:47,397 [INFO] rethon: Starting ensemble generation with 480 models runs (without branches)
[[3, 7, -2, -6, -5, -1], [5, -7, -6, -3, -1], [2, -7, -6, -4, -3], [2, -6, -5, -4, -1], [1, 6, 7, -5, -4], [5, 4, -3, 7], [1, -5, -4, -7], [5, -1, 6], [-7, -3, -1, 6], [1, 3, -2, 6]]
2023-11-08 10:11:48,317 [INFO] rethon: Starting ensemble generation with 480 models runs (without branches)
