# Data Generation for "Technical Report - Assessing a Formal Model of Reflective Equilibrium"

In [10]:
# imports 

# TODO: remove imports that are not needed
from rethon import GlobalREEnsembleGenerator, ReflectiveEquilibrium, REState, StandardGlobalReflectiveEquilibrium
from rethon.util import standard_model_params_varied_alphas
from tau.util import create_random_argument_list, tau_dump,tau_load, random_positions

from tau import (
    StandardPosition, 
    DAGDialecticalStructure, 
    DialecticalStructure, 
    BitarrayPosition, 
    BDDDialecticalStructure
)
from tau.util import inferential_density

from os import getcwd, path
from pathlib import Path

from typing import List

import pandas as pd
from pandas import Series

from ast import literal_eval
import tarfile

from math import floor
from datetime import datetime

from model_variations import *

In [11]:
# Data directory: the "data" folder located in the parent of the current working directory.
project_root = Path(getcwd()).parent.absolute()
data_dir = path.join(project_root, "data")
print(data_dir)

/home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data


In [4]:


# Model variants, keyed by a short label. Every variant uses the `tau` module and
# `StandardPosition`; the variants differ in the dialectical-structure class and in
# the module/class that implements the reflective-equilibrium process.
_IMPLEMENTATION_SPECS = (
    # (label, dialectical structure class, RE module, RE class)
    # globally searching algorithm with quadratic G-function
    ('QGRE', 'DAGDialecticalStructure', 'rethon',
     'StandardGlobalReflectiveEquilibrium'),
    # globally searching algorithm with linear G-function
    ('LGRE', 'DAGDialecticalStructure', 'model_variations',
     'StandardGlobalReflectiveEquilibriumLinearG'),
    # locally searching algorithm with quadratic G-function
    ('QLRE', 'BDDDialecticalStructure', 'model_variations',
     'StandardLocalReflectiveEquilibriumWithGO'),
    # locally searching algorithm with linear G-function
    ('LLRE', 'BDDDialecticalStructure', 'model_variations',
     'StandardLocalReflectiveEquilibriumLinearGWithGO'),
)

implementations = {
    label: {
        'tau_module_name': 'tau',
        'position_class_name': 'StandardPosition',
        'dialectical_structure_class_name': structure_class,
        'reflective_equilibrium_module_name': re_module,
        'reflective_equilibrium_class_name': re_class,
    }
    for label, structure_class, re_module, re_class in _IMPLEMENTATION_SPECS
}
list(implementations.values())

[{'tau_module_name': 'tau',
  'position_class_name': 'StandardPosition',
  'dialectical_structure_class_name': 'DAGDialecticalStructure',
  'reflective_equilibrium_module_name': 'rethon',
  'reflective_equilibrium_class_name': 'StandardGlobalReflectiveEquilibrium'},
 {'tau_module_name': 'tau',
  'position_class_name': 'StandardPosition',
  'dialectical_structure_class_name': 'DAGDialecticalStructure',
  'reflective_equilibrium_module_name': 'model_variations',
  'reflective_equilibrium_class_name': 'StandardGlobalReflectiveEquilibriumLinearG'},
 {'tau_module_name': 'tau',
  'position_class_name': 'StandardPosition',
  'dialectical_structure_class_name': 'BDDDialecticalStructure',
  'reflective_equilibrium_module_name': 'model_variations',
  'reflective_equilibrium_class_name': 'StandardLocalReflectiveEquilibriumWithGO'},
 {'tau_module_name': 'tau',
  'position_class_name': 'StandardPosition',
  'dialectical_structure_class_name': 'BDDDialecticalStructure',
  'reflective_equilibrium_mod

## Creating dialectical structures and initial commitments

In [20]:
# Parameters that control the generation of the data set.
#
# BETA SET (small test set)
tau_data_set_name, coms_data_set_name = "tau_beta", "coms_beta"
sentence_pool_sizes = list(range(6, 8))  # sentence pools of size 6 and 7
n_tau_per_sentence_pool = 2
max_n_premises = 1
n_init_coms = 3  # number of initial commitments (for each sentence pool)



In [2]:
# ALPHA SET (mean number of premises: 1.5)
sentence_pool_sizes = list(range(6, 10))  # sentence pools of size 6, 7, 8 and 9
n_tau_per_sentence_pool = 20  # i.e., 80 structures in sum
max_n_premises = 2  # 1-2 premises per argument
n_init_coms = 20  # number of initial commitments (for each sentence pool)
# The data-set names are not assigned in this cell (assignments kept commented out):
#tau_data_set_name = "tau_alpha"
#coms_data_set_name = "coms_alpha"
#data_set_name = f"re_data_{datetime.now().strftime('%Y%m%d')}"


In [3]:
# ALPHA SET, generated with several premise-count weightings
sentence_pool_sizes = list(range(6, 10))  # sentence pools of size 6, 7, 8 and 9

# Each entry is passed as `n_premises_weights` when arguments are generated.
weights_list = [
    list(pair)
    for pair in (
        (1, 0),  # mean n_premises = 1
        (3, 1),  # mean n_premises = 1.25
        (1, 1),  # mean n_premises = 1.5
        (1, 3),  # mean n_premises = 1.75
        (0, 1),  # mean n_premises = 2
    )
]

# 10 taus for each sentence pool and each weight combination (i.e., 200 structures in sum)
n_tau_per_sentence_pool = 10
max_n_premises = 2  # 1-2 premises per argument
n_init_coms = 20  # number of initial commitments (for each sentence pool)
# The data-set names are not assigned in this cell (assignments kept commented out):
#tau_data_set_name = "tau_alpha"
#coms_data_set_name = "coms_alpha"
#data_set_name = f"re_data_{datetime.now().strftime('%Y%m%d')}"


In [13]:
# Create random dialectical structures: for every sentence-pool size and every
# premise-weight profile, keep drawing random argument lists until
# `n_tau_per_sentence_pool` structures have passed the acceptance check below.
tau_dict = {}
i = 1  # running index used to name the accepted structures
for n_sentences in sentence_pool_sizes:
    for weights in weights_list:
        # accepted argument lists for this (pool size, weights) combination;
        # only the length is used, as the stopping criterion of the loop
        args_list = []
        while len(args_list) < n_tau_per_sentence_pool:
            arguments = create_random_argument_list(
                n_arguments_min=n_sentences - 2,
                n_arguments_max=n_sentences + 1,
                n_sentences=n_sentences,
                n_premises_max=max_n_premises,
                n_premises_weights=weights,
            )
            tau = BDDDialecticalStructure(n_sentences, arguments)

            # absolute values of all entries that occur in at least one argument
            used_sentences = {abs(s) for arg in arguments for s in arg}

            # accept only structures whose inferential density lies in [0.15, 0.5]
            # and whose arguments use every sentence of the pool
            density_in_range = 0.15 <= inferential_density(tau) <= 0.5
            if not (density_in_range and len(used_sentences) == n_sentences):
                continue  # rejected draw: sample again

            tau_name = f'{tau_data_set_name}_{i:03d}'
            tau.set_name(tau_name)
            tau_dict[tau_name] = tau
            args_list.append(arguments)
            i += 1

{'tau_alpha_001': <tau.core.BDDDialecticalStructure object at 0x7fa2d3f4b580>, 'tau_alpha_002': <tau.core.BDDDialecticalStructure object at 0x7fa2d3f630a0>, 'tau_alpha_003': <tau.core.BDDDialecticalStructure object at 0x7fa2d3f4bf40>, 'tau_alpha_004': <tau.core.BDDDialecticalStructure object at 0x7fa2d3ff6850>, 'tau_alpha_005': <tau.core.BDDDialecticalStructure object at 0x7fa2d3f4b490>, 'tau_alpha_006': <tau.core.BDDDialecticalStructure object at 0x7fa2d3ff60d0>, 'tau_alpha_007': <tau.core.BDDDialecticalStructure object at 0x7fa2d4b835e0>, 'tau_alpha_008': <tau.core.BDDDialecticalStructure object at 0x7fa2d4b8ad00>, 'tau_alpha_009': <tau.core.BDDDialecticalStructure object at 0x7fa2d3f4b2e0>, 'tau_alpha_010': <tau.core.BDDDialecticalStructure object at 0x7fa2d4e27070>, 'tau_alpha_011': <tau.core.BDDDialecticalStructure object at 0x7fa2d3f4b430>, 'tau_alpha_012': <tau.core.BDDDialecticalStructure object at 0x7fa2d6d45a90>, 'tau_alpha_013': <tau.core.BDDDialecticalStructure object at 0x

In [14]:
# Write the generated dialectical structures to `<tau_data_set_name>.json` in the data directory.
overwrite = True  # set to False to protect an existing file from being replaced
output_file_path = path.join(data_dir, f'{tau_data_set_name}.json')

target_exists = path.exists(output_file_path)
if target_exists and not overwrite:
    raise RuntimeError(f'Datafile {output_file_path} already exists. Remove file or set `overwrite` to true.')

print(f'Saving data set {tau_data_set_name} into {output_file_path}.')
with open(output_file_path, 'w') as output_file:
    tau_dump(tau_dict, output_file, indent=4, serialize_implementation=True)

Saving data set tau_alpha into /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/tau_alpha.json.


In [21]:
# Generate random initial commitments:
# `n_init_coms` of them for each sentence-pool size, keyed by that size.
init_coms_dict = dict(
    (pool_size, random_positions(n_sentences=pool_size, k=n_init_coms))
    for pool_size in sentence_pool_sizes
)

print(init_coms_dict)


{6: [{4, 6, -2, -5, -1}, {2, -4, -1}, {1, 3, -6, -5, -2}], 7: [{4, 5, 7, -2, -6, -3, -1}, {2, 7, -6, -5, -4}, {-5, 6, 7}]}


In [22]:
# Write the initial commitments to `<coms_data_set_name>.json` in the data directory.
# NOTE: `overwrite` is not set here; it must already be defined (it is assigned in
# the cell that saves the dialectical structures).
output_file_path = path.join(data_dir, f'{coms_data_set_name}.json')

if path.exists(output_file_path) and not overwrite:
    raise RuntimeError(f'Datafile {output_file_path} already exists. Remove file or set `overwrite` to true.')
# Report the name of the set that is actually written. The previous message used
# `data_set_name`, which is unrelated to this file and undefined on a fresh kernel.
print(f'Saving data set {coms_data_set_name} into {output_file_path}.')

with open(file=output_file_path, mode='w') as output_file:
    tau_dump(init_coms_dict, output_file, indent=4, serialize_implementation=True)

Saving data set coms_alpha into /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/coms_beta.json.


## RE ensemble runs

### Loading data

In [12]:
# Load the dialectical structures of the selected data set and group them by sentence-pool size.
data_set_name = 're_data_20231104'
tau_file_path = path.join(data_dir, f'{data_set_name}_taus.json')
print(f'Loading taus from: {tau_file_path}')

input_file_path = tau_file_path
with open(input_file_path, 'r') as input_file:
    tau_dict = tau_load(input_file, use_json_specified_type=True)

# distinct sentence-pool sizes that occur among the loaded structures
s_pools = set(tau.sentence_pool().size() for tau in tau_dict.values())

# equivalence classes of dialectical structures according to sentence-pool size:
# one (initially empty) bucket per size, filled in the iteration order of `tau_dict`
taus_by_n = {pool_size: [] for pool_size in s_pools}
for tau in tau_dict.values():
    bucket = taus_by_n[tau.sentence_pool().size()]
    bucket.append(tau)

print(f'Number of taus in file: {len(tau_dict.values())}')
print(f'Sentence pools: {s_pools}')

Loading taus from: /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/re_data_20231104_taus.json
Number of taus in file: 4
Sentence pools: {6, 7}


In [13]:
# Load the initial commitments stored under the same data-set name.
coms_file_path = path.join(data_dir, f'{data_set_name}_coms.json')
print(f'Loading init coms from: {coms_file_path}')

input_file_path = coms_file_path
with open(input_file_path, 'r') as input_file:
    coms_dict = tau_load(input_file)

print(coms_dict)

Loading init coms from: /home/sebastian/Nextcloud/Documents/mindmaps/events/projekte/re/reflective-equilibrium/github-repository/re-technical-report/data/re_data_20231104_coms.json
{'6': [[1, 2, 3, 5, -4]], '7': [[2, 3, 5, -7, -4, -1]]}


### Process parameters and used models

In [8]:
# Preview the parameter sets yielded by `standard_model_params_varied_alphas(9)`.
[*standard_model_params_varied_alphas(9)]

[{'weights': {'account': 0.1, 'faithfulness': 0.8, 'systematicity': 0.1},
  'account_penalties': [0.0, 0.3, 1.0, 1.0],
  'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]},
 {'weights': {'account': 0.1, 'faithfulness': 0.7, 'systematicity': 0.2},
  'account_penalties': [0.0, 0.3, 1.0, 1.0],
  'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]},
 {'weights': {'account': 0.1, 'faithfulness': 0.6, 'systematicity': 0.3},
  'account_penalties': [0.0, 0.3, 1.0, 1.0],
  'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]},
 {'weights': {'account': 0.1, 'faithfulness': 0.5, 'systematicity': 0.4},
  'account_penalties': [0.0, 0.3, 1.0, 1.0],
  'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]},
 {'weights': {'account': 0.1, 'faithfulness': 0.4, 'systematicity': 0.5},
  'account_penalties': [0.0, 0.3, 1.0, 1.0],
  'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]},
 {'weights': {'account': 0.1,
   'faithfulness': 0.30000000000000004,
   'systematicity': 0.6},
  'account_penalties': [0.0, 0.3, 1.0, 1.0],
  'faithf

In [24]:
# 
# Results are written to a file named after the current date.
data_set_name = f"re_data_{datetime.now().strftime('%Y%m%d')}"
output_file_name = f'{data_set_name}_process_results.csv'
print(output_file_name)

# RE process parameters
track_branches = True

# RE weights (weight resolution: 0.10)
# For a quick test, iterate over `standard_model_params_varied_alphas(3)` instead.
# ToDo: Discuss with Andreas -- the weights are rounded to two decimals here.
model_parameters_list = []
for model_parameters in standard_model_params_varied_alphas(9):
    param_weights = model_parameters["weights"]
    # iterate over a snapshot of the keys; values are replaced in place
    for weight_name in list(param_weights):
        param_weights[weight_name] = round(param_weights[weight_name], 2)
    model_parameters_list.append(model_parameters)

print(len(model_parameters_list))
print(model_parameters_list)

# Model variants: every variant uses the `tau` module and `StandardPosition`; they
# differ in the dialectical-structure class and in the RE module/class.
implementations = [
    {'tau_module_name': 'tau',
     'position_class_name': 'StandardPosition',
     'dialectical_structure_class_name': structure_class,
     'reflective_equilibrium_module_name': re_module,
     'reflective_equilibrium_class_name': re_class}
    for structure_class, re_module, re_class in (
        # globally searching algorithm with quadratic G-function
        ('DAGDialecticalStructure', 'rethon',
         'StandardGlobalReflectiveEquilibrium'),
        # globally searching algorithm with linear G-function
        ('DAGDialecticalStructure', 'model_variations',
         'StandardGlobalReflectiveEquilibriumLinearG'),
        # locally searching algorithm with quadratic G-function
        ('BDDDialecticalStructure', 'model_variations',
         'StandardLocalReflectiveEquilibriumWithGO'),
        # locally searching algorithm with linear G-function
        ('BDDDialecticalStructure', 'model_variations',
         'StandardLocalReflectiveEquilibriumLinearGWithGO'),
    )
]


re_data_20231125_process_results.csv
36
[{'weights': {'account': 0.1, 'faithfulness': 0.8, 'systematicity': 0.1}, 'account_penalties': [0.0, 0.3, 1.0, 1.0], 'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]}, {'weights': {'account': 0.1, 'faithfulness': 0.7, 'systematicity': 0.2}, 'account_penalties': [0.0, 0.3, 1.0, 1.0], 'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]}, {'weights': {'account': 0.1, 'faithfulness': 0.6, 'systematicity': 0.3}, 'account_penalties': [0.0, 0.3, 1.0, 1.0], 'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]}, {'weights': {'account': 0.1, 'faithfulness': 0.5, 'systematicity': 0.4}, 'account_penalties': [0.0, 0.3, 1.0, 1.0], 'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]}, {'weights': {'account': 0.1, 'faithfulness': 0.4, 'systematicity': 0.5}, 'account_penalties': [0.0, 0.3, 1.0, 1.0], 'faithfulness_penalties': [0.0, 0.0, 1.0, 1.0]}, {'weights': {'account': 0.1, 'faithfulness': 0.3, 'systematicity': 0.6}, 'account_penalties': [0.0, 0.3, 1.0, 1.0], 'faithfulness_pena

In [6]:
# ToDiscuss: I guess we only need a custom generator if we want to vary/save
# parameters from the LocalREModel (like search depth)

class CustomEnsembleGenerator(GlobalREEnsembleGenerator):
    """Ensemble generator for the runs in this notebook.

    Currently adds nothing to `GlobalREEnsembleGenerator`; it is a placeholder
    for customisations (see the open question above the class).
    """

### Running ensembles and saving results

In [7]:

# Settings that are identical for every sentence-pool size.
shared_generator_settings = dict(
    model_parameters_list=model_parameters_list,
    implementations=implementations,
    max_re_length=100,
    create_branches=track_branches,
    max_branches=500,
)
csv_settings = dict(
    output_file_name=output_file_name,
    output_dir_name=data_dir,
    archive=True,  # save the csv as archived tar.gz
    save_preliminary_results=False,
    append=True,
)

# One ensemble per sentence-pool size; all results go to the same output file.
for n_sentences in s_pools:

    # arguments of the loaded dialectical structures with this pool size
    arguments_list = [structure.get_arguments() for structure in taus_by_n[n_sentences]]
    # `coms_dict` is looked up with the pool size converted to a string
    coms_list = coms_dict[str(n_sentences)]
    print(coms_list)

    ensemble_gen = CustomEnsembleGenerator(
        arguments_list=arguments_list,
        n_sentence_pool=n_sentences,
        initial_commitments_list=coms_list,
        **shared_generator_settings,
    )

    ensemble_gen.ensemble_items_to_csv(**csv_settings)

[[1, 2, 3, 5, -4]]
2023-11-24 19:52:04,686 [INFO] rethon: Starting ensemble generation with 24 models runs (without branches)
[[2, 3, 5, -7, -4, -1]]
2023-11-24 19:52:14,890 [INFO] rethon: Starting ensemble generation with 24 models runs (without branches)
