# Data Generation for "Technical Report - Assessing a Formal Model of Reflective Equilibrium"

This notebook can be used to generate data that is similar in all relevant aspects to the data used in the technical report ["Assessing a Formal Model of Reflective Equilibrium"](https://re-models.github.io/re-technical-report/). (It will not reproduce the same data since the data is the result of a random process.)

## How to run this notebook

There are several possibilities to execute this notebook. You can, for instance,

1. execute this notebook on Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/re-models/re-technical-report/blob/main/notebooks/data_generation.ipynb), or
2. execute this notebook locally in, for instance, [JupyterLab](https://jupyter.org/) by cloning the GitHub repository of the report (for details, see <https://github.com/re-models/re-technical-report>).

Note, however, that Colab is not suited to generate a dataset as large as the one used in the report. The predefined values will generate a small dataset on Colab. In every other environment, the predefined values correspond to those used to generate the full dataset.

## Installing and importing libraries

In [1]:
%pip install re-technical-report

In [14]:
# imports 
from rethon import GlobalREEnsembleGenerator
from rethon.util import standard_model_params_varied_alphas
from theodias.util import create_random_argument_list, tau_dump,tau_load, random_positions, inferential_density

from theodias import (
    BDDDialecticalStructure
)

from os import getcwd, path
from pathlib import Path

In [15]:
# Are we on Colab? Decided from the string representation of the IPython shell object.
on_colab = 'google.colab' in str(get_ipython())

# Data directory: the folder "data" located in the parent of the current
# working directory (kept as a plain string).
data_dir = str(Path(getcwd()).parent.absolute() / "data")

## Creating dialectical structures and initial commitments

In [10]:
# name of the data set of dialectical structures (also used as prefix of the structure names)
tau_data_set_name = "tau_alpha"

# On Colab only a small illustrative data set is generated;
# every other environment uses the full configuration.

# sizes of the sentence pools
sentence_pool_sizes = [6] if on_colab else [6, 7, 8, 9]

# Number of taus for each sentence pool and each weight combination:
# one on Colab, otherwise 10 (i.e., 4 * 5 * 10 = 200 structures in sum).
n_tau_per_sentence_pool = 1 if on_colab else 10

# 1-2 premises per argument
max_n_premises = 2

# weights for the number of premises of an argument
weights_list = [
    [1, 0],  # mean n_premises = 1
    [3, 1],  # mean n_premises = 1.25
    [1, 1],  # mean n_premises = 1.5
    [1, 3],  # mean n_premises = 1.75
    [0, 1],  # mean n_premises = 2
]

In [4]:
# Random dialectical structures, stored under their name.
tau_dict = {}
# running number that is part of the structure names
tau_counter = 1
for n_sentences in sentence_pool_sizes:
    for weights in weights_list:
        # number of structures accepted so far for this (pool size, weights) combination
        n_accepted = 0
        while n_accepted < n_tau_per_sentence_pool:
            arguments = create_random_argument_list(
                n_arguments_min=n_sentences - 2,
                n_arguments_max=n_sentences + 1,
                n_sentences=n_sentences,
                n_premises_max=max_n_premises,
                n_premises_weights=weights,
            )
            tau = BDDDialecticalStructure(n_sentences, arguments)

            # sentences occurring in some argument, irrespective of their sign
            used_sentences = {abs(s) for arg in arguments for s in arg}
            density = inferential_density(tau)

            # Reject candidates whose inferential density lies outside [0.15, 0.5]
            # or that do not use every sentence of the pool; simply draw again.
            if not (0.15 <= density <= 0.5) or len(used_sentences) != n_sentences:
                continue

            tau_name = f'{tau_data_set_name}_{tau_counter:03d}'
            tau.set_name(tau_name)
            tau_dict[tau_name] = tau
            n_accepted += 1
            tau_counter += 1

#print(tau_dict)

In [5]:
# Serialize the dialectical structures into a JSON file (skipped on Colab).
# Set `overwrite` to True to allow replacing an existing data file.
overwrite = False
output_file_path = path.join(data_dir, tau_data_set_name + '.json')

if not on_colab:
    # never silently replace an existing data file
    file_is_protected = path.exists(output_file_path) and not overwrite
    if file_is_protected:
        raise RuntimeError(f'Datafile {output_file_path} already exists. Remove file or set `overwrite` to true.')
    print(f'Saving data set {tau_data_set_name}.')
    with open(output_file_path, 'w') as output_file:
        tau_dump(tau_dict, output_file, indent=4, serialize_implementation=True)

Saving data set tau_alpha-2.


In [16]:
# Random initial commitments.
# Number of initial commitments per sentence pool: 1 on Colab, 20 otherwise.
n_init_coms = 1 if on_colab else 20

# maps each sentence pool size to its `n_init_coms` randomly drawn positions
init_coms_dict = {
    pool_size: random_positions(n_sentences=pool_size, k=n_init_coms)
    for pool_size in sentence_pool_sizes
}

In [17]:
# name of the data set of initial commitments
coms_data_set_name = "coms_alpha"

# serializing into a JSON file
output_file_path = path.join(data_dir, f'{coms_data_set_name}.json')

# Nothing is written on Colab.
if not on_colab:
    # `overwrite` is the flag defined in the cell that saves the dialectical structures;
    # an existing data file is only replaced if it is set to True.
    if path.exists(output_file_path) and not overwrite:
        raise RuntimeError(f'Datafile {output_file_path} already exists. Remove file or set `overwrite` to true.')
    print(f'Saving data set {coms_data_set_name}.')

    with open(file=output_file_path, mode='w') as output_file:
        tau_dump(init_coms_dict, output_file, indent=4, serialize_implementation=True)

Saving data set coms_alpha2.


## RE ensemble runs

### Loading data

In [12]:
## load dialectical structures
# Outside of Colab the structures are read from the JSON file;
# on Colab this cell does not load anything.
if not on_colab:
    tau_data_set_name = "tau_alpha"
    input_file_path = tau_file_path = path.join(data_dir, tau_data_set_name + '.json')
    with open(input_file_path, 'r') as input_file:
        tau_dict = tau_load(input_file, use_json_specified_type=True)
    print(f'Number of taus in file: {len(tau_dict)}')

Number of taus in file: 200


In [23]:
coms_data_set_name = "coms_alpha"
# Initial commitments are read from the JSON file everywhere except on Colab.
if not on_colab:
    coms_file_path = path.join(data_dir, coms_data_set_name + '.json')
    input_file_path = coms_file_path
    with open(input_file_path, 'r') as input_file:
        loaded_coms = tau_load(input_file)
    # the loaded keys are strings; convert them to ints
    init_coms_dict = {int(pool_size): coms for pool_size, coms in loaded_coms.items()}
#print(init_coms_dict)

### Process parameters and used models

In [7]:
# RE weights
# Resolution of the alpha weights:
#   3 (Colab)     corresponds to weights [ 0.25, 0.5, 0.75]
#   9 (otherwise) corresponds to weights [ 0.1, 0.2, ..., 0.9]
alpha_resolution = 3 if on_colab else 9

model_parameters_list = []
for model_parameters in standard_model_params_varied_alphas(alpha_resolution):
    # round every weight to two decimal places (in place)
    alpha_weights = model_parameters["weights"]
    for weight_name in list(alpha_weights):
        alpha_weights[weight_name] = round(alpha_weights[weight_name], 2)
    model_parameters_list.append(model_parameters)

# number of parameter sets
print(len(model_parameters_list))
#print(model_parameters_list)

# Model variants: each entry names the modules and classes of one model implementation.
implementations = [
    # globally searching algorithm with quadratic G-function
    dict(tau_module_name='theodias',
         position_class_name='StandardPosition',
         dialectical_structure_class_name='DAGDialecticalStructure',
         reflective_equilibrium_module_name='rethon',
         reflective_equilibrium_class_name='StandardGlobalReflectiveEquilibrium'),
    # globally searching algorithm with linear G-function
    dict(tau_module_name='theodias',
         position_class_name='StandardPosition',
         dialectical_structure_class_name='DAGDialecticalStructure',
         reflective_equilibrium_module_name='re_technical_report',
         reflective_equilibrium_class_name='StandardGlobalReflectiveEquilibriumLinearG'),
    # locally searching algorithm with quadratic G-function
    dict(tau_module_name='theodias',
         position_class_name='StandardPosition',
         dialectical_structure_class_name='BDDDialecticalStructure',
         reflective_equilibrium_module_name='re_technical_report',
         reflective_equilibrium_class_name='StandardLocalReflectiveEquilibriumWithGO'),
    # locally searching algorithm with linear G-function
    dict(tau_module_name='theodias',
         position_class_name='StandardPosition',
         dialectical_structure_class_name='BDDDialecticalStructure',
         reflective_equilibrium_module_name='re_technical_report',
         reflective_equilibrium_class_name='StandardLocalReflectiveEquilibriumLinearGWithGO'),
]
#list(implementations.values())


36


### Running ensembles and saving results

In [9]:
# csv file (inside `data_dir`) that receives the results of all ensemble runs
output_file_name = 're_data_tau.csv'

# One ensemble generator per dialectical structure. Each one combines the
# structure's arguments with the initial commitments of the matching sentence
# pool size, all model parameter sets and all model implementations.
for tau in tau_dict.values():
    n_sentence_pool = tau.sentence_pool().size()
    ensemble_gen = GlobalREEnsembleGenerator(
        arguments_list=[tau.get_arguments()],
        n_sentence_pool=n_sentence_pool,
        initial_commitments_list=init_coms_dict[n_sentence_pool],
        model_parameters_list=model_parameters_list,
        implementations=implementations,
        max_re_length=100,
        create_branches=True,
        max_branches=500,
    )

    # results are only written to disk outside of Colab
    if on_colab:
        continue

    ensemble_gen.ensemble_items_to_csv(
        output_file_name=output_file_name,
        output_dir_name=data_dir,
        # NOTE(review): the original comment reads "save the csv as archived tar.gz";
        # with False the csv is presumably left unarchived. Confirm against rethon.
        archive=False,
        save_preliminary_results=False,
        # every structure uses the same file name; presumably the rows are appended
        # to that file, so re-running this cell would add rows again. Confirm.
        append=True,
    )

2024-02-23 11:42:53,798 [INFO] rethon: Starting ensemble generation with 12 models runs (not counting branches)
2024-02-23 11:42:57,213 [INFO] rethon: Starting ensemble generation with 12 models runs (not counting branches)
2024-02-23 11:43:03,614 [INFO] rethon: Starting ensemble generation with 12 models runs (not counting branches)
2024-02-23 11:43:08,307 [INFO] rethon: Starting ensemble generation with 12 models runs (not counting branches)
2024-02-23 11:43:21,774 [INFO] rethon: Starting ensemble generation with 12 models runs (not counting branches)
