#Main file
This main file is based on the scripts that I had for minimizing the machines from Netlogo. The original was made to capture output from Behaviour Space, process the machines and then print the result for use in Stata. That was too cumbersome, so I decided to implement everything in Python, so that I can centralise all the analysis and work on the next algorithms (joint machines, frequencies and unused behavioural states) in order to analyse properly how the transitions are happening.

#1 Receiving the Netlogo output
The first step is to load the files. The global variables of interest are also set here, because they are used to build the names of the output files to load.

In [24]:
import pandas as pd
import numpy as np
import copy
from __future__ import division
import minimization as minz #My script in same folder for minimization routines


# ---- Experiment selection ----
# These globals identify which Netlogo simulation is analysed: they are
# embedded in the names of the output files loaded at the end of this cell,
# so the corresponding files must already exist.
experiment_clue = "trial1"
n_states = 8
n_signals = 0  # only the no-signal case is handled for now
n_rounds = 20
N = 40
n_parents = 20

# Minimum frequency, in percent (5 means 5%), that a minimised joint machine
# needs in order to be shown.
jm_freq_threshold = 0

# Kept as a tuple because it is fed directly to the % formatting below.
globals_list = (experiment_clue, n_states, n_signals, n_rounds, N, n_parents)

# Common file-name suffix shared by both output files of one simulation.
chosen_experiment = (
    "%s_states_%s_signal_%s_rounds_%s_N_%s_parents_%s.txt" % globals_list
)

# Both names are derived from the same suffix so that the summary and the
# strategies are guaranteed to come from the same simulation.
summary_file_name = "summary_" + chosen_experiment
strategies_file_name = "strategies_" + chosen_experiment

# Folder holding the Netlogo text outputs (must end with a slash, since the
# file names are appended by plain concatenation).
netlogo_folder = (
    "/Users/luisalejandrolee/Dropbox/Thesis Phd/"
    "Coordination autos Chapter three/outputs_to_text/"
)

# Load both outputs as dataframes.
df_sum = pd.read_csv(netlogo_folder + summary_file_name)
df_strat = pd.read_csv(netlogo_folder + strategies_file_name)

#2 Minimise the automata
Use the functions from the minimization script to keep the code here simple: minimise each automaton and store other relevant variables (such as accessible states, etc.)

In [25]:
# Number of distinct observations a machine can receive: 2 without signal,
# 4 otherwise. Change here if the observation space changes.
n_obs = 2 if n_signals == 0 else 4

# Processed automata, in the same order as the rows of df_strat.
canon_autos_list = []
min_autos_list = []

# Iterate over the raw automata directly: no row label is needed, and the
# inner loop below no longer reuses (and clobbers) the outer loop variable.
for netlogo_auto in df_strat.auto_long:
    # Convert the raw Netlogo automaton into a flat list.
    big_auto = minz.to_format_netlogo_auto(netlogo_auto)
    init_state = big_auto[0]  # first entry is the initial state
    normal_auto = minz.new_empty_auto(n_obs, n_states)

    # Copy big_auto into normal_auto. After the initial state, big_auto is
    # read as consecutive records of (n_obs + 1) entries: one action followed
    # by n_obs transitions. state_starts holds the position where each
    # record begins.
    state_starts = range(1, len(big_auto), n_obs + 1)
    for state, start in enumerate(state_starts):
        normal_auto['actions'][state] = big_auto[start]
        normal_auto['transitions'][state] = big_auto[start + 1:start + n_obs + 1]

    # Canonical form of the automaton.
    canon_auto = minz.convert_to_canonical(normal_auto, n_states, init_state, n_obs)

    # Only the accessible states remain in the canonical automaton.
    access_states = len(canon_auto)

    # Minimum behaviourally-equivalent automaton. The initial state passed
    # is 0 because that is always the initial state of a canonical automaton.
    min_auto = minz.minimized_automaton(canon_auto, access_states, 0, n_obs)

    canon_autos_list.append(canon_auto)
    min_autos_list.append(min_auto)

# Attach the processed automata to the strategies dataframe.
df_strat["canon_autos"] = canon_autos_list
df_strat["min_autos"] = min_autos_list

#3)  Variables key for analysis

##3.1 Accessible states and minimum behavioural states

In [26]:
# Size of every automaton before minimisation (accessible states of the
# canonical machine) and after it (states of the minimised machine).
access_states = list(map(len, df_strat.canon_autos))
min_states = list(map(len, df_strat.min_autos))

# Store both counts next to the automata they describe.
df_strat["access_states"] = access_states
df_strat["min_states"] = min_states

##3.2 Joint machine (not minimized) and Unused States measure (WORKS ONLY WITHOUT SIGNAL)
Outputs the dataframe df_jms, which holds the NOT minimized joint machines.

Also adds to df_strat the 'used_states' column (for each minimized individual machine, a list marking which of its states are used and which are unused).

In [27]:
# One flag per state of every minimised automaton: 0 = not visited in any
# joint machine, 1 = visited. Each list is sized from the automaton itself,
# so it does not depend on positional/label agreement with another column.
used_states_list = [[0] * len(auto) for auto in df_strat.min_autos]
df_strat["used_states"] = used_states_list

# Parallel lists: generation of each joint machine and the machine itself.
gen_list = []
jm_list = []

# The flag lists are mutated in place below, so fetching the column once is
# enough; the population masks do not depend on the generation either.
used_states = df_strat["used_states"]
is_column = df_strat.population == "column"
is_row = df_strat.population == "row"

# Build the (not minimised) joint machine of every column/row pair of each
# generation and record which states of each automaton it visits.
for gen in df_sum.generation:
    # Progress trace (useful for debugging or tracking run time). Written so
    # that it prints the same text under Python 2 and Python 3.
    print("generation =  %s" % gen)

    in_gen = df_strat.generation == gen
    col_mask = is_column & in_gen
    row_mask = is_row & in_gen

    # Pair every automaton with its own flag list. Both selections are
    # computed once per generation instead of once per column automaton.
    column_side = list(zip(df_strat.min_autos[col_mask], used_states[col_mask]))
    row_side = list(zip(df_strat.min_autos[row_mask], used_states[row_mask]))

    for auto0, used0 in column_side:
        for auto1, used1 in row_side:
            # Joint machines only work WITHOUT signal so far (would need
            # changes in "create_joint_machine" and "update_state").
            jm = minz.create_joint_machine_no_signal(auto0, auto1)

            gen_list.append(gen)
            jm_list.append(jm)

            # Each metastate of the joint machine is indexed as
            # (state of auto0, state of auto1); flag both as visited.
            for st in jm["states"]:
                used0[st[0]] = 1
                used1[st[1]] = 1

# Joint machines together with the generation they belong to.
df_jms = pd.DataFrame({"generation": gen_list, "jm": jm_list},
                      columns=["generation", "jm"])

generation =  0
generation =  1
generation =  2
generation =  3
generation =  4
generation =  5
generation =  6
generation =  7
generation =  8
generation =  9
generation =  10
generation =  11
generation =  12
generation =  13
generation =  14
generation =  15
generation =  16
generation =  17
generation =  18
generation =  19
generation =  20
generation =  21
generation =  22
generation =  23
generation =  24
generation =  25
generation =  26
generation =  27
generation =  28
generation =  29
generation =  30


##3.3 Minimize the joint machines (without signal)
Adds the 'min_jms' column to df_jms (tuples with the minimized joint machines)

In [28]:
def _freeze_min_jm(min_jm):
    # Turn a minimised joint machine (a dict with "actions" and "cyclestart")
    # into a tuple. "actions" is a list of lists, so it becomes a tuple of
    # tuples; the result is immutable and can be used as a groupby key.
    frozen_actions = tuple(tuple(pair_actions) for pair_actions in min_jm["actions"])
    return (frozen_actions, min_jm["cyclestart"])


# Minimise every joint machine and keep its hashable tuple form.
min_jm_list = [
    _freeze_min_jm(minz.minimize_joint_machine_no_signal(joint_machine))
    for joint_machine in df_jms.jm
]

# Store the minimised joint machines next to the originals.
df_jms["min_jms"] = min_jm_list



##3.4 Frequencies of minimized joint machines
Outputs dataframe "freqjm" with frequencies of joint machines
(Does it by transforming df_jms)

In [29]:
# Frequency of each minimised joint machine per generation.
# Grouping by column NAME makes pandas move both keys into the index, so the
# only remaining column ('jm') holds the count of each group.
g1 = df_jms.groupby(["generation", "min_jms"]).count()

interactions = N * N  # number of joint machines per generation

# Frequency as a percentage of all interactions in the generation. The float
# literal keeps this a true division even without `from __future__ import
# division` in effect.
freq_perc_list = [x * 100.0 / interactions for x in g1.jm]
g1['freq_perc'] = freq_perc_list

# Keep only machines whose frequency is above the threshold.
g1 = g1[g1.freq_perc > jm_freq_threshold]

# Organise the result: the count column becomes 'freq' and the group keys
# become ordinary columns again so that 'generation' can be used for sorting.
freqjm = g1.rename(columns={'jm': 'freq'}).reset_index()

# Most frequent machines first within each generation. `sort_values` is used
# because `DataFrame.sort` no longer exists in current pandas.
freqjm = freqjm.sort_values(['generation', 'freq_perc'], ascending=[True, False])

# Human-readable form of each minimised joint machine.
jmstring = [minz.min_jm_no_signal_to_string(x) for x in freqjm.min_jms]
freqjm['jmstring'] = jmstring
freqjm

Unnamed: 0,generation,min_jms,freq,freq_perc,jmstring
511,0,"(((B, A),), 0)",48,3.0000,>> BA <<
200,0,"(((A, B),), 0)",28,1.7500,>> AB <<
743,0,"(((B, B), (A, A)), 1)",20,1.2500,BB >> AA <<
923,0,"(((B, B), (B, A)), 1)",20,1.2500,BB >> BA <<
742,0,"(((B, B),), 0)",18,1.1250,>> BB <<
0,0,"(((A, A),), 0)",16,1.0000,>> AA <<
196,0,"(((A, A), (B, B), (B, B), (A, B)), 3)",16,1.0000,AA BB BB >> AB <<
63,0,"(((A, A), (A, B)), 1)",15,0.9375,AA >> AB <<
169,0,"(((A, A), (B, B)), 1)",12,0.7500,AA >> BB <<
336,0,"(((A, B), (B, A)), 1)",12,0.7500,AB >> BA <<


##3.5 Unused behaviour and slack in construction measures

The measures are first captured for each machine in the dataframe df_strat.
They are then averaged within each generation so that the averages can be merged into the summary dataframe.
So the measures in the latter are per-generation averages.

In [30]:
# Unused states: number of states of the minimised machine that were never
# visited (flags different from 1 in used_states).
unused_states = [len(flags) - flags.count(1) for flags in df_strat.used_states]

# Unvisited: potential for novel behaviour given a change in the input
# stream. It is the unused states divided by the states of the minimised
# machine. float() keeps these true divisions even when
# `from __future__ import division` is not in effect.
unvisited_measure = [
    unused / float(len(flags))
    for unused, flags in zip(unused_states, df_strat.used_states)
]

total_states = float(n_states)

# Behaviour slack: slack in the potential behaviour of the machine
# (minimised size / total states). The more states are used, the more
# behaviourally sophisticated the machine can become.
behaviour_slack = [len(auto) / total_states for auto in df_strat.min_autos]

# Construction slack: slack in the construction of the complete machine
# (accessible states / total states).
construction_slack = [n / total_states for n in df_strat.access_states]

df_strat['unused_states'] = unused_states
df_strat['unvisited_measure'] = unvisited_measure
df_strat['behaviour_slack'] = behaviour_slack
df_strat['construction_slack'] = construction_slack

# The per-state flags are no longer needed once the measures exist.
# `axis` is passed by keyword: newer pandas rejects it as a positional.
df_strat = df_strat.drop('used_states', axis=1)

In [31]:
# Per-generation averages of the strategy measures.
# Only numeric (and boolean) columns can be averaged, so they are selected
# explicitly instead of relying on pandas silently discarding the others,
# which newer pandas versions no longer do.
strats = df_strat.select_dtypes(include=[np.number, 'bool'])
strats = strats.groupby('generation').mean()

# Averages of these two columns are not needed. `axis` is passed by keyword
# because newer pandas rejects it as a positional argument.
strats = strats.drop(['ID', 'score'], axis=1)

# Turn 'generation' back into an ordinary column.
strats = strats.reset_index()

In [32]:
# Summary columns that are not used in the analysis. The last two names
# start with a space; presumably that matches the header of the Netlogo
# summary file, so they are kept exactly as they are.
to_delete = [
    'row_heads_A',
    'row_heads_B',
    'row_tails_A',
    'row_tails_B',
    'col_heads_A',
    'col_heads_B',
    'col_tails_A',
    'col_tails_B',
    ' times_heads',
    ' times_tails',
]

# drop() returns a new dataframe, so df_sum itself is left untouched.
summary = df_sum.drop(to_delete, axis=1)

# Add the per-generation strategy averages to the summary.
summary = summary.merge(strats, on='generation')

summary

Unnamed: 0,generation,av_score_row,av_score_col,miscoordination_perc,coordination_B_perc,coordination_A_perc,ce,ce_individual,access_states,min_states,unused_states,unvisited_measure,behaviour_slack,construction_slack
0,0,1.184187,1.170187,0.529125,0.242438,0.228437,0,0,6.2375,5.85,0.0125,0.001786,0.73125,0.779687
1,1,1.628656,1.538844,0.3665,0.361656,0.271844,0,0,6.2875,6.0125,0.0125,0.001786,0.751563,0.785937
2,2,1.917906,1.534906,0.309437,0.536781,0.153781,0,0,6.3,5.875,0.0,0.0,0.734375,0.7875
3,3,2.849094,1.9595,0.038281,0.925656,0.036062,0,0,6.9625,6.175,0.075,0.010714,0.771875,0.870313
4,4,2.795,1.936094,0.053781,0.902563,0.043656,0,0,7.1625,6.475,0.0125,0.001563,0.809375,0.895312
5,5,2.8995,1.943156,0.031469,0.962437,0.006094,0,0,6.8875,6.025,0.0375,0.005357,0.753125,0.860938
6,6,2.993188,1.995563,0.00225,0.997687,6.3e-05,0,0,6.675,5.475,0.0,0.0,0.684375,0.834375
7,7,2.90575,1.977062,0.023438,0.952625,0.023938,0,0,6.7625,5.8,0.025,0.004167,0.725,0.845313
8,8,2.994344,1.996281,0.001875,0.998094,3.1e-05,0,0,6.4625,5.225,0.0,0.0,0.653125,0.807813
9,9,2.995938,1.997344,0.001344,0.998625,3.1e-05,0,0,5.9875,4.7375,0.0625,0.010417,0.592187,0.748437


In [19]:
#take the top joint machine and put it in summary as regime classification (use threshold or something)
#revise names
#update README file
#make a commit

In [None]:

#FOR EXPLAINING STUFF


#other machines frequencies (minimised)
#regime identification (using joint machines)
#regime change identification
#Graphs and stats (automate them)

##Next:
#1) describe graphically his transition explanations