#Main file
This main file is based on the scripts that I had for minimizing the machines from Netlogo. The original version captured the output from Behaviour Space, processed the machines and then printed the results for use in Stata. That was too cumbersome, so I decided to implement everything in Python in order to centralise the analysis and to work on the next algorithms (joint machines, frequencies and unused behavioural states) needed to analyse properly how the transitions are happening.

#1 Receiving the Netlogo output
The first step is to load the files. The global variables of interest are also set here, because they are used to build the names of the output files to load.

In [1]:
import pandas as pd
import numpy as np
import copy
from __future__ import division
from __future__ import with_statement
import minimization as minz #My script in same folder for minimization routines

In [2]:
# Globals and name of the experiment to load.
# Make sure the files exist (i.e. they were produced by the Netlogo simulations).
experiment_clue = "trial1"
n_states = 8
n_signals = 1  # 0 = no signal, 1 = with signal (see the n_signals branches in later cells)
n_rounds = 20
N = 40
n_parents = 20

# Generations to load ("None" to import the whole file).
start_gen = None
number_of_gens = 5  # so the final generation imported is start_gen + number_of_gens - 1

###################################################################################
###################################################################################
###################################################################################

# Equivalent of generations in the strategies file (one row per automaton).
total_pop = 80  # total number of autos per generation
start_row_strat = (start_gen * total_pop if start_gen is not None else None)
number_rows_strat = (number_of_gens * total_pop if number_of_gens is not None else None)

# Kept as a tuple because it feeds the %-format below.
globals_list = (experiment_clue, n_states, n_signals, n_rounds, N, n_parents)

# Experiment name based on the chosen experiment_clue and globals.
chosen_experiment = "%s_states_%s_signal_%s_rounds_%s_N_%s_parents_%s.txt" % globals_list

# Both files use the same "chosen experiment" (so they come from the same Netlogo simulation).
summary_file_name = "summary_" + chosen_experiment  # summary output
strategies_file_name = "strategies_" + chosen_experiment  # strategies output

# Path to Netlogo outputs.
netlogo_folder = "/Users/luisalejandrolee/Dropbox/Thesis Phd/\
Coordination autos Chapter three/outputs_from_netlogo/"


def _read_header(path):
    """Return the comma-separated column names on the first line of *path*.

    The line terminator is stripped whether the file uses "\\n" or "\\r\\n",
    so the last column name never carries a stray "\\r".
    """
    with open(path, 'r') as f:
        return f.readline().rstrip('\r\n').split(',')


# Real headers of both files. They are needed because, when only some
# generations are imported, `skiprows` also skips the header line and
# read_csv takes the first unskipped data line as the header instead.
line_s = _read_header(netlogo_folder + summary_file_name)
line_st = _read_header(netlogo_folder + strategies_file_name)

# Read the files and keep them as dataframes.
df_sum = pd.read_csv(netlogo_folder + summary_file_name,
                     skiprows=start_gen, nrows=number_of_gens)

df_strat = pd.read_csv(netlogo_folder + strategies_file_name,
                       skiprows=start_row_strat, nrows=number_rows_strat)

# Replace the header with the first row of each file (instead of the one inferred from the imported data).
df_sum.columns = line_s
df_strat.columns = line_st

#2 Minimise the automata
Use the functions from the minimization script so that the code here stays simple: minimise each automaton and store other relevant variables (such as the accessible states, etc.).

In [3]:
# Other required globals.
# Number of possible observations of the machines: 2 without signal, 4 otherwise.
n_obs = 2 if n_signals == 0 else 4

# Processed autos, in the same order as the rows of df_strat.
canon_autos_list = []
min_autos_list = []

for row_label in df_strat.index:  # for each row (one raw Netlogo automaton per row)
    netlogo_auto = df_strat.auto_long[row_label]
    big_auto = minz.to_format_netlogo_auto(netlogo_auto)  # raw Netlogo auto converted to list format
    init_state = big_auto[0]  # the first entry is the initial state of the machine
    normal_auto = minz.new_empty_auto(n_obs, n_states)  # new empty auto (numpy array per the original notes)

    # Copy big_auto into normal_auto so that both hold the same machine, but
    # as an array instead of a list. Each state occupies n_obs + 1 entries of
    # big_auto: its action followed by one transition per observation.
    # The loop variables are distinct from the outer row label on purpose:
    # the original reused `i` here and shadowed the row index.
    state_starts = xrange(1, len(big_auto), n_obs + 1)  # position where each state starts
    for state, start in enumerate(state_starts):
        normal_auto['actions'][state] = big_auto[start]
        normal_auto['transitions'][state] = big_auto[start + 1:start + n_obs + 1]

    # Canonical form of the machine.
    canon_auto = minz.convert_to_canonical(normal_auto, n_states, init_state, n_obs)

    # Only the accessible states of the machine remain (before minimization).
    access_states = len(canon_auto)

    # Minimum behaviourally equivalent auto. "0" is passed as the initial
    # state because that is always the initial state of a canonical auto.
    min_auto = minz.minimized_automaton(canon_auto, access_states, 0, n_obs)

    canon_autos_list.append(canon_auto)
    min_autos_list.append(min_auto)

# Add the processed autos as columns of df_strat.
df_strat["canon_autos"] = canon_autos_list
df_strat["min_autos"] = min_autos_list

#3 Accessible states and minimum behavioural states

In [4]:
# Size of every processed automaton: the canonical machine keeps only the
# accessible states, the minimized machine only the behavioural ones.
access_states = list(map(len, df_strat.canon_autos))
min_states = list(map(len, df_strat.min_autos))

# Store both counts next to the automata they describe.
df_strat["access_states"] = access_states
df_strat["min_states"] = min_states

#4 Joint machine (not minimized)
##Here for unused states when decided how to do it

Outputs df_jms, which includes the NOT minimized joint machines.

Outputs on df_strat the 'used_states' variable (containing, for each minimized individual machine, a list that marks its used and unused states).

In [None]:
# Prepare the column that keeps track of unused states: one list per
# minimized automaton, with one zero per state of that automaton. A zero means
# "not visited yet"; entries are switched to 1 when the joint machines are built.
# The list length comes from the automaton itself, so it does not depend on
# the dataframe index matching positional order.
used_states_list = [[0] * len(auto) for auto in df_strat.min_autos]
df_strat["used_states"] = used_states_list  # add to dataframe

# Lists to keep track of joint machines and the generation each belongs to.
gen_list = []
jm_list = []


In [None]:
# Create the joint machines (not minimized).

# Choose the constructor once and fail loudly on an unsupported setting.
# With two independent `if` tests, any other n_signals value would leave
# `jm` undefined (or silently reuse a stale one from a previous run).
if n_signals == 1:  # with signal
    create_joint_machine = minz.create_joint_machine_with_signal
elif n_signals == 0:  # no signal
    create_joint_machine = minz.create_joint_machine_no_signal
else:
    raise ValueError("n_signals must be 0 or 1, got %r" % (n_signals,))

for gen in df_sum.generation:  # each generation
    # Progress trace (useful for debugging or keeping track of running time).
    # Written so that Python 2 and 3 produce the same text as the old
    # `print "generation = ", gen` statement.
    print("generation =  %s" % (gen,))

    # Autos of the current generation, split by population.
    in_gen = df_strat.generation == gen
    df_col = df_strat[(df_strat.population == "column") & in_gen]
    df_row = df_strat[(df_strat.population == "row") & in_gen]

    # iterrows() is used instead of enumerate because i0 / i1 are the labels
    # of the autos in df_strat, which are needed to update the used states
    # of exactly the two autos that formed the joint machine.
    for i0, row0 in df_col.iterrows():
        auto0 = row0.min_autos
        for i1, row1 in df_row.iterrows():
            auto1 = row1.min_autos

            jm = create_joint_machine(auto0, auto1)

            gen_list.append(gen)  # keeps track of generation
            jm_list.append(jm)    # keeps track of joint machines

            # Update used_states (NO SIGNAL ONLY FOR NOW).
            # jm["states"] holds the metastates visited by the two autos;
            # st[0] is the state of auto0 and st[1] the state of auto1.
            # Each visited state is flagged with 1 in the auto's own
            # "used_states" list; unvisited states remain 0.
            if n_signals == 0:
                for st in jm["states"]:
                    df_strat["used_states"][i0][st[0]] = 1
                    df_strat["used_states"][i1][st[1]] = 1

            # With signal: used states are not computed yet. This is the
            # place to add it once it is decided how to calculate them.

# Store the joint machines with their associated generation (a new dataframe).
df_jms = pd.DataFrame(columns=("generation", "jm"))
df_jms.generation = gen_list
df_jms.jm = jm_list

generation =  0
generation =  1
generation =  2
generation =  3
generation = 

#5 Minimize the joint machines 
Adds the 'min_jms' column to df_jms (tuples with the minimized joint machines)

In [None]:
def _min_jm_with_signal(jm):
    """Minimize a with-signal joint machine and return it as a hashable tuple.

    The machine is put in canonical form and minimized (initial state 0, two
    observations). The result has 'actions' and 'transitions' fields, which
    are zipped into a tuple of (action, transitions-tuple) pairs so it can be
    used as a groupby key (tuples are immutable).
    """
    canon_jm = minz.convert_to_canonical(jm, len(jm), 0, 2)
    min_jm = minz.minimized_automaton(canon_jm, len(canon_jm), 0, 2)
    tup_transitions = tuple(tuple(pair_transitions) for pair_transitions in min_jm["transitions"])
    tup_actions = tuple(min_jm['actions'])
    return tuple(zip(tup_actions, tup_transitions))


def _min_jm_no_signal(jm):
    """Minimize a no-signal joint machine and return it as a hashable tuple.

    The minimized machine has "actions" and "cyclestart" entries. The actions
    (a sequence of pairs) become a tuple of tuples, and the cyclestart is
    appended, so the result can be used as a groupby key.
    """
    min_jm = minz.minimize_joint_machine_no_signal(jm)
    tup_actions = tuple(tuple(pair_actions) for pair_actions in min_jm["actions"])
    return (tup_actions, min_jm["cyclestart"])


# Choose the routine once. Any other n_signals value used to leave `min_jm`
# undefined (or stale) inside the loop, so it is now rejected explicitly.
if n_signals == 1:  # with signal
    _minimize_jm = _min_jm_with_signal
elif n_signals == 0:  # no signal
    _minimize_jm = _min_jm_no_signal
else:
    raise ValueError("n_signals must be 0 or 1, got %r" % (n_signals,))

# Minimized joint machines, in the same order as the rows of df_jms.
min_jm_list = [_minimize_jm(jm) for jm in df_jms.jm]

# Save the minimized joint machines as a new column of the dataframe.
df_jms["min_jms"] = min_jm_list

#6 Frequencies of minimized joint machines
Outputs dataframe "freqjm" with frequencies of joint machines
(Does it by transforming df_jms)

In [None]:
#Frequency of each minimized joint machine per generation.
#Builds "freqjm" from df_jms: one row per (generation, min_jms) pair with its count ('freq')
#and its percentage over all interactions of the generation ('freq_perc').

g1 = df_jms.copy() #work on a copy so df_jms itself is left untouched
g1 = g1.groupby([g1["generation"], g1["min_jms"]]) #one group per (generation, minimized joint machine)
g1 = g1.count() #number of rows in each group, i.e. frequency of the joint machine in that generation

#NOTE(review): assumes every generation has N column autos and N row autos, so that N*N joint
#machines are created per generation -- confirm against the Netlogo setup
interactions = N * N #number of joint machines per generation
freq_perc_list = [(x*100)/interactions for x in g1.jm] #count of each group expressed as a percentage
g1['freq_perc'] = freq_perc_list #add frequency to the dataframe

#Optional filter, disabled: keep only machines above a frequency threshold (easier visualization)
#jm_freq_threshold = 0 #Change to higher for easier visualization
#g1 = g1[g1.freq_perc > jm_freq_threshold] #keep machines with frequency higher than threshold


#Organise the dataframe

freqjm = g1.copy() #separate copy for the final, organised table
freqjm = freqjm.rename(columns = {'jm':'freq'}) #the counted 'jm' column holds the frequency
freqjm = freqjm.reset_index() #reset_index converts the multiindex into normal columns (to use generation for 'sort')
#NOTE(review): DataFrame.sort only exists in old pandas releases; newer ones need sort_values
freqjm = freqjm.sort(['generation', 'freq_perc'], ascending=[True, False]) #by generation, most frequent first


#If no signal, show the lollipop machine as a string. Example: "AA BB >>AA<<" for a machine that plays first
#AA, then BB, and then forever plays AA (whatever is inside >> << is the metamachine cycle)
if n_signals==0:#no signal
    jm_show = [minz.min_jm_no_signal_to_string(x) for x in freqjm.min_jms] #use function to convert to string
    freqjm['jm_show'] = jm_show #add to dataframe

#With signal, perhaps a good way to show the joint machine (jm_show) is by using the Markov matrix
if n_signals==1:#with signal (nothing implemented yet)
    pass

#7 Unused behaviour and slack in construction measures

The unused-states measures are not ready for the with-signal case yet. Check later.

In [None]:
# Measures derived from the used-state flags (only available without signal).
if n_signals == 0:
    unused_states = []      # number of states of the minimized machine never visited
    unvisited_measure = []  # unused states as a share of the minimized machine's states
    for flags in df_strat.used_states:
        n_flags = len(flags)
        n_idle = n_flags - flags.count(1)  # every entry not flagged with 1 counts as unused
        unused_states.append(n_idle)
        # potential for novel behaviour given a change in the input stream
        unvisited_measure.append(n_idle / n_flags)

# behaviour_slack: size of the minimized machine over the total number of
# states (the more states are used, the more sophisticated the behaviour can be).
behaviour_slack = [len(machine) / n_states for machine in df_strat.min_autos]

# construction_slack: accessible states over the total number of states,
# i.e. slack in the construction of the complete machine.
construction_slack = [n_access / n_states for n_access in df_strat.access_states]

# Attach the measures; the unused-state columns come first when they exist.
if n_signals == 0:
    df_strat['unused_states'] = unused_states
    df_strat['unvisited_measure'] = unvisited_measure

df_strat['behaviour_slack'] = behaviour_slack
df_strat['construction_slack'] = construction_slack

#df_strat = df_strat.drop('used_states', 1)

In [None]:
# Per-generation averages of unused states, unvisited measure and slack measures.

strats = df_strat.copy()  # work on a copy so df_strat keeps its per-auto rows
by_generation = strats.groupby(strats.generation)
# Mean of every variable within each generation; 'ID' and 'score' are not
# needed afterwards, and the generation index is turned back into a column.
strats = by_generation.mean().drop(['ID', 'score'], axis=1).reset_index()

In [None]:
# Summary dataframe: the Netlogo summary (on a copy, df_sum stays untouched)
# joined on 'generation' with the per-generation measures held in strats.
#
# Optional, disabled: columns that could be dropped before merging.
#to_delete = ['row_heads_A', 'row_heads_B', 'row_tails_A', 'row_tails_B', 'col_heads_A', 'col_heads_B',\
#'col_tails_A','col_tails_B','times_heads','times_tails']
#summary = summary.drop(to_delete, axis=1)

summary = pd.merge(df_sum.copy(), strats, on='generation')

#8 Regime identification

In [None]:
#Two regime classifications: based on top joint machine and based on percentage of play

#By top joint machine:
#Function to find the highest frequency percentage top machine
def find_top_jm(df, n=1, column='freq_perc'):
    """Return the *n* rows of *df* with the highest values in *column*.

    Rows are returned in ascending order of *column*, so the overall maximum
    is the last row. With n=0 an empty frame is returned.

    df     -- dataframe to pick rows from
    n      -- number of top rows to keep (default 1)
    column -- name of the column to rank by (default 'freq_perc')
    """
    # `sort_index(by=...)` only exists in old pandas releases; prefer
    # `sort_values` when available and fall back otherwise.
    if hasattr(df, 'sort_values'):
        ordered = df.sort_values(by=column)
    else:
        ordered = df.sort_index(by=column)
    # tail(n) instead of [-n:], which would return every row for n == 0.
    return ordered.tail(n)

# Highest-frequency joint machine of every generation (one row per generation).
topjm = freqjm.groupby('generation').apply(find_top_jm)

# The regime of a generation is its top joint machine if that machine's
# frequency percentage is above "regime_threshold"; otherwise the generation
# is labelled 'not_threshold'.
regime_threshold = 50
# The machine and its frequency are read from the same row, so this works
# whatever the index of topjm is (generations need not start at 0). The
# frequency is compared as-is, without integer truncation, so e.g. 50.06 is
# correctly treated as above 50.
regime_jm = [jm if freq > regime_threshold else 'not_threshold'
             for jm, freq in zip(topjm.min_jms, topjm.freq_perc)]

# Add the regime to the summary dataframe (one value per generation,
# in generation order).
summary['regime_jm'] = regime_jm



In [None]:
# Regime by percentage: classify each generation from the values of
# coordination on A and on B.

def _classify_regime_av(share_a, share_b):
    """Return the regime label for one generation's A and B coordination values.

    The rules are checked in order and the first match wins; anything that
    matches none of them is 'Other'.
    """
    if share_a > 0.8:
        return 'Domination_AA'
    if share_b > 0.8:
        return 'Domination_BB'
    if 0.35 < share_a < 0.55 and 0.35 < share_b < 0.55:
        return 'Turn_Taking'
    if 0.51 < share_a < 0.71 and 0.18 < share_b < 0.38:
        return 'Biased_Turn_A'
    if 0.51 < share_b < 0.71 and 0.18 < share_a < 0.38:
        return 'Biased_Turn_B'
    return 'Other'


regime_av = [None] * len(summary.index)  # one slot per row of summary

for i, row in summary.iterrows():  # the row label is used as the slot position
    regime_av[i] = _classify_regime_av(row.coordination_A_perc, row.coordination_B_perc)

summary['regime_av'] = regime_av
#summary
#summary

#9 Epoch characterization
An epoch is a stretch of generations in which the regime (i.e. the top joint machine of a generation) stays the same as in a window of past generations (e.g. was the regime of this period the same as in the past ones?). Within that window of past regimes some tolerance is permitted (i.e. some of them can be different, allowing for some errors).

The algorithm considers an epoch to have started when a regime appears a minimum number of times in the window of past regimes.

In [None]:
#Epoch detection. Walks over summary.regime_jm in order and records, in the "epochs" dataframe,
#one row per detected epoch (regime, duration, start and end). A second pass then joins
#consecutive rows of the same regime that are separated by a short gap.
epochs = pd.DataFrame(columns=['epoch','duration','start','end']) #to be filled with all epoch information
#Parameters that can be changed
epoch_window = 3 #lagged regimes to be considered
epoch_tolerance = 1 #number of misses in the window before breaking an epoch

switch = False #is there an epoch already started?
length = 0 #of current epoch
current_epoch = {'epoch': '', 'duration': 0, 'start': 0, 'end': 0}#will store info of each epoch

for t,ep in enumerate(summary.regime_jm):#all generations (or regimes); t is the position, ep the regime
    if t >= epoch_window: #initial observations not considered due to lagged window
        #NOTE(review): regime_jm[tt] is a lookup by index label; assumes summary has the default 0..n-1 index
        lags = [summary.regime_jm[tt] for tt in xrange(t-epoch_window, t)] #window of past generations
        
        
        #BEGIN: An epoch begins. This is when switch is False (no epoch has started) and the current regime
        #appears at least (epoch_window - epoch_tolerance) times in the window
        if lags.count(ep) >= (epoch_window-epoch_tolerance) and switch == False: #an epoch begins
            switch = True
            current_epoch['epoch'] = ep
            length += 1
            current_epoch['duration'] = length
            current_epoch['start'] = t
        
        #CONTINUE: An epoch is already going, and the criterion is met.
        #NOTE(review): only the window count of ep is checked here, not that ep equals
        #current_epoch['epoch'] -- confirm this is intended
        elif lags.count(ep) >= (epoch_window-epoch_tolerance) and switch == True: #an epoch continues
            length += 1
            current_epoch['duration'] = length           
        
        
        #END: An epoch is going, but the criterion is not met. This could also mean there is an exception.
        #An exception is a regime that shows up few times in a row (formally, less than the epoch_tolerance).
        #The objective is that an exception doesn't end an epoch. Initially, exceptions do end them;
        #this is dealt with later, after the whole dataframe (epochs) is constructed.
        elif lags.count(ep) < (epoch_window-epoch_tolerance) and switch == True:
            current_epoch['end'] = t
            index = len(epochs.index)
            epochs.loc[index+1]=current_epoch #rows are labelled 1, 2, 3, ... (number of existing rows + 1)
            
            switch = False #reset variables
            length = 0
            current_epoch = {'epoch': '', 'duration': 0, 'start': 0, 'end': 0}

        #Separate "if" (not part of the chain above): closes an epoch still open at the last generation
        if t == len(summary.index)-1 and switch == True: #last generation that is not the end of an epoch
            current_epoch['end'] = len(summary.index) #one past the last position t
            index = len(epochs.index)
            epochs.loc[index+1]=current_epoch
    
#Once 'epochs' is finished, handle the exceptions:
#Take each row, and compare starting generation of an epoch with ending generation of the previous one.
#If they are close enough (dictated by epoch_tolerance), join both epochs (rows) as a single one

epochs_debug = copy.deepcopy(epochs)#capture for debugging before altering

#Next part should be done in a while loop until no more changes are done
#The range is computed once, from the number of rows before any merge
for i in xrange(2, len(epochs.index)+1): #no zero position considered due to epochs starting at 1 (append coding)
    regime_t = copy.deepcopy(epochs.epoch[i]) #variables from the dataframe to manipulate
    regime_lag = copy.deepcopy(epochs.epoch[i-1])
    end_t = copy.deepcopy(epochs.end[i])
    start_t = copy.deepcopy(epochs.start[i])
    end_lag = copy.deepcopy(epochs.end[i-1])
    start_lag = copy.deepcopy(epochs.start[i-1])
    
    if regime_t == regime_lag and start_t-end_lag <= epoch_tolerance:#if considered as same epoch
        #Row i absorbs row i-1: the earlier row is dropped and row i takes its start
        epochs = epochs.drop(i-1)
        epochs.loc[i, 'start'] = start_lag
        #NOTE(review): merged duration is end - start, while unmerged rows hold the count of
        #generations that met the criterion (length) -- the two definitions may differ
        epochs.loc[i, 'duration'] = end_t-start_lag #if end_t!=None else len(summary)-start_lag)

#10 Export main dataframes
Export the three main data frames, so I can work with graphs and statistics from a different file. This makes the scripts a bit more modular, and it also means that the minimization procedures (this file) only have to be run once per experiment.

In [None]:
output_file_modifier = 'deleteme'  # prefix used to change the name of the output files

# Folder that receives the Python outputs.
python_folder = "/Users/luisalejandrolee/Dropbox/Thesis Phd/\
Coordination autos Chapter three/outputs_from_python/"

# Each frame is written to <folder><modifier><tag><chosen_experiment>:
#   summary_ : main variables, averages per generation
#   jm_      : frequency of each joint machine per generation
#   epochs_  : regime (top joint machine if above threshold), start and end
#              generation, and duration of each epoch
for tag, frame in (('summary_', summary), ('jm_', freqjm), ('epochs_', epochs)):
    frame.to_csv(python_folder + output_file_modifier + tag + chosen_experiment)

#STOP! no need to run the script further
###Just for visualization
freqjm: frequency of all joint machines per generation (for transition analysis)

summary: main variables (averages per generation)

epochs: classification of epochs, with duration, starting and ending periods.

In [None]:
# Show the joint machines whose frequency percentage exceeds a threshold.

jm_freq_threshold = 0  # percentage; raise it for easier visualization
above_threshold = freqjm.freq_perc > jm_freq_threshold
freqjm[above_threshold]  # rows with frequency higher than the threshold



In [None]:
# Display the summary dataframe as the output of this notebook cell.
summary

In [None]:
# Display the epochs dataframe as the output of this notebook cell.
epochs