In [None]:
from __future__ import print_function
%matplotlib inline
import os, re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib as mpl
#mpl.style.use('v2.0')


In [None]:
# --- Configuration -----------------------------------------------------------
DIR      = 'bench_py_v013'   # directory scanned for 'run*' result files
DO_SAVE  = True # whether to save pdf output
MAX_DRIFT_PLOT = 0.005       # upper colour limit used for the drift heatmaps
IS_CONVERGED_CUTOFF = 0.75   # a run counts as converged if it reaches 95% of its
                             # final comprehension by this fraction of its rows

# --- Load one summary record per run file ------------------------------------
max_step = 0      # largest final Step value over all non-empty runs
data = {}         # (mutator_class, temperature, mutation_scale) -> summary dict
empty_files = []  # (key, filename) for runs whose table has no rows

for fname in sorted(os.listdir(DIR)):
    if not fname.startswith('run'):
        continue
    ffname = os.path.join(DIR, fname)
    with open(ffname) as f:
        # Iterate lazily: only the first '#' line mentioning num_memes is used.
        for l in f:
            if not (l.startswith('#') and 'num_memes' in l):
                continue
            # The header is a list of "name=value" pairs separated by ', '.
            # Split on the first '=' only, so a value may itself contain '='.
            d = dict(e.split('=', 1) for e in l[1:].strip().split(', '))
            key = (d['mutator_class'], d['temperature'], d['mutation_scale'])
            df = pd.read_csv(ffname, comment='#', sep=' ')
            if len(df) == 0:
                empty_files.append((key, fname))
                break

            last_row = df.iloc[-1]
            final_comprehension = last_row.Comprehension
            final_step          = last_row.Step
            max_step = max(final_step, max_step)

            # Step at which the run is required to have converged by.
            cutoff_convergence_step = df.iloc[int(len(df)*IS_CONVERGED_CUTOFF)].Step
            # First step at which comprehension reaches 95% of its final value.
            convergence_step = df[df.Comprehension >= final_comprehension*0.95].iloc[0].Step

            data[key] = {'comprehension': final_comprehension,
                         'drift': last_row.MeanGrammarDrift,
                         'agent_drift': last_row.AgentGrammarDrift,
                         'variance': last_row.GrammarVar,
                         'convergence_step': convergence_step,
                         'converged': convergence_step <= cutoff_convergence_step,
                         'file': fname}
            break
print("Loaded %d files" % len(data))

In [None]:
# List the run files that contained a header but no data rows.
if empty_files:
    print("Following %d files were empty:" % len(empty_files))
    for _, empty_name in empty_files:
        print("- ", empty_name)
    print("\n")

# Runs flagged as not converged get their comprehension curve plotted for
# visual inspection, eight small panels per row.
not_converged = dict((run_key, run) for run_key, run in data.items()
                     if not run['converged'])
if not_converged:
    print("Following %d files might not have converged:" % len(not_converged) )
    ncols = 8
    nrows = -(-len(not_converged) // ncols)   # integer ceiling division
    plt.figure(figsize=(12,1.5*nrows))
    for panel, key in enumerate(sorted(not_converged), 1):
        run_file = not_converged[key]['file']
        print("- ", run_file)
        plt.subplot(nrows, ncols, panel)
        df = pd.read_csv(DIR+'/'+run_file, comment='#', sep=' ')
        plt.plot(df.Step, df.Comprehension)
        plt.ylim([0,1])
        plt.title('%s\nT=%s, S=%s'% key, fontsize=7)
    plt.tight_layout()
    if DO_SAVE:
        plt.savefig('notconverged.pdf')

In [None]:
def floatsorted(l):
    """Return the items of ``l`` as a new list ordered by their float() value.

    The items themselves are returned unchanged (e.g. still strings); only
    the ordering uses the numeric interpretation.
    """
    ordered = list(l)
    ordered.sort(key=float)
    return ordered

# Split the (mutator, temperature, scale) keys into three columns and keep the
# distinct values of each, sorted (numerically for temperature and scale).
mutator_col, temp_col, scale_col = zip(*data.keys())
mutator_values = sorted(set(mutator_col))
temp_values    = floatsorted(set(temp_col))
scale_values   = floatsorted(set(scale_col))

# Value -> position lookups used to place each run in the heatmap grids.
ms_ixs   = dict(zip(scale_values, range(len(scale_values))))
temp_ixs = dict(zip(temp_values, range(len(temp_values))))


In [None]:
# One row of heatmaps per mutator. Each heatmap is laid out as
# mutation scale (rows) x temperature (columns); the three columns of the
# figure show comprehension, drift and convergence step.
plt.figure(figsize=(12,50))
gs = gridspec.GridSpec(len(mutator_values), 3)
panels = [('comprehension', 1), ('drift', MAX_DRIFT_PLOT), ('convergence_step', max_step)]
bottom_row = len(mutator_values) - 1
for row, mutator in enumerate(mutator_values):
    for column, (col, vmax) in enumerate(panels):
        plt.subplot(gs[row, column])

        # Cells without a value stay NaN.
        mx = np.full((len(ms_ixs), len(temp_ixs)), np.nan)
        for (run_mutator, temp, scale), run in data.items():
            if run_mutator != mutator:
                continue
            # Non-converged runs are only shown in the convergence-step panel.
            if col != 'convergence_step' and not run['converged']:
                continue
            mx[ms_ixs[scale], temp_ixs[temp]] = run[col]

        plt.imshow(mx, aspect='auto', vmin=0, vmax=vmax, interpolation='nearest', origin='lower')
        plt.xticks(range(len(temp_values)), temp_values, rotation=45)
        plt.yticks(range(len(scale_values)), scale_values)
        plt.gca().set_yticklabels(scale_values)
        if column == 0:
            plt.ylabel('Mutation scale')
        if row == bottom_row:
            plt.xlabel('Temperature')
        plt.colorbar(ticks=[0, vmax])

        if col == 'comprehension':
            titlestr = col + ' (top=%0.2f)'%np.nanmax(mx)
        else:
            titlestr = col
        plt.title(mutator+'\n'+titlestr)

plt.tight_layout()
if DO_SAVE:
    plt.savefig('comprehension_and_drift.pdf')

In [None]:
plt.figure(figsize=(10,70))
# Largest final drift, agent drift and variance among the converged runs.
maxdrift, maxagentdrift, maxvar = (
    max(run[field] for run in data.values() if run['converged'])
    for field in ('drift', 'agent_drift', 'variance')
)

def get_vals(mutator, cols):
    # NOTE(review): this helper is not called anywhere in the visible notebook
    # and looks unfinished. It ignores both of its parameters and instead reads
    # `v` and `col`, which are not defined here and so are resolved from the
    # enclosing (notebook-global) scope at call time.
    # Returns a one-element list holding v[col].
    # TODO confirm intent (the parameter is `cols`, the body uses `col`) or remove.
    return [v[col]]
# One row of scatter plots per mutator, using converged runs only:
# final comprehension (x) against mean drift, agent drift and variance (y).
gs = gridspec.GridSpec(len(mutator_values), 3)
for ndxm, mutator in enumerate(mutator_values):
    compvec       = []
    driftvec      = []
    agentdriftvec = []
    variancevec   = []
    for k, v in data.items():
        if k[0] == mutator and v['converged']:
            compvec.append(v['comprehension'])
            driftvec.append(v['drift'])
            agentdriftvec.append(v['agent_drift'])
            variancevec.append(v['variance'])

    # (y values, y-axis upper limit, y-axis label) for the three columns.
    # Each limit is the maximum over all mutators so rows are comparable.
    panels = [(driftvec,      maxdrift,      'Mean Drift'),
              (agentdriftvec, maxagentdrift, 'Agent Drift'),
              # The variance panel must be scaled by maxvar; it previously
              # reused the agent-drift maximum by mistake.
              (variancevec,   maxvar,        'Variance')]
    for ndx, (yvec, ymax, ylabel) in enumerate(panels):
        plt.subplot(gs[ndxm, ndx])
        plt.scatter(compvec, yvec)
        plt.xlim([0, 1])
        plt.ylim([0, ymax])
        plt.xlabel('Comprehension')
        plt.ylabel(ylabel)
        plt.title(mutator+'\nComp/'+ylabel+' Tradeoff')

plt.tight_layout()
if DO_SAVE:
    plt.savefig('comprehension_and_drift_tradeoff.pdf')

In [None]:
# Show the summary record of one specific run; the key is
# (mutator_class, temperature, mutation_scale), all stored as strings.
example_key = ('AdditiveSingleExppUniform', '1e-08', '0.0001')
data[example_key]