
# Make sure to edit the first cell with the appropriate paths !!!!

# Make sure to edit the first cell with the appropriate paths !!!!

In [None]:
import sys, os
from pathlib import Path
#change the path so that the directory points to the data folder on your system
#change the path so that the directory points to the data folder on your system
#change the path so that the directory points to the data folder on your system

# Data folders to load, written relative to the home directory ('~') and
# expanded to absolute paths in one pass.
data_dirs = [
    os.path.expanduser(raw_dir)
    for raw_dir in (
        '~/Documents/UCD/Crutchfield_Group/ctmc_meps/large_s_2/',
        '~/Documents/UCD/Crutchfield_Group/ctmc_meps/serial_2/',
        '~/Documents/UCD/Crutchfield_Group/ctmc_meps/serial_1/',
    )
]

In [None]:
import numpy as np
import matplotlib.pyplot as plt 
import copy


In [None]:
# Global matplotlib options that make the figures look nicer.
rc_dict = {
    'font.size': 16,
    'axes.labelsize': 'large',
    'ytick.right': False,
    'legend.loc': 'lower right',
    'legend.fontsize': 'small',
    'figure.autolayout': True,
    'figure.figsize': (10, 10),
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
}
# Apply each option through rcParams item assignment, one key at a time.
for option in rc_dict:
    plt.rcParams[option] = rc_dict[option]

In [None]:
# functions to pull data from stored npz arrays, and store as a dictionary data set
def load_from_dir(data_dir):
    """Load every ``*.npz`` archive found one directory level below ``data_dir``.

    Parameters
    ----------
    data_dir : str or path-like
        Folder whose immediate sub-directories each hold ``.npz`` files.

    Returns
    -------
    dict
        ``{sub_directory_name: {ratio_string: {array_name: ndarray}}}`` where
        ``ratio_string`` is the concatenation of every digit character in the
        file name. Sub-directories without any ``.npz`` file get no entry.
    """
    data_dict = {}

    for strength_dir in Path(data_dir).glob('*'):
        # stray files next to the strength folders cannot contain data sets
        if not strength_dir.is_dir():
            continue
        for file in strength_dir.glob('*.npz'):
            ratio_string = ''.join(char for char in file.name if char.isdigit())
            # NpzFile keeps the archive open until closed; read every array
            # into a plain dict and release the file handle right away.
            with np.load(file) as npz:
                arrays = dict(npz)
            data_dict.setdefault(strength_dir.name, {})[ratio_string] = arrays
    return data_dict

# function to combine a list of datasets into a single one
def combine_data(dicts):
    """Merge a list of data sets (as built by ``load_from_dir``) into one.

    Parameters
    ----------
    dicts : list of dict
        Each item maps ``strength -> ratio -> npz_dict``. An ``npz_dict``
        holds ``'s_values'``, ``'s_trials'`` and one array per value ``s`` in
        ``'s_values'`` stored under the key ``f'{s:05}'``.

    Returns
    -------
    dict
        A new nested dict. Where the same strength/ratio occurs in several
        inputs, the per-``s`` arrays are joined with ``np.hstack`` (so trials
        are assumed to run along the last axis), ``'s_values'`` becomes the
        sorted union of all values and ``'s_trials'`` lists the resulting
        last-axis length for each value. The inputs are never modified.
        An empty ``dicts`` yields an empty dict.
    """
    if not dicts:
        return {}

    output_dict = copy.deepcopy(dicts[0])

    for dict2 in dicts[1:]:
        for key, ratios in dict2.items():
            if key not in output_dict:
                # copy so that later merges never write into the caller's data
                output_dict[key] = copy.deepcopy(ratios)
                continue

            for k, npz_dict in ratios.items():
                if k not in output_dict[key]:
                    output_dict[key][k] = copy.deepcopy(npz_dict)
                    continue

                curr_dict = output_dict[key][k]
                S1, S2 = curr_dict['s_values'], npz_dict['s_values']
                in_first, in_second = set(S1), set(S2)
                all_S = np.array(sorted(in_first | in_second))

                merged_trials = []
                # walk the full union (not just S1) so that values present in
                # only one data set keep their data under the correct label
                for s in all_S:
                    label = f'{s:05}'
                    parts = []
                    if s in in_first:
                        parts.append(curr_dict[label])
                    if s in in_second:
                        parts.append(npz_dict[label])
                    merged = np.hstack(parts)
                    curr_dict[label] = merged
                    merged_trials.append(merged.shape[-1])

                curr_dict['s_values'] = all_S
                curr_dict['s_trials'] = merged_trials

    return output_dict

## The next 2 cells must be run before the plot below will work
### the first cell defines the data processing function
### the second cell runs the function on the data of your choosing

#### read the embedded comments for hints about how to edit what is plotted


In [None]:
def process_data_dictionary(data_dict):
    """Reduce raw EPR samples to a mean and standard deviation per ``s`` value.

    Parameters
    ----------
    data_dict : dict
        Nested mapping ``strength -> ratio -> npz_dict``. Each ``npz_dict``
        holds ``'s_values'``, ``'s_trials'`` and, for every ``s`` in
        ``'s_values'``, a 2-D array under ``f'{s:05}'`` whose row 0 is the
        MEPS EPR, row 1 the NESS EPR and row 2 the uniform EPR.

    Returns
    -------
    dict
        ``{strength: sub_dict}`` where, for every ratio key ``r``:
        ``sub_dict[r]`` is an array of shape ``(len(S), 2)`` holding
        ``[mean, std]`` of ``(NESS / MEPS) - 1`` per ``s`` value,
        ``sub_dict[r + 'S']`` is ``'s_values'`` and ``sub_dict[r + 'N']`` is
        ``'s_trials'``. The input arrays are left untouched.
    """
    all_data = {}
    for str_k, str_dict in data_dict.items():
        sub_dict = {}
        for ratio_k, ratio_dict in str_dict.items():
            N = ratio_dict['s_trials']
            S = ratio_dict['s_values']

            data = []
            for s in S:
                # work on a float copy so the caller's arrays are not modified
                d = np.array(ratio_dict[f'{s:05}'], dtype=float)
                # cleanup to handle DB machines where NESS is ~EQ
                # (note: the threshold 10E-16 is 1E-15, the fill value is 1E-16)
                d[:, d[1] < 10E-16] = 1E-16
                # cleanup to handle DB machines where meps EPR is negative
                d[:, d[0] < 10E-16] = 1E-16
                # eliminate cases where the meps algorithm failed to get an epr
                # less than the NESS epr. Columns clamped above have equal
                # rows, so they are dropped by this strict comparison as well.
                data.append(d[:, d[1] - d[0] > 0])

            # NOTE: meps_data is what the plotting cell puts on the y axis.
            # For an absolute difference you could use instead:
            #     meps_data = [d[1] - d[0] for d in data]
            # To scale the data differently, e.g. by the total number of
            # possible transitions, you could do something like:
            #     meps_data = [d/s**2-s for d, s in zip(meps_data, S)]
            meps_data = [(d[1] / d[0]) - 1 for d in data]

            # remove the few catastrophic failures that are huge outliers;
            # generally a problem only for 5 state machines or when DB
            # machines sneak through
            meps_data = [d[np.abs(d) < 100] for d in meps_data]

            # nanmean and nanstd shouldn't be doing anything here, but catch
            # any nan answers that got through the checks above
            temp_list = [[np.nanmean(item), np.nanstd(item)] for item in meps_data]

            sub_dict[ratio_k] = np.array(temp_list)
            sub_dict[ratio_k + 'S'] = S
            sub_dict[ratio_k + 'N'] = N

        all_data[str_k] = sub_dict

    return all_data



In [None]:
# Read every configured data folder into its own nested dictionary.
data_dicts = list(map(load_from_dir, data_dirs))

# Merge the individual data sets into a single one (skip this step and pick
# one entry of data_dicts if you do not want them combined).
combined_dict = combine_data(data_dicts)

# Reduce the merged samples to the per-size statistics used for plotting.
all_data = process_data_dictionary(combined_dict)

In [None]:
# these three lines keep the color coding consistent across panels
r_vals = [1, 5, 10, 20, 40, 80]
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
color_dict = {f'{i:02}': c for i, c in zip(r_vals, colors)}


k_list = sorted(int(item) for item in all_data)
# if we want to skip some pump strengths, we can do that here; the example line skips just the first one:
# k_list = k_list[1:]

# squeeze=False always returns a 2-D array of axes, so indexing below also
# works when only a single pump strength is plotted
fig, ax = plt.subplots(1, len(k_list), figsize=(34,5), sharey=False, sharex=True, squeeze=False)
ax = ax[0]
for i, key in enumerate(k_list):
    data = all_data[f'{key:04}']
    ax[i].set_xscale('log')
    ax[i].set_yscale('log')
    ax[i].set_title(f'pump_strength {key}%')

    for k, val in data.items():
        # keys ending in 'S' / 'N' hold the s values and trial counts, not statistics
        if k[-1] == 'S' or k[-1] == 'N':
            continue
        if int(k) <= 0:
            continue

        # val[:,0] is the mean and val[:,1] the standard deviation; bars show 3 sigma
        ax[i].errorbar(data[k+'S'], val[:,0], yerr=3*val[:,1], markersize=4, marker='D',c=color_dict[f"{int(k):02}"], label=f'pump_prcnt={k}')
        #ax[i].legend()


ax[-1].legend()

# here you can set manual limits:
ax[0].set_xlim(4,18_000)
#ax[0].set_ylim(-1,10)

# depending on what you want to plot, change the axes label
#ax[0].set_ylabel('EPR diff: $\\left(\\Sigma_{NESS} - \\Sigma_{MEPS}\\right)$')
ax[0].set_ylabel('EPR diff: $\\left(\\Sigma_{NESS}/\\Sigma_{MEPS}\\right)-1$')

    

In [None]:
#fig.savefig(data_dir+'../epr_diff')

# THIS NEXT PLOT SHOWS ALL DATA FOR A PARTICULAR STRENGTH VALUE

### good for diagnosing odd behaving values above


Note that the legend in each plot gives the minimum and maximum value of the EPR difference.




In [None]:
### SET STR VALUE HERE
### SET STR VALUE HERE
### SET STR VALUE HERE

strength = 25

### SET STR VALUE HERE
### SET STR VALUE HERE
### SET STR VALUE HERE

# Choose which data set to inspect. No notebook-level `data_dir` exists (the
# comprehension variable used when expanding `data_dirs` does not leak), so
# pick one entry explicitly; change the index to look at another data set.
data_dir = data_dirs[0]

folder = os.path.join(data_dir, f'{strength:04}')
# only the .npz result files get a row in the grid
files = sorted(name for name in os.listdir(folder) if name.endswith('.npz'))
J = len(files)
ax = None

for j, file in enumerate(files):
    # assumes the file name ends in '<two-digit ratio>.npz'
    ratio = file[-6:-4]
    # read what is needed and close the archive again
    with np.load(os.path.join(folder, file)) as a:
        S = a['s_values']
        N = a['s_trials']
        d = [a[f'{s:05}'] for s in S]

    # row 0 is the meps epr, row 1 the ness epr, row 2 the unif epr
    meps_data = [arr[0] for arr in d]
    ness_data = [arr[1] for arr in d]
    unif_data = [arr[2] for arr in d]

    if ax is None:
        # squeeze=False keeps `ax` 2-D even with a single file or a single S value
        fig, ax = plt.subplots(J, len(d), figsize=(30,20), sharex=True, sharey=True, squeeze=False)

    # `ratio` is already a string, so it is inserted without a numeric format spec
    ax[j,0].set_ylabel('NESS-MEPS;' + f'ratio={ratio}')
    for i in range(len(d)):
        if j == 0:
            ax[j,i].set_title(f'S={S[i]}')
        ax[-1,i].set_xlabel('MEPS EPR')

        # relative difference; negative entries are reported and left out of the scatter
        diff = (ness_data[i] - meps_data[i]) / meps_data[i]
        idx = diff < 0
        n_negative = int(idx.sum())

        if n_negative > 0:
            print(f'found {n_negative} instances in {len(idx)} total machines where (NESS-MEPS)/MEPS < 0 for S={S[i]}, pump_ratio={ratio}')
            diffmin, mepsmin, nessmin = diff[idx].min(), meps_data[i][idx].min(), ness_data[i][idx].min()
            print(f'minimum diff, meps, ness: {diffmin, mepsmin, nessmin}')

        kwargs = {'linestyle':'none','marker':'o','markersize':4}
        # the legend entry shows the smallest and largest relative difference
        extreme_vals = f'{np.min(diff):.0E},{np.max(diff):.0E}'

        ax[j,i].plot(meps_data[i][~idx], diff[~idx], **kwargs, alpha=1, label=extreme_vals)

        xmean = meps_data[i].mean()
        ymean = diff.mean()
        y_std = diff.std()

        # dashed guides: mean of both axes and the +/- 3 sigma band of the difference
        linekw = {'linewidth':1, 'linestyle':'--', 'color':'k'}
        ax[j,i].axhline(ymean, **linekw)
        ax[j,i].axhline(ymean-3*y_std, alpha=.5, **linekw)
        ax[j,i].axhline(ymean+3*y_std, alpha=.5, **linekw)
        ax[j,i].axvline(xmean, **linekw)
        #ax[j,i].axvline(40, **linekw)

        ax[j,i].legend()

        #ax[j,i].set_yscale('log')

# the axes are shared, so setting the scale once applies to the whole grid
if ax is not None:
    ax[0,0].set_yscale('log')
    ax[0,0].set_xscale('log')
    #ax[0,0].set_xticks([1E-16,1E-8,1E-1, 1E2])
    #ax[0,0].set_xlim([1E-20,100])
    #ax[0,0].set_ylim([1E-20,100])
    fig.suptitle(f'strength = {strength}')