In [1]:
import numpy as np
import pandas as pd

In [95]:
def extract_script_info(df_s, n):
    """Summarise one edit script and where its path crosses the middle row.

    The script ``df_s["s"]`` is read as a walk over a grid that starts at
    cell (row=0, col=0): ``M`` and ``S`` advance row and column, ``D``
    advances only the row, ``I`` advances only the column. Any other
    character is not counted and does not move the walk.

    Parameters
    ----------
    df_s : mapping or pandas.Series
        Must provide ``"s"`` (an iterable of operation characters) as well as
        ``"d1"`` and ``"d2"``, which are copied into the result unchanged.
    n : int
        Size that defines the middle row ``n // 2``.
        NOTE(review): presumably the length of the compared strings - confirm.

    Returns
    -------
    dict
        Keys, in this order:

        - ``DI_half``: (#D - #I) counted up to and including the step that
          enters row ``n // 2``; NaN when the script never makes that step.
        - ``nM``, ``nS``, ``nD``, ``nI``: number of each operation.
        - ``d``: ``nS + nD + nI``.
        - ``d1``, ``d2``: copied from ``df_s``.
        - ``edges``: ``nM + nS + nD + nI``.
        - ``j_cross``: column reached right after the step that enters row
          ``n // 2``; ``-1`` when that step never happens.
        - ``border_I``: number of ``I`` steps taken while on row ``n // 2``.
        - ``Delta``: ``n // 2`` minus ``j_cross`` after moving it towards
          ``n // 2`` by at most ``border_I``.
    """
    # Loop invariant; integer division avoids the float round trip of int(n/2).
    half = int(n) // 2

    # Registered first so "DI_half" is always present and stays the leading
    # key even when the middle row is never reached.
    info = {"DI_half": float("nan")}

    nM = nS = nD = nI = 0
    row = col = 0
    j_cross = -1
    ins_on_border = 0

    for op in df_s["s"]:
        # About to step from row half-1 into row half (an insertion would not
        # change the row, so it cannot be the crossing step).
        if row == half - 1 and op != "I":
            is_deletion = int(op == "D")
            # A deletion keeps the column, M/S move one column to the right.
            j_cross = col + 1 - is_deletion
            info["DI_half"] = nD + is_deletion - nI

        if op == "M":
            nM += 1
            row += 1
            col += 1
        elif op == "S":
            nS += 1
            row += 1
            col += 1
        elif op == "D":
            nD += 1
            row += 1
        elif op == "I":
            if row == half:
                ins_on_border += 1
            nI += 1
            col += 1

    # Insertions on the middle row may shift the crossing column towards the
    # centre, but never past it.
    if j_cross <= half:
        delta = half - min(j_cross + ins_on_border, half)
    else:
        delta = half - max(j_cross - ins_on_border, half)

    info.update({
        "nM": nM,
        "nS": nS,
        "nD": nD,
        "nI": nI,
        "d": nS + nD + nI,
        "d1": df_s["d1"],
        "d2": df_s["d2"],
        "edges": nS + nD + nI + nM,
        "j_cross": j_cross,
        "border_I": ins_on_border,
        "Delta": delta,
    })
    return info

In [96]:
def infos_on_file(file_name):
    """Read a CSV of edit scripts and return one row of statistics per script.

    Parameters
    ----------
    file_name : str or path-like
        CSV file readable by ``pandas.read_csv``. It must contain an ``x``
        column plus the columns that ``extract_script_info`` reads.

    Returns
    -------
    pandas.DataFrame
        One row per input row, indexed like the input, with one column per
        key returned by ``extract_script_info``.
    """
    df = pd.read_csv(file_name)

    # Every row is analysed with the length of the first row's ``x`` value.
    # Positional access avoids depending on an index label equal to 0.
    n = len(df["x"].iloc[0])

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame row by row is quadratic.
    records = [extract_script_info(row, n) for _, row in df.iterrows()]
    return pd.DataFrame(records, index=df.index)

In [97]:
# Load the per-script statistics for each of the four problem sizes.
info_64, info_256, info_1024, info_4096 = [
    infos_on_file(csv_path)
    for csv_path in (
        "/tmp/Delta64.csv",
        "/tmp/Delta256.csv",
        "/tmp/Delta1024.csv",
        "/tmp/Delta4096.csv",
    )
]



In [98]:
# Derived columns, added in place to every per-size table:
#   G        = d1 + d2 - d
#   AbsDelta = |Delta|
for table in (info_64, info_256, info_1024, info_4096):
    table["G"] = table["d1"] + table["d2"] - table["d"]
    table["AbsDelta"] = table["Delta"].abs()

In [99]:
# Mean |Delta| for each problem size, one line per size.
for template, table in (
    ("64    {0:.6f}", info_64),
    ("256   {0:.6f}", info_256),
    ("1024  {0:.6f}", info_1024),
    ("4096  {0:.6f}", info_4096),
):
    print(template.format(table.AbsDelta.mean()))

64    2.827200
256   7.598200
1024  19.581500
4096  49.750600


In [100]:
# Summary statistics for the size-256 table with every column divided by 256.
info_256.div(256).describe()

Unnamed: 0,DI_half,nM,nS,nD,nI,d,d1,d2,edges,j_cross,border_I,Delta,G,AbsDelta
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,-0.000579,0.578223,0.304326,0.117451,0.117451,0.539228,0.275552,0.275667,1.117451,0.500579,0.000428,-0.000599,0.011991,0.02968
std,0.037915,0.02008,0.031901,0.014847,0.014847,0.015153,0.011775,0.011707,0.014847,0.037915,0.001507,0.037615,0.00833,0.023114
min,-0.140625,0.507812,0.167969,0.070312,0.070312,0.472656,0.230469,0.226562,1.070312,0.378906,0.0,-0.140625,0.0,0.0
25%,-0.027344,0.566406,0.28125,0.105469,0.105469,0.527344,0.269531,0.269531,1.105469,0.476562,0.0,-0.027344,0.003906,0.011719
50%,0.0,0.578125,0.304688,0.117188,0.117188,0.539062,0.277344,0.277344,1.117188,0.5,0.0,0.0,0.011719,0.023438
75%,0.023438,0.59375,0.324219,0.128906,0.128906,0.550781,0.285156,0.285156,1.128906,0.527344,0.0,0.023438,0.015625,0.042969
max,0.121094,0.65625,0.417969,0.179688,0.179688,0.59375,0.320312,0.3125,1.179688,0.640625,0.023438,0.121094,0.0625,0.140625


In [101]:
# Summary statistics for the size-1024 table (raw, unscaled values).
info_1024.describe()

Unnamed: 0,DI_half,nM,nS,nD,nI,d,d1,d2,edges,j_cross,border_I,Delta,G,AbsDelta
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.1288,605.782,297.6646,120.5534,120.5534,538.7714,272.1461,272.0419,1144.5534,511.8712,0.1139,0.1289,5.4166,19.5815
std,24.608482,9.309981,15.652732,7.461294,7.461294,6.299628,4.971783,4.860381,7.461294,24.608482,0.406132,24.519295,3.399824,14.75598
min,-90.0,572.0,228.0,95.0,95.0,513.0,252.0,252.0,1119.0,434.0,0.0,-90.0,0.0,0.0
25%,-16.0,599.0,287.0,116.0,116.0,535.0,269.0,269.0,1140.0,495.0,0.0,-16.0,3.0,8.0
50%,0.0,606.0,298.0,120.0,120.0,539.0,272.0,272.0,1144.0,512.0,0.0,0.0,5.0,17.0
75%,17.0,612.0,308.0,125.0,125.0,543.0,276.0,275.0,1149.0,528.0,0.0,17.0,8.0,29.0
max,78.0,640.0,353.0,160.0,160.0,561.0,292.0,288.0,1184.0,602.0,6.0,78.0,22.0,90.0


In [108]:
# Add |DI_half| to the size-4096 table, then summarise it next to Delta,
# |Delta| and the deletion count.
info_4096["DI_abs"] = info_4096.DI_half.abs()
info_4096.loc[:, ["Delta", "DI_half", "AbsDelta", "DI_abs", "nD"]].describe()

Unnamed: 0,Delta,DI_half,AbsDelta,DI_abs,nD
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.7292,0.7344,49.7506,49.8768,481.9509
std,61.963506,62.062103,36.941008,36.936433,14.944511
min,-209.0,-210.0,0.0,0.0,423.0
25%,-42.0,-42.0,20.0,20.0,472.0
50%,1.0,1.0,42.0,43.0,482.0
75%,43.0,43.0,73.0,73.0,492.0
max,209.0,209.0,209.0,210.0,542.0
