In [257]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pprint
import csv
import datetime
import math


In [258]:
def extract_time(t1,t2,t3=None,t4=None):
    
    try:
        t1 = datetime.datetime.strptime(t1, "%Y-%m-%d %H:%M:%S.%f")
    except:
        t1 = datetime.datetime.strptime(t1, "%Y-%m-%d %H:%M:%S")
        
    try:
        t2  = datetime.datetime.strptime(t2, "%Y-%m-%d %H:%M:%S.%f")
    except:
        t2  = datetime.datetime.strptime(t2, "%Y-%m-%d %H:%M:%S")
        
    if t3 is not None:
        
        try:
            t3   = datetime.datetime.strptime(t3, "%Y-%m-%d %H:%M:%S.%f")
        except:
            t3   = datetime.datetime.strptime(t3, "%Y-%m-%d %H:%M:%S")
        
        t4 = t3 + datetime.timedelta(seconds=10)
        '''
        try:
            t4  = datetime.datetime.strptime(t4, "%Y-%m-%d %H:%M:%S.%f")
        except:
            t4  = datetime.datetime.strptime(t4, "%Y-%m-%d %H:%M:%S")
        ''' 
            
        return ((t2-t1)+(t4-t3)).total_seconds()
    
    else:
        return (t2-t1).total_seconds()

In [259]:
def extract_core_overhead(df):
    alloc_start,alloc_stop,dealloc_start,dealloc_stop = df['timestamp']
    core_ov = extract_time(alloc_start,alloc_stop,dealloc_start,dealloc_stop)
    
    return core_ov

In [260]:
df_pat = pd.read_csv('pipeline_data/enmd_pat_overhead_1_1.csv')
df_pat

Unnamed: 0,step,probe,timestamp
0,step_1,start_time,2015-09-29 03:36:24.720677
1,step_1,wait_time,2015-09-29 03:36:24.732291
2,step_1,res_time,2015-09-29 03:36:29.861253
3,step_1,stop_time,2015-09-29 03:36:29.862303
4,step_2,start_time,2015-09-29 03:36:29.862336
5,step_2,wait_time,2015-09-29 03:36:29.887126
6,step_2,res_time,2015-09-29 03:36:35.925473
7,step_2,stop_time,2015-09-29 03:36:35.925956


In [261]:
def extract_pat_overhead(df):
    step_1_df = df[0:4]
    step_2_df = df[4:8]
    
    
    #print sim_df['timestamp']
    
    start1,wait1,res1,stop1 = step_1_df['timestamp']
    step_1_ov = extract_time(start1,wait1,res1,stop1)
    #part2 = extract_time(start2,wait2,res2,stop2)
    #post = extract_time(post_start,post_stop)    
    #step_1_ov = part1 + part2 + post

    start2,wait2,res2,stop2 = step_2_df['timestamp']
    step_2_ov = extract_time(start2,wait2,res2,stop2)
    #part2 = extract_time(start2,wait2,res2,stop2)
    #post = extract_time(post_start,post_stop)    
    #ana_ov = part1 + part2 + post
    
    return step_1_ov + step_2_ov

In [262]:
def extract_timing_info(df,cores,inst):
    
    step_1_data, step_2_data = extract_data_time(df,cores,inst)
    step_1_exec, step_2_exec = extract_exec_time(df,cores,inst)
    
    return [step_1_data,step_2_data],[step_1_exec, step_2_exec]

In [263]:
def extract_data_time(df,cores,inst):
    req_df = df[['step','StagingInput','Allocating','PendingAgentOutputStaging','Done']]
    
    #preloop_df = req_df[:1]
    step_1_df = req_df[0:128]
    step_2_df = req_df[128:]
    
    #preloop_data = None
    step_1_data = None
    step_2_data = None
    
    
    # Preloop extraction
    #step,t1,t2,t3,t4 =  preloop_df.loc[0,:]
    #preloop_data = extract_time(t1,t2,t3,t4)
   
    
    
    #Step_1 extraction
    per_unit_data_time  = list()
    for row in step_1_df.iterrows():
        step,t1,t2,t3,t4 = row[1:][0]
        #print t1,t2,t3,t4
        #print row
        per_unit_data_time.append(extract_time(t1,t2,t3,t4))
        
    if cores == inst:
        kern_1 = per_unit_data_time[:inst]
        kern_2 = per_unit_data_time[inst:]
        step_1_data = np.average(kern_1) + np.average(kern_2)
    elif inst > cores:
        pass
    
    
    #Step_2 extraction
    per_unit_data_time  = list()
    for row in step_2_df.iterrows():
        step,t1,t2,t3,t4 = row[1:][0]
        #print t1,t2,t3,t4
        #print row
        per_unit_data_time.append(extract_time(t1,t2,t3,t4))
    
    if cores == inst:
        kern_1 = per_unit_data_time[:inst]
        kern_2 = per_unit_data_time[inst:]
        step_2_data = np.average(kern_1) + np.average(kern_2)
    elif inst > cores:
        pass
    
    
    
    #print preloop_data, sim_data, ana_data
    #return preloop_data, sim_data, ana_data
    return step_1_data, step_2_data

In [264]:
def extract_exec_time(df,cores,inst):
    req_df = df[['step','Executing','PendingAgentOutputStaging']]
    
    step_1_df = req_df[0:128]
    step_2_df = req_df[128:]
    
    #preloop_exec = None   
    step_1_exec = None
    step_2_exec = None
    
    
    # Preloop extraction
    #step,t1,t2 =  preloop_df.loc[0,:]
    #preloop_exec = extract_time(t1,t2)

    
    #Simulation extraction
    #STUPID ME SET CORES PER CU = 2!!
    per_unit_exec_time  = list()
    for row in step_1_df.iterrows():
        step,t1,t2 = row[1:][0]
        #print t1,t2,t3,t4
        #print row
        per_unit_exec_time.append(extract_time(t1,t2))
        
    if cores == inst:
        kern_1 = per_unit_exec_time[:inst]
        kern_2 = per_unit_exec_time[inst:]
        step_1_exec = (np.average(kern_1) + np.average(kern_2))*2
    elif inst > cores:
        pass
    
  
    
    #Analysis extraction
    per_unit_exec_time  = list()
    for row in step_2_df.iterrows():
        step,t1,t2 = row[1:][0]
        #print t1,t2,t3,t4
        #print row
        per_unit_exec_time.append(extract_time(t1,t2))
        
    if cores == inst:
        kern_1 = per_unit_exec_time[:inst]
        kern_2 = per_unit_exec_time[inst:]
        step_2_exec = (np.average(kern_1) + np.average(kern_2))*2
    elif inst > cores:
        pass
    
    
    
    #print preloop_exec, sim_exec, ana_exec
    #return preloop_exec, sim_exec, ana_exec
    return step_1_exec, step_2_exec

In [265]:
def extract_rp_overhead(df,data,exec_list):
    
    #pre_loop_df = df[0:1]
    step_1_df = df[0:4]
    step_2_df = df[4:8]
  
    
￼
    '''
    start,wait,res,stop = pre_loop_df['timestamp'] 
    preloop_wait = extract_time(wait,res)
    preloop_rp_ov = preloop_wait - data[0] - exec_list[0]
    '''
    
    start1,wait1,res1,stop1 = step_1_df['timestamp']
    step_1_wait = extract_time(wait1,res1)
    step_1_rp_ov = step_1_wait - data[0] - exec_list[0]
    
    start1,wait1,res1,stop1 = step_2_df['timestamp']
    step_2_wait = extract_time(wait1,res1)
    step_2_rp_ov = step_2_wait - data[1] - exec_list[1]
    
    
    #print sim_wait, data[1], exec_list[1]
    rp_ov = step_1_rp_ov + step_2_rp_ov
    
    
    #print preloop_rp_ov, sim_rp_ov, ana_rp_ov
    
    return rp_ov

In [272]:
f_core = pd.read_csv('pipeline_data/enmd_core_overhead_{0}_{1}.csv'.format(c,t),header=0,sep=',',skipinitialspace=True)
df_core

Unnamed: 0,step,probe,timestamp
0,allocate,start_time,2015-09-29 03:15:07.116954
1,allocate,stop_time,2015-09-29 03:15:09.355502
2,deallocate,start_time,2015-09-29 03:17:47.276245
3,deallocate,stop_time,2015-09-29 03:18:01.894803


In [271]:
#MAIN

comp_df = pd.DataFrame(columns=['EnMD Core overhead',
                                'EnMD Pat overhead',
                                'RP overhead',
                                'Step 1 execution time',
                                'Step 2 execution time',
                                'data movement'])

err_df = pd.DataFrame(columns=['EnMD Core overhead',
                                'EnMD Pat overhead',
                                'RP overhead',
                                'Step 1 execution time',
                                'Step 2 execution time',
                                'data movement'])

cores = [1,16,32,64,128]
trials = 5
for c in cores:
    
    core_ov_list = list()
    pat_ov_list = list()
    rp_ov_list = list()
    data_list = list()
    #preloop_exec_list = list()
    step_1_exec_list = list()
    step_2_exec_list = list()
    
    for t in range(1,trials+1):
        
        #f_core = open('experiment_iter{0}_p{1}_i{1}/enmd_core_overhead.csv'.format(t,c))
        df_core = pd.read_csv('pipeline_data/enmd_core_overhead_{0}_{1}.csv'.format(c,t),header=0,sep=',',skipinitialspace=True)
        df_pat = pd.read_csv('pipeline_data/enmd_pat_overhead_{0}_{1}.csv'.format(c,t),header=0,sep=',',skipinitialspace=True)
        df_profile = pd.read_csv('pipeline_data/profile_{0}_{1}.csv'.format(c,t),header=0,sep=',',skipinitialspace=True)
        core_ov = extract_core_overhead(df_core)
        pat_ov = extract_pat_overhead(df_pat)
        data, exec_list = extract_timing_info(df_profile,cores=c,inst=c)
        rp_ov = extract_rp_overhead(df_pat,data,exec_list)
        
        core_ov_list.append(core_ov)
        pat_ov_list.append(pat_ov)
        rp_ov_list.append(rp_ov)
        #preloop_exec_list.append(exec_list[0])
        step_1_exec_list.append(exec_list[0])
        step_2_exec_list.append(exec_list[1])
        data_list.append(sum(data))
        #print rp_ov_list
        
    #print 'Core ov: ',core_ov_list
    #print 'Pat ov: ',pat_ov_list
    #print 'Data mov: ',data_list
    #print 'Preloop exec: ',preloop_exec_list
    #print 'Sim exec: ',sim_exec_list
    #print 'Ana exec: ',ana_exec_list
    
    comp_df.loc['{0}/{1}'.format(c,c)] = [np.average(core_ov_list),
                                          np.average(pat_ov_list),
                                          np.average(rp_ov_list),
                                          #np.average(preloop_exec_list),
                                          np.average(step_1_exec_list),
                                          np.average(step_2_exec_list),
                                          np.average(data_list)]
    
    err_df.loc['{0}/{1}'.format(c,c)] = [
                                          np.std(core_ov_list)/math.sqrt(trials),
                                          np.std(pat_ov_list)/math.sqrt(trials),
                                          np.std(rp_ov_list)/math.sqrt(trials),
                                          #np.std(preloop_exec_list)/math.sqrt(trials),
                                          np.std(step_1_exec_list)/math.sqrt(trials),
                                          np.std(step_2_exec_list)/math.sqrt(trials),
                                          np.std(data_list)/math.sqrt(trials)
                                         ]


comp_df

Unnamed: 0,EnMD Core overhead,EnMD Pat overhead,RP overhead,Step 1 execution time,Step 2 execution time,data movement
1/1,14.152523,20.036794,,9.232,,
16/16,13.500513,20.321455,,15.596175,,
32/32,13.893898,20.632267,,22.3041,,
64/64,13.874541,21.26433,,35.538362,,
128/128,13.729946,22.545342,,,,


In [267]:
N = 5
ind = np.arange(N)
width = .35

rp_overhead = comp_df['RP overhead']
enmd_pattern_overhead = comp_df['EnMD Pat overhead']
enmd_core_overhead = comp_df['EnMD Core overhead']
movement = comp_df['data movement']
analysis = comp_df['ana execution time']
simulation = comp_df['sim execution time']

rp_overhead_graphs = ()
for i in range(0,len(rp_overhead)):
    temp = (plt.bar(i,rp_overhead[i],width,color='b'))
    rp_overhead_graphs = rp_overhead_graphs + temp

enmd_pattern_overhead_graphs = ()
for i in range(0,len(enmd_pattern_overhead)):
    temp = (plt.bar(i,enmd_pattern_overhead[i],width,color='g',bottom = rp_overhead[i]))
    enmd_pattern_overhead_graphs = enmd_pattern_overhead_graphs + temp

enmd_core_overhead_graphs = ()
for i in range(0,len(enmd_core_overhead)):
    temp = (plt.bar(i,enmd_core_overhead[i],width,color='m',bottom = rp_overhead[i] + enmd_pattern_overhead[i]))
    enmd_core_overhead_graphs = enmd_core_overhead_graphs + temp

m_graphs = ()
for i in range(0,len(movement)):
    offset = rp_overhead[i] + enmd_pattern_overhead[i] + enmd_core_overhead[i]
    temp = (plt.bar(i,movement[i],width,color='r',bottom=offset))
    m_graphs = m_graphs + temp
    
a_graphs = ()
for i in range(0,len(analysis)):
    
    
    offset = rp_overhead[i] + enmd_pattern_overhead[i] + enmd_core_overhead[i] + movement[i]
    temp = (plt.bar(i,analysis[i],width,color='c',bottom=offset))
    a_graphs = a_graphs + temp

s_graphs = ()
for i in range(0,len(simulation)):
    
    #print analysis[i], movement[i]
    offset = rp_overhead[i] + enmd_pattern_overhead[i] + enmd_core_overhead[i] + movement[i] + analysis[i]
    temp = (plt.bar(i,simulation[i],width,color='y',bottom=offset))
    s_graphs = s_graphs + temp

legend_titles = ('Simulation Execution Time', 'Analysis execution time', 'Data Movement','Enmd Core Overhead', 'Enmd Pattern Overhead','Rp Overhead')
legend_tuples = (s_graphs[0], a_graphs[0], m_graphs[0],enmd_core_overhead_graphs[0], enmd_pattern_overhead_graphs[0],rp_overhead_graphs[0])

plt.ylabel('Time (seconds)')
plt.xlabel('Cores/Simulation instances (Number of analysis instances = 1)')
plt.title('Makefile and Character Count Using Simulation Analysis')
plt.xticks(ind+width/2., ('1/1', '16/16', '32/32', '64/64', '128/128') )
plt.yticks(np.arange(0,250,20))
plt.legend(legend_tuples,legend_titles,loc='upper left')
plt.grid()
#plt.legend( (p1[0], p2[0],p3[3]), ('Data Movement', 'Analysis execution time', 'Simulation Execution time'), loc='upper left' )
#plt.legend( (t1[0]), ('Data Movement', 'Analysis execution time', 'Simulation Execution time'), loc='upper left' )
#rp_overhead[i] + enmd_pattern_overhead[i]plt.savefig("/home/nikhil/Documents/research/enmd-pattern-testing/apes_sc15/img/sa_graph.png")
plt.savefig('img/sa_weak_scaling.png')

KeyError: 'ana execution time'