In [1]:
# Import log json file and view Gantt chart of the log
import json
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

In [2]:
# Gantt
def getDataGantt(logs, title, iters=50):
    '''Build a Gantt-style timeline figure of per-node operation intervals.

    Parameters
    ----------
    logs : sequence of per-node logs. Each log is a sequence of dicts that
        provide numeric 'start' and 'end' entries. The first entry of every
        log is skipped.
    title : str, used as the figure title.
    iters : inclusive upper bound on the 0-based index of plotted entries
        (counted after the skipped one), i.e. at most ``iters + 1`` bars are
        produced per node.

    Returns
    -------
    (fig, df) : the Plotly figure and the DataFrame it was built from, with
        columns Task, Start, Finish, Resource and delta (= Finish - Start).
        Start/Finish are shifted so the first plotted entry starts at 0.

    Raises
    ------
    ValueError : if no entry is left to plot (no logs, every log has at most
        one entry, or ``iters`` is negative).
    '''
    dflist = []
    # Start time of the first plotted entry; every interval is shifted by it.
    # Binding it lazily (instead of only for node 0) keeps the function working
    # when the first node's log has nothing to plot.
    offset = None

    for j, log in enumerate(logs):  # Node logs
        for i, iterlog in enumerate(log[1:]):  # Operations (first entry skipped)
            if i > iters:
                break
            if offset is None:
                offset = iterlog['start']
            dflist.append(dict(
                Task="Iter %s" % i,
                Start=iterlog['start'] - offset,
                Finish=iterlog['end'] - offset,
                Resource="Node %s" % j,
            ))

    if not dflist:
        raise ValueError("getDataGantt: no log entries to plot")

    df = pd.DataFrame(dflist)
    df['delta'] = df['Finish'] - df['Start']

    fig = px.timeline(df, x_start="Start", x_end="Finish", y="Resource", color="Task")
    fig.update_yaxes(autorange="reversed")

    # Start/Finish are plain numbers here, not dates: force a linear x axis and
    # overwrite each trace's x values with the numeric durations of its task.
    fig.layout.xaxis.type = 'linear'
    for d in fig.data:
        filt = df['Task'] == d.name
        d.x = df[filt]['delta'].tolist()

    fig.update_layout(title_text=title)
    return fig, df



In [16]:
def meanTime(logs):
    '''Calculate the mean time of the operations of every node.

    Parameters
    ----------
    logs : sequence of per-node logs; each log is a sequence of dicts with a
        numeric 'time' entry.

    Returns
    -------
    list with one mean per node, in node order. A node whose log is empty
    gets NaN, so positions in the list still match node indices.

    Side effect: prints one ``Node <index> <mean>`` line per node.
    '''
    times = []
    for i, log in enumerate(logs):
        n = len(log)
        if n == 0:
            # No samples for this node: report NaN rather than dividing by zero.
            mean = float('nan')
        else:
            mean = sum(iterlog['time'] for iterlog in log) / n
        print('Node', i, mean)
        times.append(mean)
    return times

def commsTime(log_df):
    '''Derive communication gaps and iteration timings from the log frame.

    log_df must provide the columns Node, Iter, Start, Stop, Delta and Group.

    Returns a 5-tuple. With ``last = max(log_df['Iter'])``:
      commfwd[j]  - earliest Start of group 1 in iteration j minus latest Stop
                    of group 0 in iteration j
      commbwd[j]  - earliest Start of group 0 in iteration j+1 minus latest
                    Stop of group 1 in iteration j
      maxtimes[j] - largest Delta of any row in iteration j
      itertime[j] - earliest Start of group 0 in iteration j+1 minus earliest
                    Start of group 1 in iteration j
      nodecomms   - one list per node with, for iterations 1 .. last-1, the
                    node's earliest Start minus the latest Stop of the other
                    group (see the per-node loop below)
    The four arrays have length ``last``.
    '''
    n_steps = max(log_df['Iter'])
    iters = log_df['Iter']
    groups = log_df['Group']

    def earliest_start(mask):
        # Smallest Start among the rows selected by mask.
        return min(log_df[mask]['Start'])

    def latest_stop(mask):
        # Largest Stop among the rows selected by mask.
        return max(log_df[mask]['Stop'])

    commfwd, commbwd, maxtimes, itertime = (np.zeros(n_steps) for _ in range(4))

    for step in range(n_steps):
        in_step = iters == step
        g0_now = in_step & (groups == 0)
        g1_now = in_step & (groups == 1)
        g0_next = (iters == step + 1) & (groups == 0)

        g1_start = earliest_start(g1_now)
        g0_stop = latest_stop(g0_now)
        g0_next_start = earliest_start(g0_next)
        g1_stop = latest_stop(g1_now)

        itertime[step] = g0_next_start - g1_start
        commfwd[step] = g1_start - g0_stop
        commbwd[step] = g0_next_start - g1_stop
        maxtimes[step] = max(log_df[in_step]['Delta'])

    n_nodes = max(log_df['Node']) + 1
    nodecomms = []
    for node in range(n_nodes):
        is_node = log_df['Node'] == node
        if node < n_nodes / 2:
            # First half of the nodes: compare with group 1 of the previous iteration.
            shift, upstream = 1, 1
        else:
            # Second half of the nodes: compare with group 0 of the same iteration.
            shift, upstream = 0, 0
        is_upstream = groups == upstream
        nodecomms.append([
            earliest_start((iters == step) & is_node)
            - latest_stop((iters == step - shift) & is_upstream)
            for step in range(1, n_steps)
        ])

    return commfwd, commbwd, maxtimes, itertime, nodecomms
    

In [4]:
# Build a dataframe from the logs
def buildDF(logs):
    '''Flatten the per-node logs into a single DataFrame.

    Every log entry becomes one row with the columns Node (index of the log
    in logs), Iter (index of the entry in its log), Start, Stop,
    Delta (= Stop - Start) and Group (0 for the first half of the nodes,
    1 for the rest).
    '''
    records = [
        {
            'Node': node,
            'Iter': step,
            'Start': entry['start'],
            'Stop': entry['end'],
            'Delta': entry['end'] - entry['start'],
            # Nodes with an index below len(logs)/2 form group 0.
            'Group': 0 if node < len(logs) / 2 else 1,
        }
        for node, log in enumerate(logs)
        for step, entry in enumerate(log)
    ]
    return pd.DataFrame(records)


In [5]:
# Import log files 
n = 36  # number of per-node log files to read
title = '.json'  # file-name suffix appended after the node index (original note: rho 0.005)

logs = []
for node in range(n):
    # One file per node: parallel_logs_<node>.json
    logname = f'parallel_logs_{node}{title}'
    with open(logname) as f:
        data = json.load(f)
    logs.append(data)


# logs is a list of lists with the following structure:
# logs = [[node1_1, node1_2, ...], [node2_1, node2_2, ...], ...]
# where node1_1 is the dictionary for node 1, iteration 1, with the following structure:
# node1_1 = {'start': 0, 'end': 0, 'value': 0, 'time': 0}



In [6]:
# Flatten the per-node logs into one row per (Node, Iter) with Start/Stop/Delta/Group columns.
log_df = buildDF(logs)

In [9]:
# Draw the Gantt chart titled "SDP"; getDataGantt stops reading a node's log
# once the entry index exceeds iters.
fig, df = getDataGantt(logs, "SDP", iters=50)
fig.show()

In [10]:
# Per-node mean of the logged 'time' values (meanTime also prints one line per node).
t = meanTime(logs)

Node 0 0.03799902677536011
Node 1 0.005691554546356201
Node 2 0.009247145652770995
Node 3 1.7197132110595702e-05
Node 4 0.005290231704711914
Node 5 0.007668328285217285
Node 6 0.007105777263641358
Node 7 0.0072036123275756835
Node 8 0.004963295459747314
Node 9 0.006127398014068603
Node 10 0.004529471397399902
Node 11 0.005850350856781006
Node 12 0.005030512809753418
Node 13 0.004537224769592285
Node 14 0.006339485645294189
Node 15 0.004063839912414551
Node 16 0.003737049102783203
Node 17 0.0063323163986206054
Node 18 0.006177911758422852
Node 19 0.004901762008666992
Node 20 0.006585953235626221
Node 21 0.016110384464263917
Node 22 0.00929299831390381
Node 23 0.011284725666046143
Node 24 0.009932003021240234
Node 25 0.006984028816223144
Node 26 0.008265554904937744
Node 27 0.012876310348510743
Node 28 0.008564386367797851
Node 29 0.012712719440460206
Node 30 0.009792630672454833
Node 31 0.009138076305389405
Node 32 0.012499256134033203
Node 33 0.015305018424987793
Node 34 0.016705427169

In [17]:
fwdtimes, bwdtimes, maxtimes, itertimes, nodecomms = commsTime(log_df)

# Summary statistics, printed one per line in a fixed order.
# NOTE(review): the two "in group 0" labels do not match commsTime, which takes
# the maximum Delta over all rows of an iteration without a Group filter -
# confirm whether the label or the computation is the intended one.
for label, reducer, values in (
    ('Mean comm fwd', np.mean, fwdtimes),
    ('Mean comm bwd', np.mean, bwdtimes),
    ('Mean time of maxes in group 0', np.mean, maxtimes),
    ('Max time of maxes in group 0', max, maxtimes),
    ('Mean iter time', np.mean, itertimes),
    ('Max iter time', max, itertimes),
):
    print(label, reducer(values))

# Per-node communication gaps: mean and maximum over the iterations.
for i in range(n):
    node_comms = nodecomms[i]
    print('Node', i, 'Mean comm', np.mean(node_comms))
    print('Node', i, 'Max comm', max(node_comms))

Mean comm fwd 0.027555024985111122
Mean comm bwd 0.13140476833690296
Mean time of maxes in group 0 0.07061253171978575
Max time of maxes in group 0 0.24224448204040527
Mean iter time 0.3269198470645481
Max iter time 0.5179843902587891
Node 0 Mean comm 0.1570114043294167
Node 0 Max comm 0.3449440002441406
Node 1 Mean comm 0.14510489239984628
Node 1 Max comm 0.3228445053100586
Node 2 Mean comm 0.14970607903538918
Node 2 Max comm 0.2990400791168213
Node 3 Mean comm 0.14983397843886395
Node 3 Max comm 0.2868478298187256
Node 4 Mean comm 0.1610548447589485
Node 4 Max comm 0.2945859432220459
Node 5 Mean comm 0.17269945874506112
Node 5 Max comm 0.41658449172973633
Node 6 Mean comm 0.17085962879414462
Node 6 Max comm 0.3294384479522705
Node 7 Mean comm 0.18341205557998347
Node 7 Max comm 0.3427090644836426
Node 8 Mean comm 0.18591473540481257
Node 8 Max comm 0.3937852382659912
Node 9 Mean comm 0.19358994522873235
Node 9 Max comm 0.38686490058898926
Node 10 Mean comm 0.1994169585558833
Node 10 

In [None]:
# Took 90.27 s for 100 iterations