In [1]:
# Import log json file and view Gantt chart of the log
import json
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

In [22]:
# Gantt
def getDataGantt(logs, title, iters=50):
    '''Build a Gantt-style timeline figure from the per-node operation logs.

    Parameters
    ----------
    logs : sequence of sequences of dict
        One inner sequence per node. Every entry must provide 'start' and
        'end' values that can be subtracted. The first entry of each node
        log is skipped.
    title : str
        Title applied to the figure.
    iters : int, optional
        Highest operation index that is plotted. Operations 0..iters
        (inclusive, counted after the skipped first entry) are drawn, so each
        node contributes at most iters + 1 bars.

    Returns
    -------
    (fig, df)
        fig is the object returned by px.timeline, and df is the DataFrame
        behind it with columns Task, Start, Finish, Resource and delta.
        Start and Finish are relative to the start of the first plotted
        operation.

    Raises
    ------
    ValueError
        If there is no operation to plot.
    '''
    rows = []
    # Time zero is the start of the first operation that gets plotted. Taking
    # it from the first available record (instead of strictly node 0) avoids
    # an unbound variable when the first node log has nothing to plot.
    offset = None

    for j, log in enumerate(logs):  # Node logs
        for i, iterlog in enumerate(log[1:]):  # Operations, first entry skipped
            if i > iters:
                break
            if offset is None:
                offset = iterlog['start']
            rows.append(dict(
                Task="Iter %s" % i,
                Start=iterlog['start'] - offset,
                Finish=iterlog['end'] - offset,
                Resource="Node %s" % j,
            ))

    if not rows:
        raise ValueError("getDataGantt: no operations to plot (each node log "
                         "needs at least two entries and iters must be >= 0)")

    df = pd.DataFrame(rows)
    df['delta'] = df['Finish'] - df['Start']

    fig = px.timeline(df, x_start="Start", x_end="Finish", y="Resource", color="Task")
    fig.update_yaxes(autorange="reversed")

    # Start/Finish are plain numbers rather than dates: switch the x axis to a
    # linear scale and overwrite each trace's bar lengths with the numeric
    # durations of the matching task.
    fig.layout.xaxis.type = 'linear'
    for d in fig.data:
        d.x = df.loc[df['Task'] == d.name, 'delta'].tolist()

    fig.update_layout(title_text=title)
    return fig, df



In [3]:
def meanTime(logs):
    '''Print the mean of the 'time' field of every node log.

    One line "Node <index> <mean>" is printed per node, in the order of
    `logs`. All entries of a node log are included in the mean. A node whose
    log is empty is reported as nan instead of aborting the whole summary
    with a division by zero.

    Parameters
    ----------
    logs : sequence of sequences of dict
        One inner sequence per node; every entry must provide a numeric
        'time' value.

    Returns
    -------
    None
    '''
    for node, log in enumerate(logs):
        if not log:
            # No entries: there is no mean to compute for this node
            print('Node', node, float('nan'))
            continue

        total = 0
        for iterlog in log:
            total += iterlog['time']
        print('Node', node, total / len(log))

def commsTime(log_df):
    '''Compute the per-iteration gaps between the two node groups.

    For every iteration j below the largest 'Iter' value in `log_df`:

    * forward gap  = earliest 'Start' of group 1 in iteration j
                     minus latest 'Stop' of group 0 in iteration j
    * backward gap = earliest 'Start' of group 0 in iteration j + 1
                     minus latest 'Stop' of group 1 in iteration j
    * max time     = largest 'Delta' among all rows of iteration j

    The forward gap of each iteration is also printed.

    Parameters
    ----------
    log_df : pandas.DataFrame
        Needs the columns 'Iter', 'Group', 'Start', 'Stop' and 'Delta'.

    Returns
    -------
    (commfwd, commbwd, maxtimes)
        Three numpy arrays, each of length max(log_df['Iter']).
    '''
    # Each iteration is paired with its successor, so the loop stops one
    # short of the largest iteration index.
    n_pairs = max(log_df['Iter'])
    commfwd = np.zeros(n_pairs)
    commbwd = np.zeros(n_pairs)
    maxtimes = np.zeros(n_pairs)

    in_group0 = log_df['Group'] == 0
    in_group1 = log_df['Group'] == 1

    for j in range(n_pairs):
        this_iter = log_df['Iter'] == j
        next_iter = log_df['Iter'] == j + 1

        group0_now = log_df[this_iter & in_group0]
        group1_now = log_df[this_iter & in_group1]
        group0_next = log_df[next_iter & in_group0]

        fwd_gap = min(group1_now['Start']) - max(group0_now['Stop'])
        bwd_gap = min(group0_next['Start']) - max(group1_now['Stop'])
        print('iter', j, 'Comm time', fwd_gap)

        commfwd[j] = fwd_gap
        commbwd[j] = bwd_gap
        maxtimes[j] = max(log_df[this_iter]['Delta'])

    return commfwd, commbwd, maxtimes
    

In [11]:
# Build a dataframe from the logs
def buildDF(logs):
    '''Flatten the per-node logs into one DataFrame, one row per log entry.

    Columns: Node (position of the node log in `logs`), Iter (position of the
    entry inside its node log), Start and Stop (the entry's 'start' and 'end'
    values), Delta (Stop - Start) and Group (0 for nodes in the first half of
    `logs`, 1 otherwise).
    '''
    # Nodes with an index below this threshold belong to group 0
    half = len(logs) / 2

    records = [
        dict(
            Node=node,
            Iter=it,
            Start=entry['start'],
            Stop=entry['end'],
            Delta=entry['end'] - entry['start'],
            Group=0 if node < half else 1,
        )
        for node, log in enumerate(logs)      # Node logs
        for it, entry in enumerate(log[0:])   # Operations (every entry is kept)
    ]
    return pd.DataFrame(records)


In [26]:
# Load the JSON log of every node into `logs`, ordered by node index
n = 64           # number of node log files to read
title = '.json'  # suffix appended to each log file name; rho 0.005

logs = []
for node in range(n):
    # Files are named distributed_logs_<node><title>, relative to the working directory
    logname = f'distributed_logs_{node}{title}'
    with open(logname) as f:
        data = json.load(f)
    logs.append(data)


# logs is a list of lists (one inner list per node) with the following structure:
# logs = [[node1_1, node1_2, ...], [node2_1, node2_2, ...], ...]
# where node1_1 is the dictionary for node 1, iteration 1, with the following structure:
# node1_1 = {'start': 0, 'end': 0, 'value': 0, 'time': 0}



In [27]:
# Flatten the per-node logs into a single DataFrame (one row per log entry)
log_df = buildDF(logs)

In [28]:
# Draw the timeline titled "SDP" with iters=20; df keeps the rows behind the figure
fig, df = getDataGantt(logs, "SDP", iters=20)
fig.show()

In [29]:
# Print the mean of the 'time' field for every node log
meanTime(logs)

Node 0 0.45601449251174925
Node 1 0.18333324313163757
Node 2 0.18489542007446289
Node 3 0.0010088629722595215
Node 4 0.17080310010910035
Node 5 0.18064248871803285
Node 6 0.17646216869354248
Node 7 0.1837601125240326
Node 8 0.18645233845710754
Node 9 0.17927083992958068
Node 10 0.18054456639289856
Node 11 0.18085351800918578
Node 12 0.18382140040397643
Node 13 0.1810600323677063
Node 14 0.1747784116268158
Node 15 0.1781922299861908
Node 16 0.1850370442867279
Node 17 0.18250957942008972
Node 18 0.17699330520629883
Node 19 0.17951027131080627
Node 20 0.18319572973251344
Node 21 0.1757065098285675
Node 22 0.17923722410202025
Node 23 0.17387414574623108
Node 24 0.1795462055206299
Node 25 0.17009469032287597
Node 26 0.17865936756134032
Node 27 0.1817240459918976
Node 28 0.17612769436836243
Node 29 0.17852007579803467
Node 30 0.17493641185760497
Node 31 0.1771957550048828
Node 32 0.17290025901794434
Node 33 0.17556065368652343
Node 34 0.1829429316520691
Node 35 0.17835766887664795
Node 36 0.

In [30]:
# Per-iteration forward/backward gaps between the two node groups and the
# largest Delta per iteration (commsTime also prints each forward gap)
fwdtimes, bwdtimes, maxtimes = commsTime(log_df)
# Summary statistics over the evaluated iterations
print('Mean comm fwd', np.mean(fwdtimes))
print('Mean comm bwd', np.mean(bwdtimes))
print('Mean time', np.mean(maxtimes))
print('Max time', max(maxtimes))

iter 0 Comm time 0.1816697120666504
iter 1 Comm time 0.06747674942016602
iter 2 Comm time 0.027650117874145508
iter 3 Comm time 0.11902952194213867
iter 4 Comm time 0.008818387985229492
iter 5 Comm time 0.08414316177368164
iter 6 Comm time 0.026871919631958008
iter 7 Comm time 0.02474069595336914
iter 8 Comm time 0.05792856216430664
iter 9 Comm time 0.002289295196533203
iter 10 Comm time 0.08506965637207031
iter 11 Comm time 0.12319350242614746
iter 12 Comm time 0.01969289779663086
iter 13 Comm time 0.168182373046875
iter 14 Comm time 0.023805856704711914
iter 15 Comm time 0.03374528884887695
iter 16 Comm time 0.028429508209228516
iter 17 Comm time 0.02581930160522461
iter 18 Comm time 0.012595176696777344
iter 19 Comm time 0.0940694808959961
iter 20 Comm time 0.00039386749267578125
iter 21 Comm time 0.11060500144958496
iter 22 Comm time 0.07362174987792969
iter 23 Comm time 0.08239865303039551
iter 24 Comm time 0.058029890060424805
iter 25 Comm time 0.03019404411315918
iter 26 Comm ti