In [1]:
import sys
import os
import numpy
# Show which Python interpreter is running this kernel.
print(sys.executable)

# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that makes the `.container` element span the full width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Fail early, with a readable message, when seaborn is not installed.
# `importlib.util` is a submodule and has to be imported explicitly: a bare
# `import importlib` does not guarantee that `importlib.util` is available.
import importlib.util

seaborn_found = importlib.util.find_spec('seaborn')
if seaborn_found is None:
    # `sys.stderr.write` is the real API; the previous `sys.write.stderr`
    # raised AttributeError instead of printing the message.
    sys.stderr.write("[error] Seaborn package not found. exit")
    # `sys.exit` is always defined, unlike the interactive-only `exit` helper.
    sys.exit(-1)

import seaborn as sns
import pandas as pd

import matplotlib as mpl

# Select the PGF backend before pyplot is imported below; the plotting cells
# save their figures as both .pdf and .pgf files.
mpl.use("pgf")

import matplotlib.pyplot as plt

# Matplotlib / PGF configuration, applied in a single update.
#
# This cell used to assign "pgf.preamble" twice. The second assignment replaced
# the first, so the packages listed first (units, metalogo, unicode-math and
# \setmathfont{xits-math.otf}) never reached LaTeX. Only the settings that were
# actually in effect are kept here.
#
# "pgf.preamble" is given as one newline-separated string: passing a list of
# strings is deprecated in recent matplotlib releases.
plt.rcParams.update({
    "font.family": "serif",       # serif font for text elements
    "font.serif": [],             # empty list: use the LaTeX default serif font
    "font.size": "18",
    "text.usetex": False,
    "pgf.rcfonts": False,         # don't set up fonts from rc parameters
    "pgf.texsystem": "pdflatex",
    "pgf.preamble": "\n".join([
        r"\usepackage[utf8x]{inputenc}",
        r"\usepackage[T1]{fontenc}",
        r"\usepackage{cmbright}",
    ]),
})


# Seaborn theming for every figure produced below.
# NOTE(review): `sns.set` and `sns.set_context` rewrite font-related rcParams,
# so the font settings chosen earlier in this cell are presumably overridden
# here -- confirm the rendered font family and size are the intended ones.
axes_style_overrides = {
    "axes.facecolor": ".9",
    "xtick.bottom": True,
    "xtick.top": False,
    "ytick.left": True,
    "ytick.right": False,
}

sns.set(style="ticks", color_codes=True)
sns.set_context("paper")
# The "darkgrid" style is applied after (and on top of) the "ticks" style above.
sns.set_style("darkgrid", axes_style_overrides)

# Close every figure that may still be open.
plt.close("all")



/usr/local/opt/python/bin/python3.7


In [2]:
# Input and output locations for this run (absolute, machine-specific paths).
DIR = "/Users/lpottier/research/usc-isi/projects/workflow-io-bb/simulation/output/"

# Paths are assembled with os.path.join so the trailing "/" of DIR does not
# produce a doubled separator ("output//simu-...").
# Alternative inputs:
# csv_file = os.path.join(DIR, "simu-bb-runs2020-1c.csv")
# csv_file = os.path.join(DIR, "simu-bb_runs2020-Xc.csv")
csv_file = os.path.join(DIR, "simu-bb_runs2020-multipipeline-1C.csv")

CORE = "1"
PLOT_DIR = "/Users/lpottier/research/usc-isi/projects/paper-workflow-bb/figures/plots/simu-swarp"
# The final "" keeps a trailing separator: the plotting cells build their file
# names as OUTPUT + "<name>".
OUTPUT = os.path.join(PLOT_DIR, "bb_runs2020-multipipeline-1C", "")

In [3]:

# Make sure the plot directory exists before any figure is saved.
# os.makedirs also creates missing parent directories, which os.mkdir does not,
# so a fresh checkout no longer fails with FileNotFoundError. A regular file
# sitting at this path now raises instead of being reported as "already exists".
if os.path.isdir(OUTPUT):
    print("Directory " , OUTPUT ,  " already exists")
else:
    # exist_ok=True tolerates the directory appearing between the check and the call.
    os.makedirs(OUTPUT, exist_ok=True)
    print("Directory " , OUTPUT ,  " Created ")


Directory  /Users/lpottier/research/usc-isi/projects/paper-workflow-bb/figures/plots/simu-swarp/bb_runs2020-multipipeline-1C/  already exists


In [4]:
# Load the space-separated simulation results and preview the first rows.
df = pd.read_csv(csv_file, sep=' ')
print(df.head())
# df.rename(columns={"FILES": "BB_NB_FILES", "CORES": "NB_CORES"})

# Collapse the rows that share the same configuration, keeping the maximum of
# each metric within a group.
group_keys = ['ID', 'NB_PIPELINE', 'FITS', 'BB_TYPE', 'BB_NB_FILES', 'AVG', 'NB_CORES']
metric_reducers = {
    'MEASURED_MKSP_S': 'max',
    'ERR_MKSP': 'max',
    'SIMULATION_S': 'max',
    'MEASURED_WALLTIME_S': 'max',
    'ERR_WALLTIME': 'max',
}

# NOTE(review): assigning to `df.agg` stores the result as an attribute that
# hides the DataFrame.agg method on this object. A standalone variable would be
# safer, but the next cell reads `df.agg`, so the name is kept here.
df.agg = df.groupby(group_keys, as_index=False).agg(metric_reducers)

         ID  AVG FITS  BB_TYPE      WORKFLOW  PLATFORM  NB_PIPELINE  PIPELINE  \
0  28171221    1    Y  PRIVATE   swarp-0.dax  cori.xml           16         1   
1  28171221    1    Y  PRIVATE   swarp-1.dax  cori.xml           16         1   
2  28171221    1    Y  PRIVATE  swarp-10.dax  cori.xml           16        10   
3  28171221    1    Y  PRIVATE  swarp-11.dax  cori.xml           16        11   
4  28171221    1    Y  PRIVATE  swarp-12.dax  cori.xml           16        12   

   NB_CORES  BB_NB_FILES  DATA_MB   LATENCY  BANDWITH  SIMULATION_S  \
0         1           32  768.516  0.117647   2.32143       148.323   
1         1           32  768.516  0.117647   2.32143       146.984   
2         1           32  768.516  0.117647   2.32143       147.638   
3         1           32  768.516  0.117647   2.32143       146.437   
4         1           32  768.516  0.117647   2.32143       147.972   

   MEASURED_MKSP_S  ERR_MKSP  MEASURED_WALLTIME_S  ERR_WALLTIME  
0          160.569  

In [5]:
# Preview the aggregated frame stored on `df.agg` by the previous cell (first 5 rows).
df.agg.head(5)

Unnamed: 0,ID,NB_PIPELINE,FITS,BB_TYPE,BB_NB_FILES,AVG,NB_CORES,MEASURED_MKSP_S,ERR_MKSP,SIMULATION_S,MEASURED_WALLTIME_S,ERR_WALLTIME
0,28171216,1,Y,PRIVATE,32,1,1,117.903,0.034509,113.834,124.981,0.089188
1,28171216,1,Y,PRIVATE,32,2,1,115.369,0.019691,113.098,124.746,0.093378
2,28171216,1,Y,PRIVATE,32,3,1,116.286,0.02576,113.291,124.924,0.093125
3,28171216,1,Y,PRIVATE,32,4,1,116.588,0.026946,113.446,126.448,0.102821
4,28171216,1,Y,PRIVATE,32,5,1,115.949,0.02222,113.373,138.705,0.182632


In [6]:
# Load the second trace file and join it with the simulation results.
# The frame is held in a plain variable: the previous `df.real = ...` attached
# it as an attribute of `df`, which pandas flags with a UserWarning and which
# hides the data on another object.
real_csv_file = "/Users/lpottier/research/usc-isi/projects/workflow-io-bb/simulation/data/trace-files/swarp/shared-cori/bb_runs2020-multipipeline-1C.csv"
df_real = pd.read_csv(real_csv_file, sep=' ')

# The same key columns drive both the merge and the aggregation below.
merge_keys = ['ID', 'NB_PIPELINE', 'FITS', 'BB_TYPE', 'AVG', 'NB_CORES']

big = pd.merge(df, df_real, on=merge_keys)

# One row per key combination, keeping the maximum of each metric.
bigagg = big.groupby(merge_keys, as_index=False).agg({
    'MEASURED_MKSP_S': 'max',
    'MAKESPAN_S': 'max',
    'SIMULATION_S': 'max',
    'MEASURED_WALLTIME_S': 'max',
    'ERR_WALLTIME': 'max',
})


  """Entry point for launching an IPython kernel.


In [7]:
# ERR: absolute gap between MAKESPAN_S and SIMULATION_S, expressed as a
# percentage of SIMULATION_S.
makespan_gap = (bigagg['MAKESPAN_S'] - bigagg['SIMULATION_S']).abs()
bigagg['ERR'] = (makespan_gap / bigagg['SIMULATION_S']) * 100
bigagg.head()

Unnamed: 0,ID,NB_PIPELINE,FITS,BB_TYPE,AVG,NB_CORES,MEASURED_MKSP_S,MAKESPAN_S,SIMULATION_S,MEASURED_WALLTIME_S,ERR_WALLTIME,ERR
0,28171216,1,Y,PRIVATE,1,1,117.903,118.37686,113.834,124.981,0.089188,3.990776
1,28171216,1,Y,PRIVATE,2,1,115.369,115.797671,113.098,124.746,0.093378,2.387019
2,28171216,1,Y,PRIVATE,3,1,116.286,116.324594,113.291,124.924,0.093125,2.677701
3,28171216,1,Y,PRIVATE,4,1,116.588,118.126835,113.446,126.448,0.102821,4.126047
4,28171216,1,Y,PRIVATE,5,1,115.949,116.434963,113.373,138.705,0.182632,2.700786


In [8]:
# Preview the raw simulation frame and list the distinct values of the columns
# used to group and filter the runs.
print(df.head())
print("BB_NB_FILES :", df.BB_NB_FILES.unique())
print("BB_TYPE     :", df.BB_TYPE.unique())
# This line prints the FITS column; it used to carry a copy-pasted "BB_TYPE" label.
print("FITS        :", df.FITS.unique())
print("NB_PIPELINE :", df.NB_PIPELINE.unique())

         ID  AVG FITS  BB_TYPE      WORKFLOW  PLATFORM  NB_PIPELINE  PIPELINE  \
0  28171221    1    Y  PRIVATE   swarp-0.dax  cori.xml           16         1   
1  28171221    1    Y  PRIVATE   swarp-1.dax  cori.xml           16         1   
2  28171221    1    Y  PRIVATE  swarp-10.dax  cori.xml           16        10   
3  28171221    1    Y  PRIVATE  swarp-11.dax  cori.xml           16        11   
4  28171221    1    Y  PRIVATE  swarp-12.dax  cori.xml           16        12   

   NB_CORES  BB_NB_FILES  DATA_MB   LATENCY  BANDWITH  SIMULATION_S  \
0         1           32  768.516  0.117647   2.32143       148.323   
1         1           32  768.516  0.117647   2.32143       146.984   
2         1           32  768.516  0.117647   2.32143       147.638   
3         1           32  768.516  0.117647   2.32143       146.437   
4         1           32  768.516  0.117647   2.32143       147.972   

   MEASURED_MKSP_S  ERR_MKSP  MEASURED_WALLTIME_S  ERR_WALLTIME  
0          160.569  

In [9]:
xlabel='Number of pipelines'
ylabel='Execution time (s)'

# Boolean row mask: True where BB_TYPE is "PRIVATE". Later cells reuse it.
is_PRIVATE = bigagg['BB_TYPE']=="PRIVATE"

# Explicit figure/axes handles instead of relying on pyplot's "current" figure.
fig, ax = plt.subplots(figsize=(4, 3))

# Each curve carries its own label, so the legend no longer depends on the
# order in which artists were added to the axes.
# NOTE(review): `markers=True` presumably has no visible effect without a
# `style=` variable -- pass `marker="o"` if point markers are wanted.
sns.lineplot(x="NB_PIPELINE", y="MAKESPAN_S", data=bigagg[is_PRIVATE],
             color="b", markers=True, label="Measured makespan", ax=ax)
sns.lineplot(x="NB_PIPELINE", y="SIMULATION_S", data=bigagg[is_PRIVATE],
             color="r", markers=True, label="Simulated makespan", ax=ax)

ax.legend(loc='upper left', title='')
ax.set(xlabel=xlabel, ylabel=ylabel, xticks=bigagg.NB_PIPELINE.unique())

fig.tight_layout()
fig.savefig(OUTPUT+'swarp-simu-private-mksp-pipeline.pdf')
fig.savefig(OUTPUT+'swarp-simu-private-mksp-pipeline.pgf')
plt.show()



In [19]:
# Summary statistics of the ERR column over the rows selected by is_PRIVATE.
private_err = bigagg[is_PRIVATE]['ERR']
print(
    "min error", min(private_err),
    ", avg error", numpy.mean(private_err),
    ", sd error", numpy.std(private_err),
    ", max error", max(private_err),
)

min error 1.7437664227745038 , avg error 6.02541057459444 , sd error 4.28390764168278 , max error 36.206739015124235


In [10]:
xlabel='Number of pipelines'
ylabel='Execution time (s)'

# Same figure as the PRIVATE one, drawn for the rows where `is_PRIVATE` is
# False (the output files name this case "striped").
fig, ax = plt.subplots(figsize=(4, 3))

# Each curve carries its own label, so the legend no longer depends on the
# order in which artists were added to the axes.
# NOTE(review): `markers=True` presumably has no visible effect without a
# `style=` variable -- pass `marker="o"` if point markers are wanted.
sns.lineplot(x="NB_PIPELINE", y="MAKESPAN_S", data=bigagg[~is_PRIVATE],
             color="b", markers=True, label="Measured makespan", ax=ax)
sns.lineplot(x="NB_PIPELINE", y="SIMULATION_S", data=bigagg[~is_PRIVATE],
             color="r", markers=True, label="Simulated makespan", ax=ax)

ax.legend(loc='upper left', title='')
ax.set(xlabel=xlabel, ylabel=ylabel, xticks=bigagg.NB_PIPELINE.unique())

fig.tight_layout()
fig.savefig(OUTPUT+'swarp-simu-striped-mksp-pipeline.pdf')
fig.savefig(OUTPUT+'swarp-simu-striped-mksp-pipeline.pgf')
plt.show()



In [20]:
# Summary statistics of the ERR column over the rows where is_PRIVATE is False.
non_private_err = bigagg[~is_PRIVATE]['ERR']
print(
    "min error", min(non_private_err),
    ", avg error", numpy.mean(non_private_err),
    ", sd error", numpy.std(non_private_err),
    ", max error", max(non_private_err),
)

min error 7.193786742077829 , avg error 11.81031162046155 , sd error 2.2367719199878575 , max error 18.348975993435506
