In [14]:
import pandas as pd

batches = {
    "batch0" : "large streams, small tasks",
    "batch1" : "large streams, large tasks",
    "batch2" : "small streams, small tasks",
    "batch3" : "small streams, large tasks",
}

endings = {
    "noboth" : (False, False),
    "nosec" : (False, True),
    "nored" : (True, False),
    "" : (True, True)
}

dfs = {}

for batch_name, description in batches.items():
    for ending, tpl in endings.items():
        df = pd.read_csv(f"{batch_name}_{ending}_cp.csv")
        
        if batch_name not in dfs:
            dfs[batch_name] = {}
            dfs[batch_name][ending] = df[["Testcase", "Total cost", 'Bandwidth use (Mean,%)', 'CPU use (Mean,%)']]
        else:
            dfs[batch_name][ending] = df[["Testcase", "Total cost", 'Bandwidth use (Mean,%)', 'CPU use (Mean,%)']]

In [32]:
dfs["batch0"][""].sort_values(by="Testcase")

Unnamed: 0,Testcase,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
0,batch00,5193,0.061291,1.427125
6,batch01,4944,0.098334,1.378292
18,batch010,4101,0.178117,1.801
3,batch011,5268,0.075593,1.336083
5,batch013,4964,0.105599,1.634688
12,batch014,6371,0.145023,1.760583
17,batch016,5450,0.218928,1.754375
13,batch017,5829,0.068807,1.572417
15,batch018,12954,0.104006,1.573125
7,batch019,3744,0.098963,1.143958


In [15]:
mean_bw = {}
mean_cpu = {}
mean_cost = {}
for batch_name, description in batches.items():
    for ending, tpl in endings.items():
        df = dfs[batch_name][ending]
        
        if batch_name not in mean_bw:
            mean_bw[batch_name] = {}
            mean_bw[batch_name][ending] = df["Bandwidth use (Mean,%)"].mean()
        else:
            mean_bw[batch_name][ending] = df["Bandwidth use (Mean,%)"].mean()
        
        if batch_name not in mean_cpu:
            mean_cpu[batch_name] = {}
            mean_cpu[batch_name][ending] = df["CPU use (Mean,%)"].mean()
        else:
            mean_cpu[batch_name][ending] = df["CPU use (Mean,%)"].mean()
        
        if batch_name not in mean_cost:
            mean_cost[batch_name] = {}
            mean_cost[batch_name][ending] = df["Total cost"].mean()
        else:
            mean_cost[batch_name][ending] = df["Total cost"].mean()

In [24]:
columns = ["Batch name", "Security", "Redundancy", "Cost", "Bandwidth", "CPU"]

df_result = pd.DataFrame([], columns = columns)

i = 0
for batch_name, description in batches.items():
    for ending, tpl in endings.items():
        df = dfs[batch_name][ending]
        security = "yes" if tpl[0] else "no"
        redundancy = "yes" if tpl[1] else "no"
        name = f"{batch_name} - {description}" if i % 4 == 0 else ""
        
        
        if i % 4 == 0:
            cost = mean_cost[batch_name][ending]
            cost = float("{0:.2f}".format(cost))
            bandwidth = mean_bw[batch_name][ending]
            bandwidth = float("{0:.2f}".format(bandwidth))
            cpu = mean_cpu[batch_name][ending]
            cpu = float("{0:.2f}".format(cpu))
        else:
            v2 = mean_cost[batch_name][ending]
            v1 = mean_cost[batch_name]["noboth"]
            cost = "+{0:.2f}%".format((v2-v1)/v1 * 100)
            
            v2 = mean_bw[batch_name][ending]
            v1 = mean_bw[batch_name]["noboth"]
            bandwidth = "+{0:.2f}%".format((v2-v1)/v1 * 100)
            
            v2 = mean_cpu[batch_name][ending]
            v1 = mean_cpu[batch_name]["noboth"]
            cpu = "+{0:.2f}%".format((v2-v1)/v1 * 100)
        
        df_result.loc[i] = [name, security, redundancy, cost, bandwidth, cpu]
        i += 1
df_result

Unnamed: 0,Batch name,Security,Redundancy,Cost,Bandwidth,CPU
0,"batch0 - large streams, small tasks",no,no,4460.52,0.09,1.44
1,,no,yes,+0.25%,+26.66%,+0.00%
2,,yes,no,+22.37%,+4.60%,+12.99%
3,,yes,yes,+24.00%,+38.65%,+16.04%
4,"batch1 - large streams, large tasks",no,no,18097.8,0.08,7.51
5,,no,yes,+1.08%,+16.87%,+0.00%
6,,yes,no,+-1.21%,+3.58%,+0.33%
7,,yes,yes,+-1.07%,+24.28%,+0.73%
8,"batch2 - small streams, small tasks",no,no,19405.6,0.08,7.34
9,,no,yes,+0.71%,+21.42%,+-0.00%


In [16]:
df = dfs["batch0"][""]
df

Unnamed: 0,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
0,1443,0.052319,1.75625
1,1471,0.035666,2.173437
2,1741,0.068471,2.421875
3,1668,0.056689,2.31875
4,2051,0.083084,2.7125
5,1369,0.095284,4.229687
6,1925,0.048053,2.904688
7,3053,0.060029,3.592188
8,2718,0.074814,3.310938
9,1698,0.055294,2.326563


In [17]:
df["Bandwidth use (Mean,%)"].mean()

0.062296314285714284