In [1]:
import pandas as pd

# Benchmark batches: file-name prefix -> human-readable description.
batches = {
    "batch0" : "large streams, small tasks",
    "batch1" : "large streams, large tasks",
    "batch2" : "small streams, small tasks",
    "batch3" : "small streams, large tasks",
}

# Run variants: file-name infix -> (security, redundancy) flags.
# The empty infix is the variant with both flags set.
endings = {
    "noboth" : (False, False),
    "nosec" : (False, True),
    "nored" : (True, False),
    "" : (True, True)
}

# Columns kept from every results file.
metric_columns = ["Testcase", "Total cost", 'Bandwidth use (Mean,%)', 'CPU use (Mean,%)']

# dfs[batch_name][ending] -> DataFrame restricted to `metric_columns`.
dfs = {}

for batch_name in batches:
    for ending in endings:
        # NOTE(review): for the empty ending this resolves to a double
        # underscore, e.g. "batch0__cp.csv" - confirm the files are named so.
        df = pd.read_csv(f"{batch_name}_{ending}_cp.csv")

        # setdefault creates the per-batch dict on first use, so no
        # separate "first time" branch is needed.
        dfs.setdefault(batch_name, {})[ending] = df[metric_columns]

In [11]:
# Inspect the batch0 baseline variant ("noboth") ordered by test case.
# "Testcase" holds strings, so the order is lexicographic, not numeric
# (e.g. "batch010" sorts directly after "batch01").
dfs["batch0"]["noboth"].sort_values(by="Testcase")

Unnamed: 0,Testcase,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
8,batch00,5101,0.059976,1.367125
4,batch01,4875,0.096748,1.303292
2,batch010,3581,0.105948,1.320583
3,batch011,5158,0.069089,1.279833
24,batch012,6221,0.091371,1.613375
13,batch013,4591,0.088169,1.472188
5,batch014,3670,0.106122,1.416833
23,batch015,4429,0.059306,1.252313
18,batch016,2812,0.134214,1.291875
16,batch017,5572,0.044321,1.456167


In [10]:
# Inspect the batch0 "nosec" variant ordered by test case, for side-by-side
# comparison with the baseline. "Testcase" holds strings, so the order is
# lexicographic, not numeric (e.g. "batch010" sorts directly after "batch01").
dfs["batch0"]["nosec"].sort_values(by="Testcase")

Unnamed: 0,Testcase,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
0,batch00,5093,0.059976,1.367125
14,batch01,4875,0.096748,1.303292
16,batch010,3382,0.157668,1.320583
18,batch011,5245,0.073681,1.279833
24,batch012,6066,0.097393,1.613375
15,batch013,4762,0.099569,1.472188
8,batch014,3866,0.132517,1.416833
23,batch015,4397,0.074061,1.252313
13,batch016,2790,0.203995,1.291875
7,batch017,5668,0.064632,1.456167


In [3]:
# Per-variant averages over all test cases of a batch.
# Each dict is indexed as mean_x[batch_name][ending] -> mean of one column.
mean_bw = {}
mean_cpu = {}
mean_cost = {}

# (target dict, source column) pairs; one shared loop fills all three dicts.
metric_targets = (
    (mean_bw, "Bandwidth use (Mean,%)"),
    (mean_cpu, "CPU use (Mean,%)"),
    (mean_cost, "Total cost"),
)

for batch_name in batches:
    for ending in endings:
        df = dfs[batch_name][ending]

        for target, column in metric_targets:
            # setdefault creates the per-batch dict the first time it is needed.
            target.setdefault(batch_name, {})[ending] = df[column].mean()

In [4]:
columns = ["Batch name", "Security", "Redundancy", "Cost", "Bandwidth", "CPU"]

# Variant that every other variant of the same batch is compared against.
baseline_ending = "noboth"


def _two_decimals(value):
    """Return ``value`` rounded to two decimal places as a float."""
    return float("{0:.2f}".format(value))


def _relative_change(value, baseline):
    """Format the change from ``baseline`` to ``value`` as a signed percentage.

    The ``+`` format flag emits the sign itself, so increases render as
    ``+1.23%`` and decreases as ``-1.23%`` (a literal "+" prefix would
    produce ``+-1.23%`` for decreases).
    """
    return "{0:+.2f}%".format((value - baseline) / baseline * 100)


# Summary table: for each batch, the baseline row shows absolute means and
# the remaining rows show the change relative to that baseline.
rows = []
for batch_name, description in batches.items():
    # Metric order matches the "Cost", "Bandwidth", "CPU" columns.
    batch_means = (mean_cost[batch_name], mean_bw[batch_name], mean_cpu[batch_name])

    for ending, tpl in endings.items():
        security = "yes" if tpl[0] else "no"
        redundancy = "yes" if tpl[1] else "no"

        # Identify the baseline by name rather than by row position, so the
        # table stays correct if variants are added or reordered.
        is_baseline = ending == baseline_ending

        # The batch label is shown only on the baseline row of each batch.
        name = f"{batch_name} - {description}" if is_baseline else ""

        if is_baseline:
            cost, bandwidth, cpu = (_two_decimals(means[ending]) for means in batch_means)
        else:
            cost, bandwidth, cpu = (
                _relative_change(means[ending], means[baseline_ending])
                for means in batch_means
            )

        rows.append([name, security, redundancy, cost, bandwidth, cpu])

# Build the frame once instead of growing it row by row.
df_result = pd.DataFrame(rows, columns=columns)
df_result

Unnamed: 0,Batch name,Security,Redundancy,Cost,Bandwidth,CPU
0,"batch0 - large streams, small tasks",no,no,4465.04,0.09,1.44
1,,no,yes,+-0.04%,+26.66%,+0.00%
2,,yes,no,+22.20%,+4.60%,+12.99%
3,,yes,yes,+25.27%,+36.50%,+15.93%
4,"batch1 - large streams, large tasks",no,no,17904.6,0.07,7.17
5,,no,yes,+1.06%,+17.57%,+-0.10%
6,,yes,no,+4.89%,+2.91%,+1.38%
7,,yes,yes,+5.14%,+22.09%,+1.73%
8,"batch2 - small streams, small tasks",no,no,17176.5,0.07,6.87
9,,no,yes,+0.68%,+21.46%,+0.00%


In [16]:
# Scratch check: pull the batch0 frame stored under the empty ending, i.e.
# the variant whose flags are (True, True) in `endings`, and display it.
# NOTE(review): the recorded output has no "Testcase" column although the
# loading cell selects it, so this output appears to come from a different
# kernel state - re-run after "Restart & Run All" to confirm.
df = dfs["batch0"][""]
df

Unnamed: 0,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
0,1443,0.052319,1.75625
1,1471,0.035666,2.173437
2,1741,0.068471,2.421875
3,1668,0.056689,2.31875
4,2051,0.083084,2.7125
5,1369,0.095284,4.229687
6,1925,0.048053,2.904688
7,3053,0.060029,3.592188
8,2718,0.074814,3.310938
9,1698,0.055294,2.326563


In [17]:
# Scratch check: mean of the bandwidth column for whichever frame is
# currently bound to `df`.
df["Bandwidth use (Mean,%)"].mean()

0.062296314285714284