In [9]:
import pandas as pd

# Batch identifier (also the CSV filename prefix) -> human-readable description.
batches = {
    "batch0": "large streams, small tasks",
    "batch1": "large streams, large tasks",
    "batch2": "small streams, small tasks",
    "batch3": "small streams, large tasks",
}

# CSV filename suffix -> (security enabled, redundancy enabled).
endings = {
    "noboth": (False, False),
    "nosec": (False, True),
    "nored": (True, False),
    "": (True, True),
}

# Only these columns are kept from each result file.
kept_columns = [
    "Testcase",
    "Total cost",
    "Bandwidth use (Mean,%)",
    "CPU use (Mean,%)",
]

# dfs[batch_name][ending] -> trimmed DataFrame read from "<batch_name>_<ending>.csv".
dfs = {}

for batch_name in batches:
    for ending in endings:
        df = pd.read_csv(f"{batch_name}_{ending}.csv")
        per_batch = dfs.setdefault(batch_name, {})
        per_batch[ending] = df[kept_columns]

In [10]:
# Show the batch0 results for the "noboth" variant (no security, no redundancy),
# ordered by test-case name. "Testcase" is sorted as text, so the order is
# lexicographic: e.g. batch01 is followed by batch010, not batch02.
dfs["batch0"]["noboth"].sort_values(by="Testcase")

Unnamed: 0,Testcase,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
0,batch00,4515,0.061025,1.367125
1,batch01,4427,0.113426,1.303292
10,batch010,2379,0.120012,1.320583
11,batch011,4935,0.076875,1.279833
12,batch012,4722,0.094417,1.613375
13,batch013,4550,0.094917,1.472187
14,batch014,3731,0.114056,1.416833
15,batch015,4732,0.060403,1.252312
16,batch016,2809,0.162593,1.291875
17,batch017,3884,0.044515,1.456167


In [12]:
# Show the batch0 results for the "nored" variant (security on, redundancy off),
# ordered by test-case name. "Testcase" is sorted as text, so the order is
# lexicographic: e.g. batch01 is followed by batch010, not batch02.
dfs["batch0"]["nored"].sort_values(by="Testcase")

Unnamed: 0,Testcase,Total cost,"Bandwidth use (Mean,%)","CPU use (Mean,%)"
0,batch00,5883,0.061025,1.367125
1,batch01,6172,0.113426,1.303292
10,batch010,3805,0.173043,1.320583
11,batch011,5601,0.081505,1.279833
12,batch012,7984,0.099769,1.613375
13,batch013,6688,0.106583,1.472187
14,batch014,4794,0.144889,1.416833
15,batch015,6661,0.075995,1.252312
16,batch016,4219,0.232778,1.291875
17,batch017,6570,0.064917,1.456167


In [13]:
# Column means per batch and variant: mean_xxx[batch_name][ending] -> float.
mean_bw, mean_cpu, mean_cost = {}, {}, {}

# Source column -> dictionary that collects its means.
metric_targets = (
    ("Bandwidth use (Mean,%)", mean_bw),
    ("CPU use (Mean,%)", mean_cpu),
    ("Total cost", mean_cost),
)

for batch_name in batches:
    for ending in endings:
        df = dfs[batch_name][ending]
        for column, target in metric_targets:
            # Create the per-batch dictionary on first use, then store the mean.
            target.setdefault(batch_name, {})[ending] = df[column].mean()

In [14]:
# Summary table: one row per (batch, variant).
# The baseline variant shows absolute mean values; every other variant shows
# its change relative to that baseline as a signed percentage.
columns = ["Batch name", "Security", "Redundancy", "Cost", "Bandwidth", "CPU"]

# Variant whose means are the reference for all relative rows of a batch.
BASELINE_ENDING = "noboth"


def _absolute(value):
    """Return ``value`` rounded to two decimals as a plain float."""
    return float("{0:.2f}".format(value))


def _relative(value, baseline):
    """Format the change of ``value`` versus ``baseline`` as a signed percentage.

    The sign comes from the ``+`` format flag, so decreases render as
    "-0.05%" instead of the malformed "+-0.05%".
    """
    change = round(float((value - baseline) / baseline * 100), 2)
    # Adding 0.0 turns a rounded -0.0 into 0.0, so it prints as "+0.00%".
    return "{0:+.2f}%".format(change + 0.0)


rows = []
for batch_name, description in batches.items():
    for ending, (has_security, has_redundancy) in endings.items():
        # Identify the baseline by its key rather than by row position, so the
        # table stays correct if variants are added, removed or reordered.
        is_baseline = ending == BASELINE_ENDING

        metrics = []
        for means in (mean_cost, mean_bw, mean_cpu):
            value = means[batch_name][ending]
            if is_baseline:
                metrics.append(_absolute(value))
            else:
                metrics.append(_relative(value, means[batch_name][BASELINE_ENDING]))

        rows.append([
            # The batch label is only printed on the baseline row of each group.
            f"{batch_name} - {description}" if is_baseline else "",
            "yes" if has_security else "no",
            "yes" if has_redundancy else "no",
            *metrics,
        ])

# Build the frame once instead of growing it row by row.
df_result = pd.DataFrame(rows, columns=columns)
df_result

Unnamed: 0,Batch name,Security,Redundancy,Cost,Bandwidth,CPU
0,"batch0 - large streams, small tasks",no,no,3822.08,0.09,1.44
1,,no,yes,+0.27%,+0.66%,+0.00%
2,,yes,no,+39.94%,+25.45%,+0.00%
3,,yes,yes,+258.10%,+36.64%,+15.93%
4,"batch1 - large streams, large tasks",no,no,17721.3,0.08,7.06
5,,no,yes,+-0.00%,+0.47%,+0.00%
6,,yes,no,+30.59%,+15.91%,+-0.05%
7,,yes,yes,+64.98%,+22.16%,+1.75%
8,"batch2 - small streams, small tasks",no,no,16235.2,0.08,6.76
9,,no,yes,+0.01%,+0.64%,+0.00%


In [7]:
# Persist the summary table next to the notebook.
# NOTE(review): this needs `df_result` from the summary-table cell, so run it after that cell.
df_result.to_csv("impact_results.csv")

In [17]:
# Sanity check: mean bandwidth use of the last batch with security and redundancy on.
# The frame is looked up explicitly instead of relying on the loop variable `df`
# left over from an earlier cell; after a top-to-bottom run that variable holds
# exactly this frame.
dfs["batch3"][""]["Bandwidth use (Mean,%)"].mean()

0.062296314285714284