## Aggregate all necessary output files into reference datasets

### 0. Import necessary packages

In [1]:
import matsim
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET
import gzip
import duckdb
import csv

## 1. Runtime


####  Import run_info.csv for all sample sizes, alpha and stuck-time values and create data frames with relevant specifications:

#### 1 pct


In [2]:
def calculateRuntimeAndReturnDfRow(pathToFile,sampleSize, sampleNr, alpha_value, stuckTime, globalSeed):
    """Read one run_info.csv and return the run's duration as a one-row DataFrame.

    The second column of data rows 6 and 7 (0-based, header excluded) is
    parsed as the start and end timestamp; their difference is the runtime.

    Parameters
    ----------
    pathToFile : path of the comma-separated run_info.csv
    sampleSize, sampleNr, alpha_value, stuckTime, globalSeed :
        labels copied unchanged into the columns 'sample_size', 'sample_nr',
        'alpha', 'stuck_time' and 'global_seed'

    Returns
    -------
    pandas.DataFrame with index [0] and the columns 'runtime', 'sample_size',
    'sample_nr', 'alpha', 'stuck_time', 'global_seed'.
    """
    run_info = pd.read_csv(pathToFile, sep=",")
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    return pd.DataFrame(
        {
            'runtime': finished_at - started_at,
            'sample_size': sampleSize,
            'sample_nr': sampleNr,
            'alpha': alpha_value,
            'stuck_time': stuckTime,
            'global_seed': globalSeed,
        },
        index=[0],
    )
    

In [3]:
# Runtimes of the ten 1 % samples for both storage-capacity factors
# (alpha = 1.0 and alpha = 0.75), each with the default and the adjusted stuck time.
flowCapF = ["0.01"]
storCapF = ["0.01", "0.03162"]

runtimes_1pct = pd.DataFrame()

# settings that do not change inside the loops
default_stuck_time = 30.0
adjusted_stuck_time = 30.0 / float(flowCapF[0])
output_root = "/home/lola/math_cluster/output"

for fCf in flowCapF:
    # sample size label, here "1-pct"
    sample_size_as_string = f"{int(float(fCf) * 100)}-pct"
    for sCf in storCapF:
        # equal flow and storage capacity factors mean alpha = 1.0;
        # the run folders are named differently for the two alpha values
        if fCf == "0.01" and sCf == "0.01":
            alpha = 1.0
            default_run = f"-fCf_sCF_{sCf}"
            adjusted_run = f"-fCf_sCF_{sCf}"
        else:
            alpha = 0.75
            default_run = f"-fCf_0.01_sCF_{sCf}"
            adjusted_run = f"-fCf_{fCf}_sCF_{sCf}"

        for sampleNr in range(1, 11):
            rt_default = calculateRuntimeAndReturnDfRow(
                f"{output_root}/output-lausitz-1pct-{sampleNr}{default_run}_gS_default_3765/analysis/general/run_info.csv",
                sample_size_as_string, sampleNr, alpha, default_stuck_time, 'default',
            )
            rt_adjusted = calculateRuntimeAndReturnDfRow(
                f"{output_root}/output-lausitz-1-pct-{sampleNr}{adjusted_run}_gS_4711_sT_{adjusted_stuck_time}_3765/analysis/general/run_info.csv",
                sample_size_as_string, sampleNr, alpha, adjusted_stuck_time, 'default',
            )
            runtimes_1pct = pd.concat([runtimes_1pct, rt_default, rt_adjusted], ignore_index=True)




In [10]:
# Runtimes of the first 1 % sample (alpha = 1.0, stuck time 30 s) under ten
# different global seeds.
run_time_1pct_rGs = pd.DataFrame()

rGs = [4711, 3254, 2306, 6384, 4338, 6003, 5502, 9377, 5621, 9002]
for seed in rGs:
    if seed == 4711:
        # Seed 4711 is not read from its own file: reuse the runtime of
        # sample 1 from runtimes_1pct. Select the row explicitly instead of
        # relying on it being the first row of the frame.
        rt_tmp = runtimes_1pct[
            (runtimes_1pct['alpha'] == 1)
            & (runtimes_1pct['stuck_time'] == 30.0)
            & (runtimes_1pct['sample_nr'] == 1)
        ]['runtime']
        diff = rt_tmp.iloc[0]
    else:
        # The folder of seed 3254 is named "1pct", all others "1.0-pct".
        sample_tag = "1pct" if seed == 3254 else "1.0-pct"
        path = f"/home/lola/math_cluster/output/output-lausitz-{sample_tag}-1-fCf_sCF_0.01_gS_{seed}_3765/analysis/general/run_info.csv"
        temp = pd.read_csv(path, sep=",")
        # rows 6 and 7 of run_info.csv are read as start and end timestamp
        start = pd.Timestamp(temp.iloc[6, 1])
        end = pd.Timestamp(temp.iloc[7, 1])
        diff = end - start

    seed_row = pd.DataFrame(
        {'runtime': diff, 'sample_size': '1-pct', 'alpha': 1.0, 'stuck_time': 30.0, 'global_seed': "rnd_" + str(seed)},
        index=[rGs.index(seed)],
    )
    run_time_1pct_rGs = pd.concat([run_time_1pct_rGs, seed_row], axis=0)

#### 5pct

In [192]:
# Runtimes of the ten 5 % samples (alpha = 1.0, stuck time 30 s).
run_time_5pct = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-5.0-pct-{sample_nr}-fCf_sCF_0.05_gS_4711_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_5pct.append(finished_at - started_at)
run_time_5pct = pd.DataFrame(
    {
        'runtime': run_time_5pct,
        'sample_size': "5-pct",
        'alpha': 1.0,
        'stuck_time': 30.0,
        'global_seed': "default",
    }
)

In [193]:
# Runtimes of the ten 5 % samples with storage capacity factor 0.10574
# (alpha = 0.75, stuck time 30 s).
run_time_5pct_sCf = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-5.0-pct-{sample_nr}-fCf_0.05_sCF_0.10574_gS_4711_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_5pct_sCf.append(finished_at - started_at)
run_time_5pct_sCf = pd.DataFrame(
    {
        'runtime': run_time_5pct_sCf,
        'sample_size': "5-pct",
        'alpha': 0.75,
        'stuck_time': 30.0,
        'global_seed': "default",
    }
)

In [194]:
# Runtimes of the first 5 % sample (alpha = 1.0, stuck time 30 s) under ten
# different global seeds.
run_time_5pct_rGs = pd.DataFrame()
rGs = [4711, 3254, 2306, 6384, 4338, 6003, 5502, 9377, 5621, 9002]

for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        # seed 4711 is not read from its own file: take the first row of run_time_5pct
        diff = run_time_5pct["runtime"].iloc[0]
    else:
        path = f"/home/lola/math_cluster/output/output-lausitz-5.0-pct-1-fCf_sCF_0.05_gS_{seed}_3765/analysis/general/run_info.csv"
        temp = pd.read_csv(path, sep=",")
        # rows 6 and 7 of run_info.csv are read as start and end timestamp
        start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
        diff = end - start

    seed_row = pd.DataFrame(
        {'runtime': diff, 'sample_size': '5-pct', 'alpha': 1.0, 'stuck_time': 30.0, 'global_seed': global_seed},
        index=[rGs.index(seed)],
    )
    run_time_5pct_rGs = pd.concat([run_time_5pct_rGs, seed_row], axis=0)

In [195]:
# Runtimes of the ten 5 % samples with the adjusted stuck time of 600 s (alpha = 1.0).
run_time_5pct_sT = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-5-pct-{sample_nr}-fCf_sCF_0.05_gS_4711_sT_600.0_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_5pct_sT.append(finished_at - started_at)
run_time_5pct_sT = pd.DataFrame(
    {
        'runtime': run_time_5pct_sT,
        'sample_size': "5-pct",
        'alpha': 1.0,
        'stuck_time': 600.0,
        'global_seed': "default",
    }
)

In [None]:
# Runtimes of the ten 5 % samples with storage capacity factor 0.10574
# (alpha = 0.75) and the adjusted stuck time of 600 s.
run_time_5pct_sT_sCf = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-5-pct-{sample_nr}-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_5pct_sT_sCf.append(finished_at - started_at)
run_time_5pct_sT_sCf = pd.DataFrame(
    {
        'runtime': run_time_5pct_sT_sCf,
        'sample_size': "5-pct",
        'alpha': 0.75,
        'stuck_time': 600.0,
        'global_seed': "default",
    }
)
    

#### 10 pct #### 


In [None]:
# Runtimes of the ten 10 % samples (alpha = 1.0, stuck time 30 s).
run_time_10pct = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-10.0-pct-{sample_nr}-fCf_sCF_0.1_gS_4711_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_10pct.append(finished_at - started_at)
run_time_10pct = pd.DataFrame(
    {
        'runtime': run_time_10pct,
        'sample_size': "10-pct",
        'alpha': 1.0,
        'stuck_time': 30.0,
        'global_seed': "default",
    }
)

In [None]:
# Runtimes of the ten 10 % samples with storage capacity factor 0.17783
# (alpha = 0.75, stuck time 30 s).
run_time_10pct_sCf = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-10.0-pct-{sample_nr}-fCf_0.1_sCF_0.17783_gS_4711_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_10pct_sCf.append(finished_at - started_at)
run_time_10pct_sCf = pd.DataFrame(
    {
        'runtime': run_time_10pct_sCf,
        'sample_size': "10-pct",
        'alpha': 0.75,
        'stuck_time': 30.0,
        'global_seed': "default",
    }
)

In [None]:

# Runtimes of the ten 10 % samples with the adjusted stuck time of 300 s (alpha = 1.0).
run_time_10pct_sT = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-10-pct-{sample_nr}-fCf_sCF_0.1_gS_4711_sT_300.0_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_10pct_sT.append(finished_at - started_at)
run_time_10pct_sT = pd.DataFrame(
    {
        'runtime': run_time_10pct_sT,
        'sample_size': "10-pct",
        'alpha': 1.0,
        'stuck_time': 300.0,
        'global_seed': "default",
    }
)

In [None]:
# Runtimes of the ten 10 % samples with storage capacity factor 0.17783
# (alpha = 0.75) and the adjusted stuck time of 300 s.
run_time_10pct_sT_sCf = []
for sample_nr in range(1, 11):
    run_info = pd.read_csv(
        f"/home/lola/math_cluster/output/output-lausitz-10-pct-{sample_nr}-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765/analysis/general/run_info.csv",
        sep=",",
    )
    # rows 6 and 7 of run_info.csv are read as start and end timestamp
    started_at, finished_at = (pd.Timestamp(run_info.iloc[row, 1]) for row in (6, 7))
    run_time_10pct_sT_sCf.append(finished_at - started_at)
run_time_10pct_sT_sCf = pd.DataFrame(
    {
        'runtime': run_time_10pct_sT_sCf,
        'sample_size': "10-pct",
        'alpha': 0.75,
        'stuck_time': 300.0,
        'global_seed': "default",
    }
)


#### 25 pct

In [None]:
# 25 % run, alpha = 1.0, stuck time 30 s.
path = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_25pct = pd.DataFrame(
    dict(runtime=diff, sample_size='25-pct', alpha=1.0, stuck_time=30.0, global_seed="default"),
    index=[0],
)

In [None]:
# 25 % run with storage capacity factor 0.35355 (alpha = 0.75), stuck time 30 s.
path = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_25pct_sCf = pd.DataFrame(
    dict(runtime=diff, sample_size='25-pct', alpha=0.75, stuck_time=30.0, global_seed="default"),
    index=[1],
)

In [None]:
# 25 % run with the adjusted stuck time of 120 s (alpha = 1.0).
path = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_25pct_sT = pd.DataFrame(
    dict(runtime=diff, sample_size='25-pct', alpha=1.0, stuck_time=120.0, global_seed="default"),
    index=[2],
)

In [None]:
# 25 % run with storage capacity factor 0.35355 (alpha = 0.75) and stuck time 120 s.
path = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_25pct_sT_sCf = pd.DataFrame(
    dict(runtime=diff, sample_size='25-pct', alpha=0.75, stuck_time=120.0, global_seed="default"),
    index=[3],
)

#### 50 pct

In [None]:
# 50 % run, alpha = 1.0, stuck time 30 s.
path = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_50pct = pd.DataFrame(
    dict(runtime=diff, sample_size='50-pct', alpha=1.0, stuck_time=30.0, global_seed="default"),
    index=[0],
)

In [None]:
# 50 % run with storage capacity factor 0.5946 (alpha = 0.75), stuck time 30 s.
path = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_50pct_sCf = pd.DataFrame(
    dict(runtime=diff, sample_size='50-pct', alpha=0.75, stuck_time=30.0, global_seed="default"),
    index=[1],
)

In [None]:
# 50 % run with the adjusted stuck time of 60 s (alpha = 1.0).
path = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_50pct_sT = pd.DataFrame(
    dict(runtime=diff, sample_size='50-pct', alpha=1.0, stuck_time=60.0, global_seed="default"),
    index=[2],
)

In [None]:
# 50 % run with storage capacity factor 0.5946 (alpha = 0.75) and stuck time 60 s.
path = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_50pct_sT_sCf = pd.DataFrame(
    dict(runtime=diff, sample_size='50-pct', alpha=0.75, stuck_time=60.0, global_seed="default"),
    index=[3],
)

In [None]:
# "25-pct-doubled" run (flow and storage capacity factor 0.5), stuck time 30 s.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_25_pct_doubled = pd.DataFrame(
    dict(runtime=diff, sample_size='25-pct-doubled', alpha=1.0, stuck_time=30.0, global_seed="default"),
    index=[4],
)

#### 100 pct

In [None]:
# 100 % run, alpha = 1.0, stuck time 30 s.
path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_100pct = pd.DataFrame(
    dict(runtime=diff, sample_size='100-pct', alpha=1.0, stuck_time=30.0, global_seed="default"),
    index=[0],
)


In [None]:
# "25-pct-quadrupled" run (flow and storage capacity factor 1.0), stuck time 30 s.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25.0-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711_3765/analysis/general/run_info.csv"
temp = pd.read_csv(path, sep=",")
# rows 6 and 7 of run_info.csv are read as start and end timestamp
start, end = (pd.Timestamp(temp.iloc[row, 1]) for row in (6, 7))
diff = end - start
run_time_25_pct_quadrupled = pd.DataFrame(
    dict(runtime=diff, sample_size='25-pct-quadrupled', alpha=1.0, stuck_time=30.0, global_seed="default"),
    index=[4],
)

#### Concatenate all runtime data frames, convert the runtimes to decimal hours, and write them to CSV

In [None]:
# Stack the runtime frames of all sample sizes, in order of sample size.
runtime_frames = [
    # 1 %
    runtimes_1pct, run_time_1pct_rGs,
    # 5 %
    run_time_5pct, run_time_5pct_sCf, run_time_5pct_sT, run_time_5pct_sT_sCf, run_time_5pct_rGs,
    # 10 %
    run_time_10pct, run_time_10pct_sCf, run_time_10pct_sT, run_time_10pct_sT_sCf,
    # 25 %
    run_time_25pct, run_time_25pct_sCf, run_time_25pct_sT, run_time_25pct_sT_sCf,
    # 50 %
    run_time_50pct, run_time_50pct_sCf, run_time_50pct_sT, run_time_50pct_sT_sCf, run_time_25_pct_doubled,
    # 100 %
    run_time_100pct, run_time_25_pct_quadrupled,
]
runtimes_1_100 = pd.concat(runtime_frames)


In [None]:
# Cast the runtimes to second resolution and express them as decimal hours
# (dividing by a one-hour Timedelta yields a float).
runtimes_1_100['runtime'] = runtimes_1_100['runtime'].astype('timedelta64[s]') / pd.Timedelta(hours=1)

In [None]:
# Move 'runtime' to the last column and write the result to CSV.
# The columns are reordered by name: the concatenated frame has six columns
# ('runtime', 'sample_size', 'sample_nr', 'alpha', 'stuck_time', 'global_seed'),
# so a positional selection of five columns silently drops 'global_seed'.
ordered_columns = [col for col in runtimes_1_100.columns if col != 'runtime'] + ['runtime']
runtimes_1_100 = runtimes_1_100[ordered_columns]
runtimes_1_100.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/runtimes_1_to_100pct_correct.csv', index = False)

## 2. Number of stuck-time violations

#### Function to filter the output events and count the number of stuck-time violations (`stuckAndContinue` events) per run

In [None]:
# Number of stuck vehicles
def countNumberOfStuckTimeViolations(pathToEvents, sampleSize, sampleNr, alpha, globalSeed, stuckTime):
    """Count the ``stuckAndContinue`` events in one events file.

    Parameters
    ----------
    pathToEvents : path of the events file passed to ``matsim.event_reader``
    sampleSize, sampleNr, alpha, globalSeed, stuckTime :
        labels copied unchanged into the columns 'sample_size', 'sample_nr',
        'alpha', 'global_seed' and 'stuck_time'

    Returns
    -------
    pandas.DataFrame with index [0] and the columns 'n_stuck_time_violations',
    'sample_size', 'sample_nr', 'alpha', 'global_seed', 'stuck_time'.
    """
    # Read events - filter and return the listed event types only
    events = matsim.event_reader(
        pathToEvents,
        types="stuckAndContinue",
    )

    # Collect the matching events first and build the frame once: growing a
    # DataFrame with pd.concat per event re-copies all earlier rows each time.
    # Each event is copied in case the reader reuses the mapping it yields.
    stuck_events = [dict(event) for event in events if event["type"] == "stuckAndContinue"]
    stuck_time_violations = pd.DataFrame(stuck_events)

    # Optional export of the detailed events of this run (disabled):
    #outputPath = "/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_detailed_per_run/stuck_time_violations_lausitz_" + sampleSize +"-"+str(sampleNr) + "-alpha-"+ str(alpha) + "-gS-" + globalSeed + "-sT-" + str(stuckTime) + ".csv"
    #stuck_time_violations.to_csv(outputPath, index = False)
    return pd.DataFrame(
        {
            'n_stuck_time_violations': stuck_time_violations.shape[0],
            'sample_size': sampleSize,
            'sample_nr': sampleNr,
            'alpha': alpha,
            'global_seed': globalSeed,
            'stuck_time': stuckTime,
        },
        index=[0],
    )


#### 1pct

In [None]:
# Stuck-time violations of the ten 1 % samples for both storage-capacity
# factors, each with the default and the adjusted stuck time.
flowCapF = ["0.01"]
storCapF = ["0.01", "0.03162"]

stuck_veh_1pct = pd.DataFrame()

# settings that do not change inside the loops
default_stuck_time = 30.0
adjusted_stuck_time = 30.0 / float(flowCapF[0])
output_root = "/home/lola/math_cluster/output"

for fCf in flowCapF:
    # sample size label, here "1-pct"
    sample_size_as_string = f"{int(float(fCf) * 100)}-pct"
    for sCf in storCapF:
        for sampleNr in range(1, 11):
            if fCf == "0.01" and sCf == "0.01":
                # cases 1 and 3: equal capacity factors
                alpha = 1.0
                events_default = f"{output_root}/output-lausitz-1pct-{sampleNr}-fCf_sCF_{sCf}_gS_default_3765/lausitz-1pct-{sampleNr}-fCf_sCf_0.01_gS_default_3765.output_events.xml.gz"
                events_adjusted = f"{output_root}/output-lausitz-1-pct-{sampleNr}-fCf_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765/lausitz-1-pct-{sampleNr}-fCf_sCF_0.01_gS_4711_sT_3000.0_3765.output_events.xml.gz"
            else:
                # cases 2 and 4: larger storage capacity factor
                alpha = 0.75
                events_default = f"{output_root}/output-lausitz-1pct-{sampleNr}-fCf_0.01_sCF_{sCf}_gS_default_3765/lausitz-1pct-{sampleNr}-fCf_0.01_sCf_0.03162_gS_default_3765.output_events.xml.gz"
                events_adjusted = f"{output_root}/output-lausitz-1-pct-{sampleNr}-fCf_0.01_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765/lausitz-1-pct-{sampleNr}-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765.output_events.xml.gz"

            # count the violations of both runs and append them to the data frame
            count_default = countNumberOfStuckTimeViolations(events_default, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
            count_adjusted = countNumberOfStuckTimeViolations(events_adjusted, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
            stuck_veh_1pct = pd.concat([stuck_veh_1pct, count_default, count_adjusted], ignore_index=True)


In [None]:
# Stuck-time violations of the first 1 % sample (alpha = 1.0, stuck time 30 s)
# under ten different global seeds.
stuck_time_violations_1pct_rGs = pd.DataFrame()
rGs = [4711, 3254, 2306, 6384, 4338, 6003, 5502, 9377, 5621, 9002]
for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        # Seed 4711 is not read from its own file: reuse the count of sample 1
        # from stuck_veh_1pct. Take the scalar with .iloc[0]; putting the
        # filtered Series into the dict makes pandas align it to index [0],
        # which yields NaN whenever the matching row does not carry label 0.
        matching = stuck_veh_1pct[
            (stuck_veh_1pct['alpha'] == 1.0)
            & (stuck_veh_1pct['stuck_time'] == 30.0)
            & (stuck_veh_1pct["sample_nr"] == 1)
        ]["n_stuck_time_violations"]
        stuck_time_violations = pd.DataFrame(
            {'n_stuck_time_violations': matching.iloc[0], 'sample_size': '1-pct', 'sample_nr': 1, 'alpha': 1.0, 'stuck_time': 30.0, 'global_seed': global_seed},
            index=[rGs.index(4711)],
        )
    else:
        if seed == 3254:
            # the run with seed 3254 uses the "1pct" / "sCf" naming
            path = "/home/lola/math_cluster/output/output-lausitz-1pct-1-fCf_sCF_0.01_gS_3254_3765/lausitz-1pct-1-fCf_sCf_0.01_gS_3254_3765.output_events.xml.gz"
        else:
            path = f"/home/lola/math_cluster/output/output-lausitz-1.0-pct-1-fCf_sCF_0.01_gS_{seed}_3765/lausitz-1.0-pct-1-fCf_sCF_0.01_gS_{seed}_3765.output_events.xml.gz"
        stuck_time_violations = countNumberOfStuckTimeViolations(path, "1-pct", 1, 1.0, global_seed, 30.0)

    stuck_time_violations_1pct_rGs = pd.concat([stuck_time_violations_1pct_rGs, stuck_time_violations], axis=0, ignore_index=True)

In [None]:
# Concatenate the per-sample and the random-seed results and write them to file
stuck_time_violation_1pct = pd.concat(
    [stuck_veh_1pct, stuck_time_violations_1pct_rGs]
)
stuck_time_violation_1pct.to_csv(
    path_or_buf='/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_1_pct.csv',
    index=False,
)

#### 5 pct

In [None]:
# Stuck-time violations of the ten 5 % samples for both storage-capacity
# factors, each with the default and the adjusted stuck time.
flowCapF = ["0.05"]
storCapF = ["0.05", "0.10574"]

stuck_time_violations_5pct = pd.DataFrame()

# settings that do not change inside the loops
default_stuck_time = 30.0
adjusted_stuck_time = 30.0 / float(flowCapF[0])
output_root = "/home/lola/math_cluster/output"

for fCf in flowCapF:
    # sample size label, here "5-pct"
    sample_size_as_string = f"{int(float(fCf) * 100)}-pct"
    for sCf in storCapF:
        for sampleNr in range(1, 11):
            # the default-stuck-time events file of sample 6 carries a "-2" suffix
            file_suffix = "-2" if sampleNr == 6 else ""

            if fCf == "0.05" and sCf == "0.05":
                # cases 1 and 3: equal capacity factors
                alpha = 1.0
                events_default = f"{output_root}/output-lausitz-5.0-pct-{sampleNr}-fCf_sCF_{sCf}_gS_4711_3765/lausitz-5.0-pct-{sampleNr}-fCf_sCF_0.05_gS_4711_3765{file_suffix}.output_events.xml.gz"
                events_adjusted = f"{output_root}/output-lausitz-5-pct-{sampleNr}-fCf_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765/lausitz-5-pct-{sampleNr}-fCf_sCF_0.05_gS_4711_sT_600.0_3765.output_events.xml.gz"
            else:
                # cases 2 and 4: larger storage capacity factor
                alpha = 0.75
                events_default = f"{output_root}/output-lausitz-5.0-pct-{sampleNr}-fCf_{fCf}_sCF_{sCf}_gS_4711_3765/lausitz-5.0-pct-{sampleNr}-fCf_0.05_sCF_0.10574_gS_4711_3765{file_suffix}.output_events.xml.gz"
                events_adjusted = f"{output_root}/output-lausitz-5-pct-{sampleNr}-fCf_{fCf}_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765/lausitz-5-pct-{sampleNr}-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765.output_events.xml.gz"

            # count the violations of both runs and append them to the data frame
            count_default = countNumberOfStuckTimeViolations(events_default, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
            count_adjusted = countNumberOfStuckTimeViolations(events_adjusted, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
            stuck_time_violations_5pct = pd.concat([stuck_time_violations_5pct, count_default, count_adjusted], ignore_index=True)
                
                    

In [None]:
# Stuck-time violations of the first 5 % sample (alpha = 1.0, stuck time 30 s)
# under ten different global seeds.
stuck_time_violations_5pct_rGs = pd.DataFrame()
rGs = [4711, 3254, 2306, 6384, 4338, 6003, 5502, 9377, 5621, 9002]

for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        # Seed 4711 is not read from its own file: reuse the count of sample 1
        # from stuck_time_violations_5pct. Take the scalar with .iloc[0];
        # putting the filtered Series into the dict makes pandas align it to
        # index [0], which yields NaN whenever the matching row does not carry
        # label 0.
        matching = stuck_time_violations_5pct[
            (stuck_time_violations_5pct['alpha'] == 1.0)
            & (stuck_time_violations_5pct['stuck_time'] == 30.0)
            & (stuck_time_violations_5pct["sample_nr"] == 1)
        ]["n_stuck_time_violations"]
        stuck_time_violations = pd.DataFrame(
            {'n_stuck_time_violations': matching.iloc[0], 'sample_size': '5-pct', 'sample_nr': 1, 'alpha': 1.0, 'stuck_time': 30.0, 'global_seed': global_seed},
            index=[rGs.index(4711)],
        )
    else:
        path = f"/home/lola/math_cluster/output/output-lausitz-5.0-pct-1-fCf_sCF_0.05_gS_{seed}_3765/lausitz-5.0-pct-1-fCf_sCF_0.05_gS_{seed}_3765.output_events.xml.gz"
        stuck_time_violations = countNumberOfStuckTimeViolations(path, "5-pct", 1, 1.0, global_seed, 30.0)

    stuck_time_violations_5pct_rGs = pd.concat([stuck_time_violations_5pct_rGs, stuck_time_violations], axis=0, ignore_index=True)


In [None]:
# Concatenate the per-sample and the random-seed results and write them to file
stuck_time_violation_5pct = pd.concat(
    [stuck_time_violations_5pct, stuck_time_violations_5pct_rGs]
)
stuck_time_violation_5pct.to_csv(
    path_or_buf='/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_5_pct.csv',
    index=False,
)

#### 10 pct

In [None]:
# Stuck-time violations of the ten 10 % samples for both storage-capacity
# factors, each with the default and the adjusted stuck time.
flowCapF = ["0.1"]
storCapF = ["0.1", "0.17783"]

stuck_time_violations_10pct = pd.DataFrame()

# settings that do not change inside the loops
default_stuck_time = 30.0
adjusted_stuck_time = 30.0 / float(flowCapF[0])
output_root = "/home/lola/math_cluster/output"

for fCf in flowCapF:
    # sample size label, here "10-pct"
    sample_size_as_string = f"{int(float(fCf) * 100)}-pct"
    for sCf in storCapF:
        for sampleNr in range(1, 11):
            if fCf == "0.1" and sCf == "0.1":
                # cases 1 and 3: equal capacity factors
                alpha = 1.0
                events_default = f"{output_root}/output-lausitz-10.0-pct-{sampleNr}-fCf_sCF_{sCf}_gS_4711_3765/lausitz-10.0-pct-{sampleNr}-fCf_sCF_0.1_gS_4711_3765.output_events.xml.gz"
                events_adjusted = f"{output_root}/output-lausitz-10-pct-{sampleNr}-fCf_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765/lausitz-10-pct-{sampleNr}-fCf_sCF_0.1_gS_4711_sT_300.0_3765.output_events.xml.gz"
            else:
                # cases 2 and 4: larger storage capacity factor
                alpha = 0.75
                events_default = f"{output_root}/output-lausitz-10.0-pct-{sampleNr}-fCf_{fCf}_sCF_{sCf}_gS_4711_3765/lausitz-10.0-pct-{sampleNr}-fCf_0.1_sCF_0.17783_gS_4711_3765.output_events.xml.gz"
                events_adjusted = f"{output_root}/output-lausitz-10-pct-{sampleNr}-fCf_{fCf}_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765/lausitz-10-pct-{sampleNr}-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765.output_events.xml.gz"

            # count the violations of both runs and append them to the data frame
            count_default = countNumberOfStuckTimeViolations(events_default, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
            count_adjusted = countNumberOfStuckTimeViolations(events_adjusted, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
            stuck_time_violations_10pct = pd.concat([stuck_time_violations_10pct, count_default, count_adjusted], ignore_index=True)
                    

In [None]:
# Write the 10 % stuck-time violation counts to file
stuck_time_violations_10pct.to_csv(
    path_or_buf='/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_10_pct.csv',
    index=False,
)

#### 25 pct

In [None]:
# Stuck-time violations of the single 25 % sample for both storage-capacity
# factors, each with the default and the adjusted stuck time.
flowCapF = ["0.25"]
storCapF = ["0.25", "0.35355"]

stuck_time_violations_25pct = pd.DataFrame()
counter = 0

# settings that do not change inside the loops
default_stuck_time = 30.0
adjusted_stuck_time = 30.0 / float(flowCapF[0])
output_root = "/home/lola/math_cluster/output"

for fCf in flowCapF:
    # sample size label, here "25-pct"
    sample_size_as_string = f"{int(float(fCf) * 100)}-pct"
    for sCf in storCapF:
        for sampleNr in range(1, 2):
            # folder ("output-<run>") and events file ("<run>.output_events.xml.gz")
            # share the run name
            if fCf == "0.25" and sCf == "0.25":
                # cases 1 and 3: equal capacity factors
                alpha = 1.0
                run_default = "lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765"
                run_adjusted = "lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765"
            else:
                # cases 2 and 4: larger storage capacity factor
                alpha = 0.75
                run_default = "lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765"
                run_adjusted = "lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765"

            # count the violations of both runs and append them to the data frame
            counts = [
                countNumberOfStuckTimeViolations(
                    f"{output_root}/output-{run_name}/{run_name}.output_events.xml.gz",
                    sample_size_as_string, sampleNr, alpha, "default", run_stuck_time,
                )
                for run_name, run_stuck_time in (
                    (run_default, default_stuck_time),
                    (run_adjusted, adjusted_stuck_time),
                )
            ]
            stuck_time_violations_25pct = pd.concat([stuck_time_violations_25pct, *counts], ignore_index=True)


In [None]:
# Write the collected 25 pct stuck time violation rows to CSV (row index omitted).
stuck_time_violations_25pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_25_pct.csv', index = False) 

#### 50 pct

In [None]:
# Collect stuck time violation counts for the 50 pct runs.
# One flow capacity factor is combined with two storage capacity factors;
# the branch below maps them to alpha = 1.0 and alpha = 0.75.
flowCapF = ["0.5"]
storCapF =  ["0.5", "0.5946"]

stuck_time_violations_50pct = pd.DataFrame()
# NOTE(review): counter is assigned here but not read anywhere in this cell.
counter = 0
for fCf in flowCapF:
    for sCf in storCapF:
            # range(1,2,1) yields only sample number 1
            for sampleNr in range(1,2,1):
                # default stuck time and stuck time divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # build the sample size label from the flow capacity factor, here "50-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

                # declare path based on case 
                if((fCf == "0.5") & (sCf == "0.5")):
                    # equal flow and storage capacity factors: cases 1 and 3
                    alpha = 1.0
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765.output_events.xml.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765/lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765.output_events.xml.gz"
                    
                    # calculate number of stuck time violations and attach them to the data frame
                    temp_case1 = countNumberOfStuckTimeViolations(path_case1, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case3 = countNumberOfStuckTimeViolations(path_case3, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    stuck_time_violations_50pct = pd.concat([stuck_time_violations_50pct, temp_case1, temp_case3], ignore_index= True)     
                else:
                    # larger storage capacity factor: cases 2 and 4
                    alpha = 0.75
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765/lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765.output_events.xml.gz"
                    path_case4 ="/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765/lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765.output_events.xml.gz"
                    
                    # calculate number of stuck time violations and attach them to the data frame
                    temp_case2 = countNumberOfStuckTimeViolations(path_case2, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case4 = countNumberOfStuckTimeViolations(path_case4, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    stuck_time_violations_50pct = pd.concat([stuck_time_violations_50pct, temp_case2, temp_case4], ignore_index= True)
                    



In [None]:
# 25 pct doubled: count stuck time violations for the single "25-pct-doubled" run
# (sample number 1, alpha 1.0, stuck time 30.0).
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711_3765/lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711__3765.output_events.xml.gz"
stuck_time_violations_25pct_doubled = countNumberOfStuckTimeViolations(path, "25-pct-doubled", 1, 1.0, "default", 30.0)

In [None]:
# Append the "25-pct-doubled" row to the 50 pct rows and write the result to CSV.
# NOTE: the combined frame is stuck_time_violation_50pct (no "s" after "violation");
# stuck_time_violations_50pct itself is left without the doubled row.
stuck_time_violation_50pct = pd.concat([stuck_time_violations_50pct, stuck_time_violations_25pct_doubled])
stuck_time_violation_50pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_50_pct.csv', index = False) 

#### 100 pct

In [None]:
# 100 pct: count stuck time violations for the single 100 pct run
# (sample number 1, alpha 1.0, stuck time 30.0).
path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765.output_events.xml.gz"
stuck_time_violations_100pct = countNumberOfStuckTimeViolations(path,  "100-pct", 1, 1.0, "default", 30.0)


In [None]:
# Count stuck time violations for the single "25-pct-quadrupled" run
# (sample number 1, alpha 1.0, stuck time 30.0).
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25.0-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711_3765/lausitz-25-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711__3765.output_events.xml.gz"
stuck_time_violations_25pct_quadrupled = countNumberOfStuckTimeViolations(path,  "25-pct-quadrupled", 1, 1.0, "default", 30.0)


In [None]:
# Combine the 100 pct row with the "25-pct-quadrupled" row.
# NOTE: the combined frame is stuck_time_violation_100pct (no "s" after "violation").
stuck_time_violation_100pct = pd.concat([stuck_time_violations_100pct, stuck_time_violations_25pct_quadrupled])

#### pd.concat and write csv

In [None]:
# Stack the per-sample-size frames into one table covering 1 pct to 100 pct.
# NOTE(review): the 50 pct entry is stuck_time_violations_50pct, i.e. the frame
# WITHOUT the "25-pct-doubled" row, while the 100 pct entry is the combined frame
# that includes the "25-pct-quadrupled" row. Confirm this asymmetry is intended.
stuck_time_violation_1_100 = pd.concat([stuck_time_violation_1pct, stuck_time_violation_5pct, stuck_time_violations_10pct,
                                        stuck_time_violations_25pct, stuck_time_violations_50pct, stuck_time_violation_100pct])

In [None]:
# Write the combined 1 pct to 100 pct table to CSV (row index omitted).
stuck_time_violation_1_100.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_violations_1_to_100pct_July_28_2025.csv', index = False) 

## 2a. Time distribution of stuck time violations and link types

In [2]:
def networkInfoToDf(pathToNetwork):
    """Read a gzipped network XML file into a data frame with one row per link.

    Every child element of each ``links`` element directly under the XML root
    is treated as a link. Its ``id``, ``length``, ``freespeed`` and
    ``capacity`` attributes are read; ``freespeed`` is multiplied by 3.6
    (presumably m/s to km/h -- confirm against the network file).

    The link type is the text of the first grandchild element of the link
    whose ``name`` attribute equals ``"type"`` and that carries text. Links
    without such an element get ``'NA'``. Exactly one type is recorded per
    link, so all columns always have the same length.

    Parameters
    ----------
    pathToNetwork : path of the gzip-compressed XML file.

    Returns
    -------
    pandas.DataFrame with the columns ``link``, ``length``, ``freespeed``,
    ``capacity`` and ``link_type``.

    Raises
    ------
    KeyError if a link lacks one of the four required attributes.
    """
    # the context manager closes the gzip handle even if parsing fails
    with gzip.open(pathToNetwork, 'rb') as network_file:
        root = ET.parse(network_file).getroot()

    # convert network to data frame
    ids = []
    length = []
    freespeed = []
    capacity = []
    type_of_link = []

    for links in root.findall('links'):
        for link in links:
            ids.append(link.attrib['id'])
            length.append(float(link.attrib['length']))
            freespeed.append(float(link.attrib['freespeed'])*3.6)
            capacity.append(float(link.attrib['capacity']))
            # take the first usable "type" entry only; appending once per link
            # keeps type_of_link aligned with the other columns
            link_type = next(
                (
                    attr.text
                    for child in link
                    for attr in child
                    if attr.attrib.get('name') == "type" and attr.text is not None
                ),
                'NA',
            )
            type_of_link.append(link_type)

    network_df = pd.DataFrame({'link': ids, 'length': length, 'freespeed': freespeed, 'capacity': capacity, 'link_type': type_of_link })
    return network_df

In [3]:
# Load the network once; df_net is passed to the link type aggregation below.
pathToLausitzNetwork = '/home/lola/Downloads/lausitz-v2024.2-network.xml.gz'
df_net = networkInfoToDf(pathToLausitzNetwork)

In [4]:
def countStuckTimeViolationsPerHour(pathToFile, sampleSizeString, sample_nr,  alpha, stuck_time):
    """Count the rows of a stuck time violation CSV per hour.

    The CSV must contain a ``time`` column. Each row is assigned to the bin
    ``floor(time / 3600)`` (presumably seconds to hours -- confirm against the
    file that is read). Bins 0 to 35 are always reported, with a count of 0
    where no row falls into the bin; rows in other bins are not counted.

    Returns a data frame with 36 rows and the columns ``hour``, ``n_sTv``,
    ``sample_size``, ``sample_nr``, ``alpha`` and ``stuck_time``; the last four
    repeat the values passed in.
    """
    violations = pd.read_csv(pathToFile)
    violations['hour'] = np.floor(violations['time']/ 3600)

    hours = list(range(36))
    counts = [int((violations['hour'] == h).sum()) for h in hours]
    n_rows = len(hours)

    return pd.DataFrame({
        'hour': hours,
        'n_sTv': counts,
        'sample_size': sampleSizeString,
        'sample_nr': [sample_nr] * n_rows,
        'alpha': [alpha] * n_rows,
        'stuck_time': [stuck_time] * n_rows,
    })


In [5]:
def countLinkTypesOfNumberOfStuckTimeVioaltions(df_net,pathToFile, sampleSizeString, sample_nr, alpha, stuck_time):
    """Count the rows of a stuck time violation CSV per link type.

    The CSV is left-merged with ``df_net`` on the ``link`` column, so
    ``df_net`` must provide ``link`` and ``link_type``. One output row is
    produced per distinct ``link_type`` in order of first appearance.

    Rows whose link has no match in ``df_net`` end up with a missing
    ``link_type``. They are reported in their own row (``link_type`` is NaN)
    with their real count; a plain ``== NaN`` comparison would always report 0
    for them.

    Returns a data frame with the columns ``link_type``, ``n_sTv``,
    ``sample_size``, ``sample_nr``, ``alpha`` and ``stuck_time``; the last four
    repeat the values passed in.
    """
    df_sTv = pd.read_csv(pathToFile)
    df_sTv = pd.merge(df_sTv, df_net, on = "link", how= "left")
    link_types = df_sTv['link_type']

    link_type_storage = []
    n_sTv_per_link_type = []
    for link_type in link_types.unique():
        if pd.isna(link_type):
            # NaN never compares equal to itself, so count missing types explicitly
            n_rows = int(link_types.isna().sum())
        else:
            n_rows = int((link_types == link_type).sum())
        link_type_storage.append(link_type)
        n_sTv_per_link_type.append(n_rows)

    n_types = len(link_type_storage)
    df_sTv_LinkType = pd.DataFrame({
        'link_type': link_type_storage,
        'n_sTv': n_sTv_per_link_type,
        'sample_size': [sampleSizeString] * n_types,
        'sample_nr': [sample_nr] * n_types,
        'alpha': [alpha] * n_types,
        'stuck_time': [stuck_time] * n_types,
    })
    return df_sTv_LinkType

In [6]:
def countLinkTypesAndStVPerHour(df_net,pathToFile,sampleSizeString, sample_nr, alpha, stuck_time):
    """Run both aggregations on one stuck time violation CSV.

    Returns a two-element list: the per-hour table first, the per-link-type
    table second.
    """
    per_hour = countStuckTimeViolationsPerHour(pathToFile, sampleSizeString, sample_nr, alpha, stuck_time)
    per_link_type = countLinkTypesOfNumberOfStuckTimeVioaltions(df_net,pathToFile, sampleSizeString, sample_nr, alpha, stuck_time)
    return [per_hour, per_link_type]

In [8]:
# Aggregate the detailed per-run stuck time violation CSVs into two tables:
# violations per hour and violations per link type.
sampleSizes = ["0.01", "0.05", "0.1", "0.25", "0.5", "1.0"]
# NOTE(review): alpha_values is not read in this cell; the alpha values are hard-coded below.
alpha_values = ["1.0", "0.75"]

base_path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs/stuck_time_viiolations_detailed_per_run/"
df_nStV_per_hour = pd.DataFrame()
df_nStV_per_link_type = pd.DataFrame()

for sampleSize in sampleSizes:
    for sampleNr in range(1,11,1):
        # default stuck time and stuck time divided by the sample size
        default_stuck_time = 30.0
        adjusted_stuck_time = 30.0/float(sampleSize)
        # build the sample size label, e.g. "1-pct" for "0.01"
        sample_size_as_string = str(int(float(sampleSize)*100)) + "-pct"
        # sample sizes above 0.1 are only processed for sample number 1
        if ((float(sampleSize) > 0.1) & (sampleNr >= 2)):
            continue

        # sample size "1.0": only case 1 (alpha 1.0, default stuck time) is read;
        # break then leaves the sampleNr loop
        elif ((sampleSize == "1.0") & (sampleNr == 1)):
            path_case_1 = base_path + "stuck_time_violations_lausitz_" + sample_size_as_string +"-"+ str(sampleNr) +"-alpha-"+ str(1.0) + "-gS-default-sT-" + str(default_stuck_time) + ".csv"
            res_case_1 = countLinkTypesAndStVPerHour(df_net, path_case_1, sample_size_as_string, sampleNr, 1.0, default_stuck_time)
            df_nStV_per_hour = pd.concat([df_nStV_per_hour, res_case_1[0]], ignore_index= True)
            df_nStV_per_link_type = pd.concat([df_nStV_per_link_type, res_case_1[1]], ignore_index= True)
            break  
            

        # four cases per sample: alpha 1.0 / 0.75 combined with default / adjusted stuck time
        path_case_1 = base_path + "stuck_time_violations_lausitz_" + sample_size_as_string +"-"+ str(sampleNr) +"-alpha-"+ str(1.0) + "-gS-default-sT-" + str(default_stuck_time) + ".csv"
        path_case_2 = base_path + "stuck_time_violations_lausitz_" + sample_size_as_string +"-"+ str(sampleNr) +"-alpha-"+ str(0.75) + "-gS-default-sT-" + str(default_stuck_time) + ".csv"
        path_case_3 = base_path + "stuck_time_violations_lausitz_" + sample_size_as_string +"-"+ str(sampleNr) +"-alpha-"+ str(1.0) + "-gS-default-sT-" + str(adjusted_stuck_time) + ".csv"
        path_case_4 = base_path + "stuck_time_violations_lausitz_" + sample_size_as_string +"-"+ str(sampleNr) +"-alpha-"+ str(0.75) + "-gS-default-sT-" + str(adjusted_stuck_time) + ".csv"
        
        # aggregate each case; element [0] is the per-hour table, element [1] the per-link-type table
        res_case_1 = countLinkTypesAndStVPerHour(df_net, path_case_1, sample_size_as_string, sampleNr, 1.0, default_stuck_time)
        res_case_2 = countLinkTypesAndStVPerHour(df_net, path_case_2, sample_size_as_string, sampleNr, 0.75, default_stuck_time)
        res_case_3 = countLinkTypesAndStVPerHour(df_net, path_case_3, sample_size_as_string, sampleNr, 1.0, adjusted_stuck_time)
        res_case_4 = countLinkTypesAndStVPerHour(df_net, path_case_4, sample_size_as_string, sampleNr, 0.75, adjusted_stuck_time)
        df_nStV_per_hour = pd.concat([df_nStV_per_hour, res_case_1[0],res_case_2[0], res_case_3[0], res_case_4[0]], ignore_index= True)
        df_nStV_per_link_type = pd.concat([df_nStV_per_link_type, res_case_1[1], res_case_2[1], res_case_3[1], res_case_4[1]], ignore_index= True)     

                   

In [14]:
# Write the per-hour stuck time violation counts to CSV (row index omitted).
df_nStV_per_hour.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/N_sTV_per_hour.csv', index = False) 

In [10]:
# Write the per-link-type stuck time violation counts to CSV (row index omitted).
df_nStV_per_link_type.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/N_sTV_per_Link_Type.csv', index = False) 

## 3. Count number of link leave events

#### Function to filter output_events and count the number of link leave events

In [25]:
# number of link leave events
def countLinkLeaveEvents(pathToEvents, sampleSize, sampleNr, alpha, globalSeed, stuckTime):
    """Count "left link" events of one run and stream their times to a CSV.

    Events are read with ``matsim.event_reader`` restricted to the type
    "left link". Events whose ``link`` contains "pt_" are skipped. For every
    remaining event the ``time`` value is written as one row to a CSV in the
    ``Link_leave_hour`` output folder; the file name is built from the run
    labels passed in.

    Returns a one-row data frame with the columns ``n_link_leave_events``,
    ``sample_size``, ``sample_nr``, ``alpha``, ``global_seed`` and
    ``stuck_time``.
    """
    columns = ["time"]
    n_left_link = 0

    # the reader is created before the output file is opened, as before
    link_leave_stream = matsim.event_reader(
        pathToEvents,
        types="left link",
    )

    with open(f"/home/lola/Nextcloud/Masterarbeit/03_Outputs/Link_leave_hour/link_leave_hour_{sampleSize}_sample_nr_{str(sampleNr)}_alpha_{str(alpha)}_gS_{globalSeed}_sT_{str(stuckTime)}.csv", 'w', newline='') as out_file:
        out = csv.DictWriter(out_file, fieldnames=columns)
        out.writeheader()

        for ev in link_leave_stream:
            if ev["type"] != "left link":
                continue
            if "pt_" in ev['link']:
                continue
            n_left_link += 1
            out.writerow({col: ev[col] for col in columns})

    summary = {
        'n_link_leave_events': n_left_link,
        'sample_size': sampleSize,
        'sample_nr': sampleNr,
        'alpha': alpha,
        'global_seed': globalSeed,
        'stuck_time': stuckTime,
    }
    return pd.DataFrame(summary, index =[0])
    

#### 1 pct

In [26]:
# Count link leave events for the ten 1 pct samples.
# One flow capacity factor is combined with two storage capacity factors;
# the branch below maps them to alpha = 1.0 and alpha = 0.75.
flowCapF = ["0.01"]
storCapF =  ["0.01", "0.03162"]

link_leave_events_1pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
        for sampleNr in range(1,11,1):
            # default stuck time and stuck time divided by the flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0/float(flowCapF[0])
            # build the sample size label from the flow capacity factor, here "1-pct"
            sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

            if ((fCf == "0.01") & (sCf == '0.01')):
                alpha = 1.0
                # paths for case 1 and 3 
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-1pct-" + str(sampleNr) + "-fCf_sCF_"+ sCf + "_gS_default_3765/lausitz-1pct-"+str(sampleNr)+ "-fCf_sCf_0.01_gS_default_3765.output_events.xml.gz"
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-1-pct-" + str(sampleNr) + "-fCf_sCF_0.01_gS_4711_sT_3000.0_3765.output_events.xml.gz"
                
                # count the link leave events of both runs and attach them to the data frame
                temp_case1 = countLinkLeaveEvents(path_case1, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                temp_case3 = countLinkLeaveEvents(path_case3, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                link_leave_events_1pct = pd.concat([link_leave_events_1pct, temp_case1, temp_case3], ignore_index= True)
 
            else:
                alpha = 0.75
                # paths for case 2 and 4 
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-1pct-" + str(sampleNr) + "-fCf_0.01_sCF_" + sCf + "_gS_default_3765/lausitz-1pct-" +str(sampleNr)+ "-fCf_0.01_sCf_0.03162_gS_default_3765.output_events.xml.gz"
                path_case4 = "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(sampleNr) + "-fCf_0.01_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-1-pct-" + str(sampleNr) +"-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765.output_events.xml.gz"
                
                # count the link leave events of both runs and attach them to the data frame
                temp_case2 = countLinkLeaveEvents(path_case2, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                temp_case4 = countLinkLeaveEvents(path_case4, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                link_leave_events_1pct = pd.concat([link_leave_events_1pct, temp_case2, temp_case4], ignore_index= True)


In [27]:
#  1 pct random seed
# Link leave counts for sample 1 of the 1 pct runs under ten global seeds
# (alpha 1.0, stuck time 30.0). Seed 4711 reuses the count already stored in
# link_leave_events_1pct; all other seeds are counted from their events files.
link_leave_1pct_rGs = pd.DataFrame()
rGs = [ 4711,3254, 2306, 6384,4338, 6003, 5502, 9377, 5621, 9002 ]
for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        default_run = link_leave_events_1pct[
            (link_leave_events_1pct['alpha'] == 1.0)
            & (link_leave_events_1pct['stuck_time'] == 30.0)
            & (link_leave_events_1pct['sample_nr'] == 1)
        ]
        # take the count as a scalar so the row does not depend on the Series index
        # happening to match the index of the new one-row frame
        n_default_run = default_run['n_link_leave_events'].iloc[0]
        # sample_nr is 1 because the row above was selected with sample_nr == 1;
        # this keeps the row consistent with the rows built by countLinkLeaveEvents
        temp = pd.DataFrame({'n_link_leave_events': n_default_run, 'sample_size': '1-pct', 'sample_nr': 1, 'alpha': 1.0, 'global_seed': global_seed, 'stuck_time': 30.0}, index=[rGs.index(4711)])
        link_leave_1pct_rGs = pd.concat([link_leave_1pct_rGs, temp])
    elif seed == 3254:
        # this run follows a different folder naming scheme than the other seeds
        path ="/home/lola/math_cluster/output/output-lausitz-1pct-1-fCf_sCF_0.01_gS_3254_3765/lausitz-1pct-1-fCf_sCf_0.01_gS_3254_3765.output_events.xml.gz"
        temp = countLinkLeaveEvents(path, "1-pct", 1, 1.0, global_seed, 30.0)
        link_leave_1pct_rGs = pd.concat([link_leave_1pct_rGs, temp], axis = 0)
    else:
        path = "/home/lola/math_cluster/output/output-lausitz-1.0-pct-1-fCf_sCF_0.01_gS_" +str(seed) + "_3765/lausitz-1.0-pct-1-fCf_sCF_0.01_gS_" + str(seed) + "_3765.output_events.xml.gz"
        # alpha is passed as the literal 1.0 (these are fCf_sCF_0.01 runs); the
        # global `alpha` still holds the last value set by the preceding loop cell
        temp = countLinkLeaveEvents(path, "1-pct", 1, 1.0, global_seed, 30.0)
        link_leave_1pct_rGs = pd.concat([link_leave_1pct_rGs, temp], axis = 0)

In [28]:
# Append the random seed rows to the 1 pct table and renumber the row index.
link_leave_events_1pct = pd.concat([link_leave_events_1pct,link_leave_1pct_rGs], axis = 0, ignore_index= True)

In [29]:
# Write the 1 pct link leave counts, including the random seed rows appended above, to CSV (row index omitted).
link_leave_events_1pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_1pct_all.csv', index = False)

#### 5 pct

In [30]:
# Count link leave events for the ten 5 pct samples.
# One flow capacity factor is combined with two storage capacity factors;
# the branch below maps them to alpha = 1.0 and alpha = 0.75.
flowCapF = ["0.05"]
storCapF =  ["0.05", "0.10574"]

link_leave_events_5pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
            for sampleNr in range(1,11,1):
                # default stuck time and stuck time divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # build the sample size label from the flow capacity factor, here "5-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

                # declare path based on case 
                if((fCf == "0.05") & (sCf == "0.05")):
                    alpha = 1.0
                    # sample 6 uses an events file whose name carries an extra "-2" suffix
                    if (sampleNr == 6):
                        path_case1  = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-6-fCf_sCF_0.05_gS_4711_3765/lausitz-5.0-pct-6-fCf_sCF_0.05_gS_4711_3765-2.output_events.xml.gz"
                    
                    else: 
                        path_case1 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_3765/lausitz-5.0-pct-"+str(sampleNr)+ "-fCf_sCF_0.05_gS_4711_3765.output_events.xml.gz"
                    
                    
                    path_case3 =  "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-5-pct-" + str(sampleNr) +"-fCf_sCF_0.05_gS_4711_sT_600.0_3765.output_events.xml.gz"

                    # count the link leave events of both runs and attach them to the data frame
                    temp_case1 = countLinkLeaveEvents(path_case1, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case3 = countLinkLeaveEvents(path_case3, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_5pct = pd.concat([link_leave_events_5pct, temp_case1, temp_case3], ignore_index= True)
                    
                else:
                    alpha = 0.75
                    # sample 6 uses an events file whose name carries an extra "-2" suffix
                    if(sampleNr == 6):
                        path_case2 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-6-fCf_0.05_sCF_0.10574_gS_4711_3765/lausitz-5.0-pct-6-fCf_0.05_sCF_0.10574_gS_4711_3765-2.output_events.xml.gz"
                    else:
                        path_case2 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf  + "_gS_4711_3765/lausitz-5.0-pct-"+str(sampleNr)+ "-fCf_0.05_sCF_0.10574_gS_4711_3765.output_events.xml.gz"
                    
                    
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str (sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-5-pct-" + str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765.output_events.xml.gz"

                    # count the link leave events of both runs and attach them to the data frame
                    temp_case2 = countLinkLeaveEvents(path_case2, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case4 = countLinkLeaveEvents(path_case4, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_5pct = pd.concat([link_leave_events_5pct, temp_case2, temp_case4], ignore_index= True)
                
                    

In [31]:
# 5 pct random global seed, alpha = 1
# Link leave counts for sample 1 of the 5 pct runs under ten global seeds
# (stuck time 30.0). Seed 4711 reuses the count already stored in
# link_leave_events_5pct; all other seeds are counted from their events files.
link_leave_5pct_rGs = pd.DataFrame()
rGs = [4711, 3254, 2306, 6384,4338, 6003, 5502, 9377, 5621, 9002 ]

for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        default_run = link_leave_events_5pct[
            (link_leave_events_5pct['alpha'] == 1.0)
            & (link_leave_events_5pct['stuck_time'] == 30.0)
            & (link_leave_events_5pct['sample_nr'] == 1)
        ]
        # take the count as a scalar so the row does not depend on index alignment
        n_default_run = default_run['n_link_leave_events'].iloc[0]
        # the count column must be named 'n_link_leave_events' like in the frames
        # returned by countLinkLeaveEvents, otherwise the count ends up in a
        # separate column; sample_nr is 1 because of the filter above
        temp = pd.DataFrame({'n_link_leave_events': n_default_run, 'sample_size': '5-pct', 'sample_nr': 1, 'alpha': 1.0, 'global_seed': global_seed, 'stuck_time': 30.0}, index=[rGs.index(4711)])
        link_leave_5pct_rGs = pd.concat([link_leave_5pct_rGs, temp], ignore_index = True)
    else:
        path = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-1-fCf_sCF_0.05_gS_" + str(seed) + "_3765/lausitz-5.0-pct-1-fCf_sCF_0.05_gS_"+ str(seed) +"_3765.output_events.xml.gz"
        df = countLinkLeaveEvents(path, "5-pct", 1, 1.0, global_seed, 30.0)

        link_leave_5pct_rGs = pd.concat([link_leave_5pct_rGs, df], ignore_index = True,  axis = 0)



In [32]:
# Stack the default seed rows and the random seed rows of the 5 pct runs
# (the row index is not renumbered here).
link_leave_5pct_all = pd.concat([link_leave_events_5pct, link_leave_5pct_rGs], axis = 0)

In [33]:
# Write all 5 pct link leave counts to CSV (row index omitted).
link_leave_5pct_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_5pct_all.csv', index = False) 


#### 10 pct

In [34]:
# Count link leave events for the ten 10 pct samples.
# One flow capacity factor is combined with two storage capacity factors;
# the branch below maps them to alpha = 1.0 and alpha = 0.75.
flowCapF = ["0.1"]
storCapF =  ["0.1", "0.17783"]

link_leave_events_10pct = pd.DataFrame()
for fCf in flowCapF:
    for sCf in storCapF:
            for sampleNr in range(1,11,1):
                # default stuck time and stuck time divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # build the sample size label from the flow capacity factor, here "10-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"
                # declare path based on case 
                if((fCf == "0.1") & (sCf == "0.1")):
                    alpha = 1.0
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" + str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_3765/lausitz-10.0-pct-" + str(sampleNr) + "-fCf_sCF_0.1_gS_4711_3765.output_events.xml.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-10-pct-" + str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-10-pct-" + str(sampleNr) + "-fCf_sCF_0.1_gS_4711_sT_300.0_3765.output_events.xml.gz"
                    
                    # count the link leave events of both runs and attach them to the data frame
                    temp_case1 = countLinkLeaveEvents(path_case1, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case3 = countLinkLeaveEvents(path_case3, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_10pct = pd.concat([link_leave_events_10pct, temp_case1, temp_case3], ignore_index= True)     
                else:
                    alpha = 0.75
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-"+ str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf +"_gS_4711_3765/lausitz-10.0-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_3765.output_events.xml.gz"
                    path_case4 ="/home/lola/math_cluster/output/output-lausitz-10-pct-" + str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-10-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765.output_events.xml.gz"

                    # count the link leave events of both runs and attach them to the data frame
                    temp_case2 = countLinkLeaveEvents(path_case2, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case4 = countLinkLeaveEvents(path_case4, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_10pct = pd.concat([link_leave_events_10pct, temp_case2, temp_case4], ignore_index= True)
                    

In [35]:
# Write the 10 pct link leave counts to CSV (row index omitted).
link_leave_events_10pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_events_10pct.csv', index = False) 

#### 25 pct 

In [36]:
# Count link leave events for the 25 pct runs.
# One flow capacity factor is combined with two storage capacity factors;
# the branch below maps them to alpha = 1.0 and alpha = 0.75.
flowCapF = ["0.25"]
storCapF =  ["0.25", "0.35355"]

link_leave_events_25pct = pd.DataFrame()
# NOTE(review): counter is assigned here but not read anywhere in this cell.
counter = 0
for fCf in flowCapF:
    for sCf in storCapF:
            # range(1,2,1) yields only sample number 1
            for sampleNr in range(1,2,1):
                # default stuck time and stuck time divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # build the sample size label from the flow capacity factor, here "25-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"
                
                # declare path based on case 
                if((fCf == "0.25") & (sCf == "0.25")):
                    alpha = 1.0
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765/lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765.output_events.xml.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765/lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765.output_events.xml.gz"

                    # count the link leave events of both runs and attach them to the data frame
                    temp_case1 = countLinkLeaveEvents(path_case1, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case3 = countLinkLeaveEvents(path_case3, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_25pct = pd.concat([link_leave_events_25pct, temp_case1, temp_case3], ignore_index= True)  

                else:
                    alpha = 0.75
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765/lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765.output_events.xml.gz"
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765/lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765.output_events.xml.gz"

                    # count the link leave events of both runs and attach them to the data frame
                    temp_case2 = countLinkLeaveEvents(path_case2, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case4 = countLinkLeaveEvents(path_case4, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_25pct = pd.concat([link_leave_events_25pct, temp_case2, temp_case4], ignore_index= True)  


In [37]:
# Write the 25 pct link leave counts to CSV (row index omitted).
link_leave_events_25pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_events_25pct.csv', index = False) 

#### 50 pct

In [38]:
# Count link leave events for the 50 pct runs.
# One flow capacity factor is combined with two storage capacity factors;
# the branch below maps them to alpha = 1.0 and alpha = 0.75.
flowCapF = ["0.5"]
storCapF =  ["0.5", "0.5946"]

link_leave_events_50pct = pd.DataFrame()
for fCf in flowCapF:
    for sCf in storCapF:
            # range(1,2,1) yields only sample number 1
            for sampleNr in range(1,2,1):
                # default stuck time and stuck time divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # build the sample size label from the flow capacity factor, here "50-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

                # declare path based on case 
                if((fCf == "0.5") & (sCf == "0.5")):
                    alpha = 1.0
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765.output_events.xml.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765/lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765.output_events.xml.gz"
                    
                    # count the link leave events of both runs and attach them to the data frame
                    temp_case1 = countLinkLeaveEvents(path_case1, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case3 = countLinkLeaveEvents(path_case3, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_50pct = pd.concat([link_leave_events_50pct, temp_case1, temp_case3], ignore_index= True)     
                else:
                    alpha = 0.75
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765/lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765.output_events.xml.gz"
                    path_case4 ="/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765/lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765.output_events.xml.gz"
                    
                    # count the link leave events of both runs and attach them to the data frame
                    temp_case2 = countLinkLeaveEvents(path_case2, sample_size_as_string, sampleNr, alpha, "default", default_stuck_time)
                    temp_case4 = countLinkLeaveEvents(path_case4, sample_size_as_string, sampleNr, alpha, "default", adjusted_stuck_time)
                    link_leave_events_50pct = pd.concat([link_leave_events_50pct, temp_case2, temp_case4], ignore_index= True)
                    



In [39]:
# Write the 50 pct link leave counts to CSV (row index omitted).
link_leave_events_50pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_events_50pct.csv', index = False) 

#### 100 pct

In [40]:
# Count link leave events for the single 100 pct run
# (sample number 1, alpha 1.0, stuck time 30.0).
path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765.output_events.xml.gz"
link_leave_events_100pct = countLinkLeaveEvents(path,  "100-pct", 1, 1.0, "default", 30.0)

In [41]:
# Write the 100 pct link leave count to CSV (row index omitted).
link_leave_events_100pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_events_100pct.csv', index = False) 

In [42]:
# Stack the per-sample-size link leave tables into one table covering 1 pct to 100 pct.
link_leave_1_100 = pd.concat([link_leave_events_1pct, link_leave_5pct_all, link_leave_events_10pct, link_leave_events_25pct, link_leave_events_50pct, link_leave_events_100pct], axis = 0)

In [43]:
# Write the combined 1 pct to 100 pct link leave table to CSV (row index omitted).
link_leave_1_100.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_1_100pct.csv', index = False) 

### Aggregate the number of link leave events per hour

In [44]:
# Turn the streamed link leave times (one CSV per run, written by
# countLinkLeaveEvents) into hourly counts, scaled up to a 100 pct population.
sample_sizes = ['1', '5', '10', '25', '50', '100']
alpha_values = ['1.0', '0.75']
sT_default = 30.0

base_path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs/Link_leave_hour/"
# rows are collected in a list and converted once at the end; growing a data
# frame with pd.concat for every single row copies the whole frame each time
hourly_rows = []
for sampleSize in sample_sizes:
    # calculate adjusted stuck time
    size_in_pct = float(sampleSize)/100.0
    size_inverted = 1/ size_in_pct
    sT_adjusted = 30.0*size_inverted
    # the 100 pct run only exists with the default stuck time
    if sampleSize == '100':
        stuck_times = [sT_default]
    else:
        stuck_times = [sT_default, sT_adjusted]

    # declare sampleSize + "-pct"
    sample_size_string = sampleSize + "-pct"
    factor_scale_to_100 = 100.0 / float(sampleSize)

    for sT in stuck_times:
        for alpha in alpha_values:
            for sampleNr in range(1,11,1):
                # sample sizes above 10 pct only have sample number 1
                if (int(sampleSize) > 10) and (sampleNr > 1):
                    continue
                # the 100 pct run only exists for alpha 1.0
                if (sampleSize == '100') and (alpha == '0.75'):
                    continue
                df_link_leave = pd.read_csv(base_path + f"link_leave_hour_{sample_size_string}_sample_nr_{str(sampleNr)}_alpha_{alpha}_gS_default_sT_{str(sT)}.csv")
                df_link_leave['hour'] = np.floor(df_link_leave['time'] / 3600)
                for hour in range(0,36,1):
                    # one row per hour is kept even when the count is zero
                    temp = df_link_leave[(df_link_leave['hour'] == hour)].shape[0]*factor_scale_to_100
                    hourly_rows.append({'sample_size': sample_size_string, 'alpha': float(alpha), 'hour': hour, 'stuck_time': sT, 'n_Link_Leave': temp})

link_leave_per_hour = pd.DataFrame(hourly_rows)

In [46]:
# Write the hourly link leave counts to CSV (row index omitted).
link_leave_per_hour.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/link_leave_1_100pct_per_hour.csv', index = False) 

## 3a. Determine average speed per road type and hour

In [4]:
def networkInfoToDf(pathToNetwork):
    """Read a gzipped network XML file into a data frame with one row per link.

    Every child element of each ``<links>`` element below the root is read.
    The link type is taken from the first grandchild element whose ``name``
    attribute equals ``"type"``; links without such an element (or with an
    empty one) get the type ``'NA'``.

    Parameters
    ----------
    pathToNetwork : str
        Path to the gzip-compressed network XML file.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` (str), ``length``, ``freespeed`` (file value multiplied
        by 3.6, presumably m/s -> km/h), ``capacity`` and ``type``.

    Raises
    ------
    KeyError
        If a link lacks one of the attributes id, length, freespeed, capacity.
    """
    # the context manager closes the gzip handle even if parsing fails
    with gzip.open(pathToNetwork, 'rb') as network_file:
        root = ET.parse(network_file).getroot()

    ids = []
    length = []
    freespeed = []
    capacity = []
    type_of_link = []

    for links in root.findall('links'):
        for link in links:
            ids.append(link.attrib['id'])
            length.append(float(link.attrib['length']))
            freespeed.append(float(link.attrib['freespeed'])*3.6)
            capacity.append(float(link.attrib['capacity']))

            # Exactly one type entry per link keeps all column lists aligned,
            # even if a link carries several (or no) "type" attributes.
            link_type = next(
                (attr.text
                 for child in link
                 for attr in child
                 if attr.get('name') == "type"),
                None,
            )
            type_of_link.append(link_type if link_type is not None else 'NA')

    network_df = pd.DataFrame({'ID': ids, 'length': length, 'freespeed': freespeed, 'capacity': capacity, 'type': type_of_link })
    return network_df
    

In [None]:
def departuresTable(pathToEvents):
    """Return the departure events of an events file as a data frame.

    Only events of type "departure" are read. Rows whose person id contains
    'pt_' are removed, and only rows whose leg mode contains 'car' are kept.

    Returns a data frame with the columns ID, time_dep, link_dep and legMode.
    """
    reader = matsim.event_reader(pathToEvents, types= "departure")

    # values are copied out of each event while iterating
    columns = {'ID': [], 'time_dep': [], 'link_dep': [], 'legMode': []}
    for ev in reader:
        if ev['type'] != "departure":
            continue
        columns['time_dep'].append(ev['time'])
        columns['ID'].append(ev['person'])
        columns['link_dep'].append(ev['link'])
        columns['legMode'].append(ev['legMode'])

    table = pd.DataFrame(columns)
    # drop ids containing 'pt_'
    table = table[table['ID'].str.contains('pt_').eq(False)]
    # keep car legs only
    table = table[table['legMode'].str.contains('car').eq(True)]
    return table


In [13]:
def arrivalsTable(pathToEvents):
    """Return the arrival events of an events file as a data frame.

    Only events of type "arrival" are read. Rows whose person id contains
    'pt_' are removed, and only rows whose leg mode contains 'car' are kept.

    Returns a data frame with the columns ID, time_arr, link_arr and legMode.
    """
    reader = matsim.event_reader(pathToEvents, types= "arrival")

    # values are copied out of each event while iterating
    columns = {'ID': [], 'time_arr': [], 'link_arr': [], 'legMode': []}
    for ev in reader:
        if ev['type'] != "arrival":
            continue
        columns['time_arr'].append(ev['time'])
        columns['link_arr'].append(ev['link'])
        columns['ID'].append(ev['person'])
        columns['legMode'].append(ev['legMode'])

    table = pd.DataFrame(columns)
    # drop ids containing 'pt_'
    table = table[table['ID'].str.contains('pt_').eq(False)]
    # keep car legs only
    table = table[table['legMode'].str.contains('car').eq(True)]
    return table


In [None]:
def calcTravelTime(departures, arrivals):
    """Pair each person's departures with their arrivals, trip by trip.

    The n-th departure row of a person is matched with the n-th arrival row of
    the same person (row order of the inputs is kept, so both tables are
    expected to be in chronological order). Persons without any arrival are
    skipped. If a person has more departures than arrivals, the surplus
    departures are dropped instead of raising an IndexError.

    Parameters
    ----------
    departures : pandas.DataFrame
        Needs the columns ``ID``, ``time_dep`` and ``link_dep``.
    arrivals : pandas.DataFrame
        Needs the columns ``ID``, ``time_arr`` and ``link_arr``.

    Returns
    -------
    pandas.DataFrame
        One row per matched trip with the columns ``person_id``,
        ``trip_number`` (1-based per person), ``trip_id``
        (``"<person>_<trip_number>"``), ``dep_time``, ``dep_link``,
        ``arr_time`` and ``arr_link``.
    """
    person_storage = []
    trip_number = []
    trip_id_storage = []
    dep_storage = []
    depLink_storage = []
    arr_storage = []
    arrLink_storage = []

    # Group once instead of filtering both tables for every single person.
    arrivals_by_person = {person: rows for person, rows in arrivals.groupby('ID', sort=False)}

    # sort=False keeps the persons in order of first appearance
    for person, person_deps in departures.groupby('ID', sort=False):
        person_arrs = arrivals_by_person.get(person)
        if person_arrs is None:
            continue
        # zip stops at the shorter sequence: departures without arrival are ignored
        legs = zip(
            person_deps['time_dep'].to_numpy(),
            person_deps['link_dep'].to_numpy(),
            person_arrs['time_arr'].to_numpy(),
            person_arrs['link_arr'].to_numpy(),
        )
        for number, (dep_time, dep_link, arr_time, arr_link) in enumerate(legs, start=1):
            trip_id_storage.append(str(person) + "_" + str(number))
            person_storage.append(person)
            trip_number.append(number)
            dep_storage.append(dep_time)
            arr_storage.append(arr_time)
            depLink_storage.append(dep_link)
            arrLink_storage.append(arr_link)

    df_dep_arr = pd.DataFrame({'person_id': person_storage,'trip_number': trip_number, 'trip_id': trip_id_storage,'dep_time': dep_storage, 'dep_link': depLink_storage, 'arr_time': arr_storage, 'arr_link': arrLink_storage})
    return df_dep_arr


In [20]:
def enterEventsTable(pathToEvents, df_net):
    """Return the "entered link" events of an events file as a data frame.

    Events on links whose id contains 'pt_' are removed. The link type is
    attached by a left join on df_net (columns ID and type).

    Returns a data frame with the columns enter_time, link_id, vehicle_id,
    type_of_event, ID and type.
    """
    reader = matsim.event_reader(pathToEvents, types= "entered link")

    # values are copied out of each event while iterating
    columns = {'enter_time': [], 'link_id': [], 'vehicle_id': [], 'type_of_event': []}
    for ev in reader:
        if ev["type"] != "entered link":
            continue
        columns['enter_time'].append(ev['time'])
        columns['vehicle_id'].append(ev['vehicle'])
        columns['link_id'].append(ev['link'])
        columns['type_of_event'].append(ev['type'])

    all_enters = pd.DataFrame(columns)
    # drop links containing 'pt_'
    without_pt = all_enters[all_enters['link_id'].str.contains('pt_').eq(False)].copy()
    # attach the road type of each link
    return without_pt.merge(df_net[['ID', 'type']], how='left', left_on='link_id', right_on='ID')

In [21]:
def leaveEventsTable(pathToEvents, df_net):
    """Return the "left link" events of an events file as a data frame.

    Events on links whose id contains 'pt_' are removed. The link type is
    attached by a left join on df_net (columns ID and type).

    Returns a data frame with the columns leave_time, link_id, vehicle_id,
    type_of_event, ID and type.
    """
    reader = matsim.event_reader(pathToEvents, types= "left link")

    # values are copied out of each event while iterating
    columns = {'leave_time': [], 'link_id': [], 'vehicle_id': [], 'type_of_event': []}
    for ev in reader:
        if ev["type"] != "left link":
            continue
        columns['leave_time'].append(ev['time'])
        columns['vehicle_id'].append(ev['vehicle'])
        columns['link_id'].append(ev['link'])
        columns['type_of_event'].append(ev['type'])

    all_leaves = pd.DataFrame(columns)
    # drop links containing 'pt_'
    without_pt = all_leaves[all_leaves['link_id'].str.contains('pt_').eq(False)]
    # attach the road type of each link
    return without_pt.merge(df_net[['ID', 'type']], how='left', left_on='link_id', right_on='ID')

In [None]:
def extractCarRouteFromPlans(pathToPlans):
    """Extract the links of every car leg from a gzipped plans XML file.

    For every person, the plans marked ``selected="yes"`` are scanned and each
    leg with mode "car" counts as one trip (numbered from 1 per person). The
    route text of the leg is split into link ids. The first id is dropped when
    it equals the route's ``start_link`` and the last id is dropped when it
    equals the route's ``end_link``; all remaining links produce one output
    row each.

    NOTE: earlier versions sliced one element too short whenever the start or
    end link was part of the route text, which silently dropped the last link
    before the arrival link (or the last link itself).

    Parameters
    ----------
    pathToPlans : str
        Path to the gzip-compressed plans XML file.

    Returns
    -------
    pandas.DataFrame
        Columns ``person``, ``trip_id`` (``"<person>_<trip number>"``),
        ``vehicle_id`` (``"<person>_car"``), ``link_id``, ``start_link`` and
        ``end_link``.
    """
    # the context manager closes the gzip handle even if parsing fails
    with gzip.open(pathToPlans, 'rb') as plans_file:
        root = ET.parse(plans_file).getroot()

    person_id = []
    vehicle_id = []
    route_storage = []
    trip_id_storage = []
    start_link = []
    end_link = []

    for person in root.findall('person'):
        trip_counter = 0
        for plan in person.findall('plan'):
            if plan.attrib['selected'] != 'yes':
                continue
            for leg in plan.findall('leg'):
                if leg.attrib['mode'] != "car":
                    continue
                trip_counter += 1
                for route in leg.findall('route'):
                    # an empty <route> element has text None
                    links = (route.text or "").split()
                    if not links:
                        print("route of length 0 ")
                        continue

                    route_start = route.attrib['start_link']
                    route_end = route.attrib['end_link']
                    # skip the start / end link only if it is listed in the route text
                    first = 1 if links[0] == route_start else 0
                    last = len(links) - 1 if links[-1] == route_end else len(links)

                    pid = person.attrib['id']
                    trip_id = pid + "_" + str(trip_counter)
                    for link in links[first:last]:
                        person_id.append(pid)
                        route_storage.append(link)
                        trip_id_storage.append(trip_id)
                        vehicle_id.append(pid + "_car")
                        start_link.append(route_start)
                        end_link.append(route_end)

    df_Person_Link = pd.DataFrame({'person': person_id, 'trip_id':trip_id_storage, 'vehicle_id': vehicle_id, 'link_id':route_storage, 'start_link': start_link, 'end_link': end_link,})
    return df_Person_Link
                                    


In [18]:
def aggregateResultByRoadTypeAndHour(result):
    """Average the speed on links per road type and hour of link entry.

    The columns time_on_link, m_per_s and hour_link_entered are added to the
    passed data frame (in place). The mean of m_per_s is then taken for every
    combination of 'type' and 'hour_link_entered' and multiplied by 3.6.

    Expects the columns type, length, time_link_entered and time_link_left.
    Returns a data frame with the columns type, hour and speed, ordered by
    type and hour.
    """
    result['time_on_link'] = result['time_link_left'] - result['time_link_entered']
    result['m_per_s'] = result['length'] / result['time_on_link']
    result['hour_link_entered'] = np.floor(result['time_link_entered'] / 3600)

    ordered = result.sort_values(by=['type', 'hour_link_entered'])

    # one (type, hour, speed) entry per combination, in sorted order
    entries = []
    for road_type in ordered['type'].unique():
        of_type = ordered[ordered['type'] == road_type]
        for hr in of_type['hour_link_entered'].unique():
            in_hour = of_type[of_type['hour_link_entered'] == hr]
            entries.append((road_type, hr, np.mean(in_hour['m_per_s'])*3.6))

    return pd.DataFrame({
        'type': [entry[0] for entry in entries],
        'hour': [entry[1] for entry in entries],
        'speed': [entry[2] for entry in entries],
    })

In [None]:
def leftJoinEnterAndLeaveToRoute(df_enterEvents, df_leaveEvents, df_PersonAndRoute):
    """Attach link enter/leave times to every route link and aggregate speeds.

    The three data frames are registered in an in-memory DuckDB connection.
    Each route row is left-joined with the enter events and, independently,
    with the leave events of the same link and vehicle whose time lies between
    the trip's dep_time and arr_time. Because both joins are independent,
    several matching enter/leave events for one route row yield every
    combination of them.

    Parameters
    ----------
    df_enterEvents : pandas.DataFrame
        Needs the columns link_id, vehicle_id and enter_time.
    df_leaveEvents : pandas.DataFrame
        Needs the columns link_id, vehicle_id and leave_time.
    df_PersonAndRoute : pandas.DataFrame
        Needs the columns person, vehicle_id, trip_id, link_id, dep_link,
        dep_time, arr_link, arr_time, type and length.

    Returns
    -------
    pandas.DataFrame
        The result of aggregateResultByRoadTypeAndHour on the joined table.
    """
    duckdb_conn = duckdb.connect()
    try:
        # make the data frames queryable under the names used in the SQL below
        duckdb_conn.register("df_PersonAndRoute", df_PersonAndRoute)
        duckdb_conn.register("df_enterEvents", df_enterEvents)
        duckdb_conn.register("df_leaveEvents", df_leaveEvents)

        result = duckdb_conn.query("""SELECT 
        m.person,
        m.vehicle_id,
        m.trip_id,
        m.link_id,
        m.dep_link,
        m.dep_time,
        m.arr_link,
        m.arr_time,
        m.type,
        m.length,
        s1.enter_time AS time_link_entered,
        t2.leave_time AS time_link_left,
    FROM df_PersonAndRoute m
    LEFT JOIN df_enterEvents s1 
        ON m.link_id = s1.link_id
        AND m.vehicle_id = s1.vehicle_id
        AND s1.enter_time BETWEEN m.dep_time AND m.arr_time                            
    LEFT JOIN df_leaveEvents t2 
        ON m.link_id = t2.link_id
        AND m.vehicle_id = t2.vehicle_id
        AND t2.leave_time BETWEEN m.dep_time AND m.arr_time;
                        """).to_df()
    finally:
        # release the connection even if registering or querying fails
        duckdb_conn.close()

    aggr_result = aggregateResultByRoadTypeAndHour(result)
    return aggr_result


In [22]:
def calcAvgSpeedPerRoadTypeAndHour(pathToCase, network_df):
    """Compute the average speed per road type and hour for one simulation run.

    pathToCase is the common prefix of the run's output files; the suffixes
    ".output_events.xml.gz" and ".output_experienced_plans.xml.gz" are
    appended to it. network_df needs the columns ID, type and length.

    Only route links of type highway.secondary, highway.residential and
    highway.tertiary are evaluated. Returns the data frame produced by
    leftJoinEnterAndLeaveToRoute.
    """
    events_file = pathToCase +  ".output_events.xml.gz"
    plans_file = pathToCase + ".output_experienced_plans.xml.gz"

    # departure/arrival pairs per trip and the links driven on each trip
    trips = calcTravelTime(departures=departuresTable(events_file), arrivals=arrivalsTable(events_file))
    routes = extractCarRouteFromPlans(plans_file)

    # attach trip times, then the link attributes of the network
    routes = routes.merge(trips[['trip_id', 'dep_time', 'dep_link', 'arr_time', 'arr_link']], on = "trip_id", how = "left")
    routes = routes.merge(network_df[['ID', 'type', 'length']], how='left', left_on='link_id', right_on='ID')

    # restrict to the three evaluated road types and drop the join helper column
    evaluated_types = ['highway.secondary', 'highway.residential', 'highway.tertiary']
    routes = routes[routes['type'].isin(evaluated_types)]
    routes = routes.drop(columns=['ID'])

    link_enters = enterEventsTable(events_file, network_df)
    link_leaves = leaveEventsTable(events_file, network_df)

    return leftJoinEnterAndLeaveToRoute(link_enters, link_leaves, routes)


In [None]:
# Location of the gzipped network XML file that is read into a data frame below.
pathToLausitzNetwork = '/net/ils/mersini/input/v2024.2/lausitz-v2024.2-network.xml.gz'
# One row per network link (ID, length, freespeed, capacity, type); used by the cells below.
df_net = networkInfoToDf(pathToLausitzNetwork)


### 1 pct

In [43]:
# Average speed per road type and hour for all 1 % runs:
# two storage capacity factors x ten samples x two stuck times.
flowCapF = ["0.01"]
storCapF = ["0.01", "0.03162"]

avg_speed_per_RoadTypeAndHour_1pct = pd.DataFrame()
for fCf in flowCapF:
    for sCf in storCapF:
        for sampleNr in range(1, 11):
            # default stuck time and stuck time divided by the flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0/float(flowCapF[0])
            # sample size label, e.g. "1-pct"
            sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

            if fCf == "0.01" and sCf == '0.01':
                # cases 1 and 3: storage capacity factor equals flow capacity factor
                alpha = 1.0
                path_case1 = (
                    "/home/lola/math_cluster/output/output-lausitz-1pct-" + str(sampleNr)
                    + "-fCf_sCF_" + sCf
                    + "_gS_default_3765/lausitz-1pct-" + str(sampleNr)
                    + "-fCf_sCf_0.01_gS_default_3765"
                )
                path_case3 = (
                    "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(sampleNr)
                    + "-fCf_sCF_" + sCf
                    + "_gS_4711_sT_" + str(adjusted_stuck_time)
                    + "_3765/lausitz-1-pct-" + str(sampleNr)
                    + "-fCf_sCF_0.01_gS_4711_sT_3000.0_3765"
                )
                temp_case1 = calcAvgSpeedPerRoadTypeAndHour(path_case1, df_net)
                temp_case3 = calcAvgSpeedPerRoadTypeAndHour(path_case3, df_net)
                _case_frames = [temp_case1, temp_case3]
            else:
                # cases 2 and 4: the other storage capacity factor
                alpha = 0.75
                path_case2 = (
                    "/home/lola/math_cluster/output/output-lausitz-1pct-" + str(sampleNr)
                    + "-fCf_0.01_sCF_" + sCf
                    + "_gS_default_3765/lausitz-1pct-" + str(sampleNr)
                    + "-fCf_0.01_sCf_0.03162_gS_default_3765"
                )
                path_case4 = (
                    "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(sampleNr)
                    + "-fCf_0.01_sCF_" + sCf
                    + "_gS_4711_sT_" + str(adjusted_stuck_time)
                    + "_3765/lausitz-1-pct-" + str(sampleNr)
                    + "-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765"
                )
                temp_case2 = calcAvgSpeedPerRoadTypeAndHour(path_case2, df_net)
                temp_case4 = calcAvgSpeedPerRoadTypeAndHour(path_case4, df_net)
                _case_frames = [temp_case2, temp_case4]

            # the first frame belongs to the default, the second to the adjusted stuck time
            for _frame, _stuck in zip(_case_frames, (default_stuck_time, adjusted_stuck_time)):
                _frame.insert(3, 'sample_size', sample_size_as_string)
                _frame.insert(4, 'alpha', alpha)
                _frame.insert(5, 'stuck_time', _stuck)
                _frame.insert(6, 'global_seed', "default")
                _frame.insert(7, 'sample_nr', sampleNr)

            avg_speed_per_RoadTypeAndHour_1pct = pd.concat(
                [avg_speed_per_RoadTypeAndHour_1pct, *_case_frames], ignore_index=True
            )

In [45]:
# Write the 1 % average-speed table to CSV (row index omitted).
avg_speed_per_RoadTypeAndHour_1pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/Avg_Speed_per_RoadTypeAndHour_1pct.csv', index = False)

### 5 pct

In [52]:
# Average speed per road type and hour for all 5 % runs:
# two storage capacity factors x ten samples x two stuck times.
flowCapF = ["0.05"]
storCapF = ["0.05", "0.10574"]

avg_speed_per_RoadTypeAndHour_5pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
        for sampleNr in range(1, 11):
            # default stuck time and stuck time divided by the flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0/float(flowCapF[0])
            # sample size label, e.g. "5-pct"
            sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

            if fCf == "0.05" and sCf == "0.05":
                # cases 1 and 3: storage capacity factor equals flow capacity factor
                alpha = 1.0
                # sample 6 is read from an output prefix ending in "-2"
                if sampleNr == 6:
                    path_case1  = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-6-fCf_sCF_0.05_gS_4711_3765/lausitz-5.0-pct-6-fCf_sCF_0.05_gS_4711_3765-2"
                else:
                    path_case1 = (
                        "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr)
                        + "-fCf_sCF_" + sCf
                        + "_gS_4711_3765/lausitz-5.0-pct-" + str(sampleNr)
                        + "-fCf_sCF_0.05_gS_4711_3765"
                    )
                path_case3 = (
                    "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(sampleNr)
                    + "-fCf_sCF_" + sCf
                    + "_gS_4711_sT_" + str(adjusted_stuck_time)
                    + "_3765/lausitz-5-pct-" + str(sampleNr)
                    + "-fCf_sCF_0.05_gS_4711_sT_600.0_3765"
                )
                temp_case1 = calcAvgSpeedPerRoadTypeAndHour(path_case1, df_net)
                temp_case3 = calcAvgSpeedPerRoadTypeAndHour(path_case3, df_net)
                _case_frames = [temp_case1, temp_case3]
            else:
                # cases 2 and 4: the other storage capacity factor
                alpha = 0.75
                # sample 6 is read from an output prefix ending in "-2"
                if sampleNr == 6:
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-6-fCf_0.05_sCF_0.10574_gS_4711_3765/lausitz-5.0-pct-6-fCf_0.05_sCF_0.10574_gS_4711_3765-2"
                else:
                    path_case2 = (
                        "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr)
                        + "-fCf_" + fCf + "_sCF_" + sCf
                        + "_gS_4711_3765/lausitz-5.0-pct-" + str(sampleNr)
                        + "-fCf_0.05_sCF_0.10574_gS_4711_3765"
                    )
                path_case4 = (
                    "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(sampleNr)
                    + "-fCf_" + fCf + "_sCF_" + sCf
                    + "_gS_4711_sT_" + str(adjusted_stuck_time)
                    + "_3765/lausitz-5-pct-" + str(sampleNr)
                    + "-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765"
                )
                temp_case2 = calcAvgSpeedPerRoadTypeAndHour(path_case2, df_net)
                temp_case4 = calcAvgSpeedPerRoadTypeAndHour(path_case4, df_net)
                _case_frames = [temp_case2, temp_case4]

            # the first frame belongs to the default, the second to the adjusted stuck time
            for _frame, _stuck in zip(_case_frames, (default_stuck_time, adjusted_stuck_time)):
                _frame.insert(3, 'sample_size', sample_size_as_string)
                _frame.insert(4, 'alpha', alpha)
                _frame.insert(5, 'stuck_time', _stuck)
                _frame.insert(6, 'global_seed', "default")
                _frame.insert(7, 'sample_nr', sampleNr)

            avg_speed_per_RoadTypeAndHour_5pct = pd.concat(
                [avg_speed_per_RoadTypeAndHour_5pct, *_case_frames], ignore_index=True
            )

In [None]:
# Write the 5 % average-speed table to CSV (row index omitted).
avg_speed_per_RoadTypeAndHour_5pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/Avg_Speed_per_RoadTypeAndHour_5pct.csv', index = False)

In [None]:
def avgSpeedPerRoadTypeAndHour10pct():
    """Compute the average speed per road type and hour for all 10 % runs.

    For both storage capacity factors and the sample numbers 1..10, the run
    with the default stuck time (30 s) and the run with the adjusted stuck
    time (30 s divided by the flow capacity factor) are evaluated with
    calcAvgSpeedPerRoadTypeAndHour, using the module-level network table
    df_net. Each result is tagged with sample_size, alpha, stuck_time,
    global_seed and sample_nr, and the combined table is written to
    '/net/ils/mersini/output/Avg_Speed_per_RoadTypeAndHour_10pct.csv'.

    Returns None.
    """
    flowCapF = ["0.1"]
    storCapF = ["0.1", "0.17783"]
    output_dir = "/net/ils/mersini/output/"

    default_stuck_time = 30.0
    adj_sT = 30.0/float(flowCapF[0])

    def taggedSpeeds(run_name, size_label, alpha, stuck_time, sample_nr):
        # The output directory and the file prefix inside it share the run name.
        speeds = calcAvgSpeedPerRoadTypeAndHour(output_dir + run_name + "/" + run_name, df_net)
        speeds.insert(3, 'sample_size', size_label)
        speeds.insert(4, 'alpha', alpha)
        speeds.insert(5, 'stuck_time', stuck_time)
        speeds.insert(6, 'global_seed', "default")
        speeds.insert(7, 'sample_nr', sample_nr)
        return speeds

    # Start from an EMPTY frame. It used to be seeded with a copy of df_net,
    # which put every network link into the result CSV.
    avg_speed_per_RoadTypeAndHour_10pct = pd.DataFrame()
    for fCf in flowCapF:
        # sample size label, e.g. "10-pct"
        sample_size_as_string = str(int(float(fCf)*100)) + "-pct"
        for sCf in storCapF:
            # part of the run name that encodes the capacity factors
            if fCf == "0.1" and sCf == "0.1":
                alpha = 1.0
                factors = "fCf_sCF_" + sCf
            elif fCf == "0.1" and sCf == "0.17783":
                alpha = 0.75
                factors = "fCf_" + fCf + "_sCF_" + sCf
            else:
                continue

            for sampleNr in range(1, 11):
                run_default = "output-lausitz-10.0-pct-" + str(sampleNr) + "-" + factors + "_gS_4711_3765"
                run_adjusted = "output-lausitz-10-pct-" + str(sampleNr) + "-" + factors + "_gS_4711_sT_" + str(adj_sT) + "_3765"

                temp_default = taggedSpeeds(run_default, sample_size_as_string, alpha, default_stuck_time, sampleNr)
                temp_adjusted = taggedSpeeds(run_adjusted, sample_size_as_string, alpha, adj_sT, sampleNr)

                avg_speed_per_RoadTypeAndHour_10pct = pd.concat([avg_speed_per_RoadTypeAndHour_10pct, temp_default, temp_adjusted], ignore_index= True)

    avg_speed_per_RoadTypeAndHour_10pct.to_csv('/net/ils/mersini/output/Avg_Speed_per_RoadTypeAndHour_10pct.csv', index = False)
    



### 25 pct

In [None]:
def calcAvgSpeedPerHourAndRoadType25Pct(df_net):
    """Compute the average speed per road type and hour for the 25 % runs.

    For both storage capacity factors (sample number 1 only), the run with the
    default stuck time and the run with the adjusted stuck time (30 s divided
    by the flow capacity factor) are evaluated with
    calcAvgSpeedPerRoadTypeAndHour. The accumulated table is written to
    '/net/ils/mersini/output/Avg_Speed_per_RoadTypeAndHour_25pct.csv' after
    each storage capacity factor. Returns None.
    """
    flowCapF = ["0.25"]
    storCapF = ["0.25", "0.35355"]

    def taggedSpeeds(run_path, size_label, alpha_value, stuck_time, sample_nr):
        # evaluate one run and append the columns describing it
        speeds = calcAvgSpeedPerRoadTypeAndHour(run_path, df_net)
        speeds.insert(3, 'sample_size', size_label)
        speeds.insert(4, 'alpha', alpha_value)
        speeds.insert(5, 'stuck_time', stuck_time)
        speeds.insert(6, 'global_seed', "default")
        speeds.insert(7, 'sample_nr', sample_nr)
        return speeds

    collected = pd.DataFrame()

    for fCf in flowCapF:
        for sCf in storCapF:
            for sampleNr in range(1, 2):
                default_stuck_time = 30.0
                adj_sT = 30.0/float(flowCapF[0])
                # sample size label, e.g. "25-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

                if fCf == "0.25" and sCf == "0.25":
                    alpha = 1.0
                    path_default = (
                        "/net/ils/mersini/output/output-lausitz-25.0-pct-fCf_sCF_" + sCf
                        + "_gS_4711_3765/output-lausitz-25.0-pct-fCf_sCF_" + sCf
                        + "_gS_4711_3765"
                    )
                    path_adjusted = (
                        "/net/ils/mersini/output/output-lausitz-25-pct-" + str(sampleNr)
                        + "-fCf_sCF_" + fCf
                        + "_gS_4711_sT_" + str(adj_sT)
                        + "_3765/output-lausitz-25-pct-" + str(sampleNr)
                        + "-fCf_sCF_" + fCf
                        + "_gS_4711_sT_" + str(adj_sT)
                        + "_3765"
                    )
                elif fCf == "0.25" and sCf == "0.35355":
                    alpha = 0.75
                    path_default = (
                        "/net/ils/mersini/output/output-lausitz-25.0-pct-fCf_" + fCf
                        + "_sCF_" + sCf
                        + "_gS_4711_3765/output-lausitz-25.0-pct-fCf_" + fCf
                        + "_sCF_" + sCf
                        + "_gS_4711_3765"
                    )
                    path_adjusted = (
                        "/net/ils/mersini/output/output-lausitz-25-pct-" + str(sampleNr)
                        + "-fCf_" + fCf + "_sCF_" + sCf
                        + "_gS_4711_sT_" + str(adj_sT)
                        + "_3765/output-lausitz-25-pct-" + str(sampleNr)
                        + "-fCf_" + fCf + "_sCF_" + sCf
                        + "_gS_4711_sT_" + str(adj_sT)
                        + "_3765"
                    )
                else:
                    continue

                speeds_default = taggedSpeeds(path_default, sample_size_as_string, alpha, default_stuck_time, sampleNr)
                speeds_adjusted = taggedSpeeds(path_adjusted, sample_size_as_string, alpha, adj_sT, sampleNr)
                collected = pd.concat([collected, speeds_default, speeds_adjusted], ignore_index= True)

            # written after every storage capacity factor; the later write replaces the earlier one
            collected.to_csv('/net/ils/mersini/output/Avg_Speed_per_RoadTypeAndHour_25pct.csv', index = False)
                    


### 50 pct

In [None]:
def calvAvgSpeedPerRoadTypeANdHour50pct(df_net):
    """Collect the average speeds per road type and hour of the 50 pct runs.

    For each storage capacity factor the run with the default stuck time and
    the run with the scaled stuck time are evaluated with
    ``calcAvgSpeedPerRoadTypeAndHour``. Each result is labelled with sample
    size, alpha, stuck time, global seed and sample number (inserted as
    columns 3-7), and the combined table is written to
    ``Avg_Speed_per_RoadTypeAndHour_50pct.csv``.

    Args:
        df_net: Passed through unchanged as the second argument of
            ``calcAvgSpeedPerRoadTypeAndHour``.

    Returns:
        None. The result is only written to disk.
    """
    flowCapF = ["0.5"]
    storCapF = ["0.5", "0.5946"]

    def labelled_speeds(run_dir, sample_size, alpha_value, stuck_time, sample_nr):
        # Evaluate one run and attach its metadata at column positions 3..7.
        speeds = calcAvgSpeedPerRoadTypeAndHour(run_dir, df_net)
        metadata = (
            ('sample_size', sample_size),
            ('alpha', alpha_value),
            ('stuck_time', stuck_time),
            ('global_seed', "default"),
            ('sample_nr', sample_nr),
        )
        for position, (column, value) in enumerate(metadata, start=3):
            speeds.insert(position, column, value)
        return speeds

    collected = pd.DataFrame()

    for fCf in flowCapF:
        for sCf in storCapF:
            for sampleNr in range(1, 2):
                default_stuck_time = 30.0
                # stuck time scaled by the inverse of the flow capacity factor
                adj_sT = 30.0 / float(flowCapF[0])
                # sample size label, e.g. "50-pct"
                sample_size_as_string = str(int(float(fCf) * 100)) + "-pct"

                # pick alpha and the two run directories for this factor pair
                if fCf == "0.5" and sCf == "0.5":
                    alpha = 1.0
                    default_run = (
                        "/net/ils/mersini/output/output-lausitz-50.0-pct-fCf_sCF_"
                        + sCf
                        + "_gS_4711_3765/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765"
                    )
                    scaled_run = (
                        "/net/ils/mersini/output/output-lausitz-50-pct-1-fCf_sCF_"
                        + sCf
                        + "_gS_4711_sT_"
                        + str(adj_sT)
                        + "_3765/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765"
                    )
                elif fCf == "0.5" and sCf == "0.5946":
                    alpha = 0.75
                    default_run = (
                        "/net/ils/mersini/output/output-lausitz-50.0-pct-fCf_"
                        + fCf
                        + "_sCF_"
                        + sCf
                        + "_gS_4711_3765/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765"
                    )
                    scaled_run = (
                        "/net/ils/mersini/output/output-lausitz-50-pct-1-fCf_"
                        + fCf
                        + "_sCF_"
                        + sCf
                        + "_gS_4711_sT_"
                        + str(adj_sT)
                        + "_3765/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765"
                    )
                else:
                    continue

                # default stuck time first, then the scaled stuck time
                speeds_default = labelled_speeds(
                    default_run, sample_size_as_string, alpha, default_stuck_time, sampleNr
                )
                speeds_scaled = labelled_speeds(
                    scaled_run, sample_size_as_string, alpha, adj_sT, sampleNr
                )
                collected = pd.concat(
                    [collected, speeds_default, speeds_scaled], ignore_index=True
                )
        collected.to_csv('/net/ils/mersini/output/Avg_Speed_per_RoadTypeAndHour_50pct.csv', index=False)



### 100 pct

In [None]:
def calcAvgSpeedPerRoadTypeAndHour100pct(df_net):
    """Collect the average speeds per road type and hour of the 100 pct run.

    NOTE: this function used to be named ``calcAvgSpeedPerRoadTypeAndHour``,
    i.e. exactly like the two-argument helper it calls. Defining it replaced
    the helper, so the call below (and the calls in the other sample-size
    functions) raised ``TypeError``. It is renamed following the
    ``...50pct`` naming of its sibling.

    The run is labelled with sample size, alpha, stuck time, global seed and
    sample number (inserted as columns 3-7) and the table is written to
    ``Avg_Speed_per_RoadTypeAndHour_100pct.csv``.

    Args:
        df_net: Passed through unchanged as the second argument of
            ``calcAvgSpeedPerRoadTypeAndHour``.

    Returns:
        None. The result is only written to disk.
    """
    flowCapF = ["1.0"]
    avg_speed_per_RoadTypeAndHour_100pct = pd.DataFrame()
    for fCf in flowCapF:
        for sampleNr in range(1, 2):
            alpha = 1.0
            default_stuck_time = 30.0
            # sample size label, here "100-pct"
            sample_size_as_string = str(int(float(fCf) * 100)) + "-pct"
            path_case1 = "/net/ils/mersini/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765"

            # average speed per road type and hour, computed by the shared helper
            temp_case1 = calcAvgSpeedPerRoadTypeAndHour(path_case1, df_net)
            temp_case1.insert(3, 'sample_size', sample_size_as_string)
            temp_case1.insert(4, 'alpha', alpha)
            temp_case1.insert(5, 'stuck_time', default_stuck_time)
            temp_case1.insert(6, 'global_seed', "default")
            temp_case1.insert(7, 'sample_nr', sampleNr)
            avg_speed_per_RoadTypeAndHour_100pct = pd.concat(
                [avg_speed_per_RoadTypeAndHour_100pct, temp_case1], ignore_index=True
            )
        # write once after all samples have been collected
        avg_speed_per_RoadTypeAndHour_100pct.to_csv('/net/ils/mersini/output/Avg_Speed_per_RoadTypeAndHour_100pct.csv', index=False)

## 4. Executed Scores at iteration 500

In [5]:
# Extract scores and car-trip values from the experienced plans of one run.
pathToExecutedPlans = "/home/lola/math_cluster/output/output-lausitz-1pct-1-fCf_sCF_0.01_gS_default_3765/lausitz-1pct-1-fCf_sCf_0.01_gS_default_3765.output_experienced_plans.xml.gz"

# Parse inside a context manager so the gzip handle is closed again, and
# without rebinding the builtin name `input`.
with gzip.open(pathToExecutedPlans, 'r') as plans_file:
    tree = ET.parse(plans_file)
root = tree.getroot()

# one entry per person with a selected plan
id_sc_storage = []
score_storage = []

# one entry per <route> element of a car leg in a selected plan
id_trip_storage = []
trip_counter_storage = []
trav_time_storage = []
trav_dist_storage = []

for person in root.findall('person'):
    # car legs are numbered per person
    trip_counter = 0
    for plan in person.findall('plan'):
        if plan.attrib['selected'] != "yes":
            continue
        id_sc_storage.append(person.attrib['id'])
        score_storage.append(plan.attrib['score'])

        for leg in plan.findall('leg'):
            if leg.attrib['mode'] != "car":
                continue
            trip_counter += 1
            for route in leg.findall('route'):
                id_trip_storage.append(person.attrib['id'])
                trip_counter_storage.append(trip_counter)
                trav_time_storage.append(route.attrib['trav_time'])
                trav_dist_storage.append(route.attrib['distance'])

df_trips = pd.DataFrame({'person_id': id_trip_storage, 'trip_number': trip_counter_storage, 'trav_time': trav_time_storage, 'distance': trav_dist_storage })
# travel time strings -> number of seconds
df_trips['trav_time_in_seconds'] = df_trips.trav_time.astype('timedelta64[s]')/  pd.Timedelta(seconds=1)

# mean travel time over all extracted car routes, in minutes
avg_trav_time_min = np.mean(df_trips['trav_time_in_seconds'])/60

df_score = pd.DataFrame({'person_id': id_sc_storage, 'exp_score': score_storage })


#### 1 pct

In [None]:
#### scores ####
# 1 pct, alpha = 1: value of avg_executed at row 500 of each sample's
# scorestats file (samples 1-10), one labelled row per sample.
scores_1pct = pd.DataFrame()
for elem in range(1, 11):
    path = (
        "/home/lola/math_cluster/output/"
        f"output-lausitz-1pct-{elem}-fCf_sCF_0.01_gS_default_3765/"
        f"lausitz-1pct-{elem}-fCf_sCf_0.01_gS_default_3765.scorestats.csv"
    )
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "1-pct",
        "sample_nr": elem,
        "alpha": 1.0,
        "stuck_time": 30.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_1pct = pd.concat([scores_1pct, df], axis=0)
    

In [None]:
# 1 pct, alpha = 0.75: same extraction for the runs with the adjusted
# storage capacity factor (0.03162).
scores_1pct_sCf = pd.DataFrame()
for elem in range(1, 11):
    path = (
        "/home/lola/math_cluster/output/"
        f"output-lausitz-1pct-{elem}-fCf_0.01_sCF_0.03162_gS_default_3765/"
        f"lausitz-1pct-{elem}-fCf_0.01_sCf_0.03162_gS_default_3765.scorestats.csv"
    )
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "1-pct",
        "sample_nr": elem,
        "alpha": 0.75,
        "stuck_time": 30.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_1pct_sCf = pd.concat([scores_1pct_sCf, df], axis=0)

In [None]:
# 1 pct, alpha = 1, varying global seed (always sample 1)
scores_1pct_rGs = pd.DataFrame()
rGs = [4711, 3254, 2306, 6384, 4338, 6003, 5502, 9377, 5621, 9002]
for position, seed in enumerate(rGs):
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        # seed 4711 re-uses the score of the first 1 pct sample instead of reading a file
        score_at_500 = scores_1pct["avg_executed_it_500"].iloc[0]
    else:
        if seed == 3254:
            # this run was stored under a different naming scheme
            path = "/home/lola/math_cluster/output/output-lausitz-1pct-1-fCf_sCF_0.01_gS_3254_3765/lausitz-1pct-1-fCf_sCf_0.01_gS_3254_3765.scorestats.csv"
        else:
            path = (
                "/home/lola/math_cluster/output/"
                f"output-lausitz-1.0-pct-1-fCf_sCF_0.01_gS_{seed}_3765/"
                f"lausitz-1.0-pct-1-fCf_sCF_0.01_gS_{seed}_3765.scorestats.csv"
            )
        temp = pd.read_csv(path, sep=";")
        score_at_500 = temp["avg_executed"].iloc[500]

    # rows are labelled with the position of the seed in rGs
    df = pd.DataFrame(
        {
            "avg_executed_it_500": score_at_500,
            "sample_size": "1-pct",
            "sample_nr": 1,
            "alpha": 1.0,
            "stuck_time": 30.0,
            "global_seed": global_seed,
        },
        index=[position],
    )
    scores_1pct_rGs = pd.concat([scores_1pct_rGs, df], axis=0)

In [None]:
# 1 pct, alpha = 1, scaled stuck time (3000 s)
scores_1pct_sT = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-1-pct-{elem}-fCf_sCF_0.01_gS_4711_sT_3000.0_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "1-pct",
        "sample_nr": elem,
        "alpha": 1.0,
        "stuck_time": 3000.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_1pct_sT = pd.concat([scores_1pct_sT, df], axis=0)


In [None]:
# 1 pct, alpha = 0.75, scaled stuck time (3000 s)
scores_1pct_sT_sCf = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-1-pct-{elem}-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "1-pct",
        "sample_nr": elem,
        "alpha": 0.75,
        "stuck_time": 3000.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_1pct_sT_sCf = pd.concat([scores_1pct_sT_sCf, df], axis=0)

In [None]:
# Stack all 1 pct score tables (default, alpha 0.75, seeds, scaled stuck time).
score_tables_1pct = [
    scores_1pct,
    scores_1pct_sCf,
    scores_1pct_rGs,
    scores_1pct_sT,
    scores_1pct_sT_sCf,
]
scores_1pct_all = pd.concat(score_tables_1pct)


In [None]:
# Write the combined 1 pct score table to disk; the row labels are not exported.
scores_1pct_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/scores_all_1pct_samples.csv', index = False) 

#### 5 pct 

In [None]:
#### scores, 5 pct ####
# 5 pct, alpha = 1: avg_executed at row 500 of each sample's scorestats file
scores_5pct = pd.DataFrame()

for elem in range(1, 11):
    run_name = f"lausitz-5.0-pct-{elem}-fCf_sCF_0.05_gS_4711_3765"
    # the scorestats file of sample 6 carries an extra "-2" in its name
    file_suffix = "-2" if elem == 6 else ""
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}{file_suffix}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "5-pct",
        "sample_nr": elem,
        "alpha": 1.0,
        "stuck_time": 30.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_5pct = pd.concat([scores_5pct, df], axis=0)


In [None]:
# 5 pct, alpha = 0.75 (storage capacity factor 0.10574)
scores_5pct_sCf = pd.DataFrame()

for elem in range(1, 11):
    run_name = f"lausitz-5.0-pct-{elem}-fCf_0.05_sCF_0.10574_gS_4711_3765"
    # the scorestats file of sample 6 carries an extra "-2" in its name
    file_suffix = "-2" if elem == 6 else ""
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}{file_suffix}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "5-pct",
        "sample_nr": elem,
        "alpha": 0.75,
        "stuck_time": 30.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_5pct_sCf = pd.concat([scores_5pct_sCf, df], axis=0)

In [None]:
# 5 pct, alpha = 1, varying global seed (always sample 1)
scores_5pct_rGs = pd.DataFrame()
rGs = [4711, 3254, 2306, 6384, 4338, 6003, 5502, 9377, 5621, 9002]

for position, seed in enumerate(rGs):
    global_seed = "rnd_" + str(seed)
    if seed == 4711:
        # seed 4711 re-uses the score of the first 5 pct sample instead of reading a file
        temp = pd.DataFrame(
            data={
                'avg_executed_it_500': scores_5pct["avg_executed_it_500"].iloc[0],
                'sample_size': '5-pct',
                "sample_nr": 1,
                'alpha': 1.0,
                'stuck_time': 30.0,
                'global_seed': global_seed,
            },
            index=[position],
        )
        scores_5pct_rGs = pd.concat([scores_5pct_rGs, temp])
        continue

    run_name = f"lausitz-5.0-pct-1-fCf_sCF_0.05_gS_{seed}_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    score_table = pd.read_csv(path, sep=";")
    temp = {
        'avg_executed_it_500': score_table["avg_executed"].iloc[500],
        'sample_size': '5-pct',
        "sample_nr": 1,
        'alpha': 1.0,
        'stuck_time': 30.0,
        'global_seed': global_seed,
    }
    # rows are labelled with the position of the seed in rGs
    df = pd.DataFrame(data=temp, index=[position])
    scores_5pct_rGs = pd.concat([scores_5pct_rGs, df], axis=0)


In [None]:
# 5 pct, alpha = 1.0, scaled stuck time (600 s)
scores_5pct_sT = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-5-pct-{elem}-fCf_sCF_0.05_gS_4711_sT_600.0_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "5-pct",
        "sample_nr": elem,
        "alpha": 1.0,
        "stuck_time": 600.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_5pct_sT = pd.concat([scores_5pct_sT, df], axis=0)




In [None]:
# 5 pct, alpha = 0.75, scaled stuck time (600 s)
scores_5pct_sT_sCf = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-5-pct-{elem}-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "5-pct",
        "sample_nr": elem,
        "alpha": 0.75,
        "stuck_time": 600.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_5pct_sT_sCf = pd.concat([scores_5pct_sT_sCf, df], axis=0)

  

In [None]:
# Stack all 5 pct score tables (default, alpha 0.75, seeds, scaled stuck time).
score_tables_5pct = [
    scores_5pct,
    scores_5pct_sCf,
    scores_5pct_rGs,
    scores_5pct_sT,
    scores_5pct_sT_sCf,
]
scores_5pct_all = pd.concat(score_tables_5pct)


In [None]:
# Write the combined 5 pct score table to disk; the row labels are not exported.
scores_5pct_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/scores_all_5pct_samples.csv', index = False) 

#### 10 pct

In [None]:
#### scores, 10 pct ####
# 10 pct, alpha = 1.0: avg_executed at row 500 of each sample's scorestats file
scores_10pct = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-10.0-pct-{elem}-fCf_sCF_0.1_gS_4711_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "10-pct",
        "sample_nr": elem,
        "alpha": 1.0,
        "stuck_time": 30.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_10pct = pd.concat([scores_10pct, df], axis=0)


In [None]:
# 10 pct, alpha = 0.75 (storage capacity factor 0.17783)
scores_10pct_sCf = pd.DataFrame()
for elem in range(1, 11):
    path = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" + str(elem) + "-fCf_0.1_sCF_0.17783_gS_4711_3765/lausitz-10.0-pct-" + str(elem) + "-fCf_0.1_sCF_0.17783_gS_4711_3765.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    df = pd.DataFrame({"avg_executed_it_500": temp["avg_executed"].iloc[500], "sample_size": "10-pct", "sample_nr": elem, "alpha": 0.75, "stuck_time": 30.0, "global_seed": "default"}, index=[elem])
    # Accumulate into scores_10pct_sCf. The rows were previously appended to
    # scores_10pct, which left this frame empty and duplicated rows in
    # scores_10pct whenever the cell was run again.
    scores_10pct_sCf = pd.concat([scores_10pct_sCf, df], axis=0)

In [None]:
# 10 pct, alpha = 1, scaled stuck time (300 s)
scores_10pct_sT = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-10-pct-{elem}-fCf_sCF_0.1_gS_4711_sT_300.0_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "10-pct",
        "sample_nr": elem,
        "alpha": 1.0,
        "stuck_time": 300.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_10pct_sT = pd.concat([scores_10pct_sT, df], axis=0)

In [None]:
# 10 pct, alpha = 0.75, scaled stuck time (300 s)
scores_10pct_sT_sCf = pd.DataFrame()
for elem in range(1, 11):
    # run directory and file prefix share the same run name
    run_name = f"lausitz-10-pct-{elem}-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765"
    path = f"/home/lola/math_cluster/output/output-{run_name}/{run_name}.scorestats.csv"
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "10-pct",
        "sample_nr": elem,
        "alpha": 0.75,
        "stuck_time": 300.0,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[elem])
    scores_10pct_sT_sCf = pd.concat([scores_10pct_sT_sCf, df], axis=0)


In [None]:
# Stack all 10 pct score tables (default, alpha 0.75, scaled stuck time).
score_tables_10pct = [
    scores_10pct,
    scores_10pct_sCf,
    scores_10pct_sT,
    scores_10pct_sT_sCf,
]
scores_10pct_all = pd.concat(score_tables_10pct)

In [None]:
# Write the combined 10 pct score table to disk; the row labels are not exported.
scores_10pct_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/scores_all_10pct_samples.csv', index = False) 

#### 25 pct

In [None]:
#### scores, 25 pct ####
scores_25pct = pd.DataFrame()

# One entry per 25 pct run: (scorestats file, alpha, stuck time, row label)
runs_25pct = [
    # 25 pct, alpha = 1
    ("/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765/lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765.scorestats.csv", 1.0, 30.0, 0),
    # 25 pct, alpha = 0.75
    ("/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765/lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765.scorestats.csv", 0.75, 30.0, 1),
    # 25 pct, alpha = 1, sT scaled
    ("/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765/lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765.scorestats.csv", 1.0, 120.0, 2),
    # 25 pct, alpha = 0.75, sT scaled
    ("/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765/lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765.scorestats.csv", 0.75, 120.0, 0),
]

for path, run_alpha, run_stuck_time, run_label in runs_25pct:
    temp = pd.read_csv(path, sep=";")
    score_row = {
        "avg_executed_it_500": temp["avg_executed"].iloc[500],
        "sample_size": "25-pct",
        "sample_nr": 1,
        "alpha": run_alpha,
        "stuck_time": run_stuck_time,
        "global_seed": "default",
    }
    df = pd.DataFrame(score_row, index=[run_label])
    scores_25pct = pd.concat([scores_25pct, df], axis=0)

 

In [None]:
# Write the 25 pct score table to disk; the row labels are not exported.
scores_25pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/scores_all_25pct_samples.csv', index = False) 

#### 50 pct

In [None]:
#### scores, 50 pct ####
scores_50pct = pd.DataFrame()

# One entry per run: (scorestats file, sample size label, alpha, stuck time, row label)
runs_50pct = [
    # 50 pct, alpha = 1
    ("/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765.scorestats.csv", "50-pct", 1.0, 30.0, 0),
    # 50 pct, alpha = 0.75
    ("/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765/lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765.scorestats.csv", "50-pct", 0.75, 30.0, 1),
    # 50 pct, alpha = 1, sT scaled.
    # This path assignment had been merged into its comment line, so this
    # row silently re-read the alpha = 0.75 file from the entry above.
    ("/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765/lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765.scorestats.csv", "50-pct", 1.0, 60.0, 2),
    # 50 pct, alpha = 0.75, sT scaled
    ("/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765/lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765.scorestats.csv", "50-pct", 0.75, 60.0, 0),
    # 25 pct doubled, alpha = 1.0, sT 30.0
    ("/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711_3765/lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711__3765.scorestats.csv", "25-pct-doubled", 1.0, 30.0, 0),
]

for path, run_sample_size, run_alpha, run_stuck_time, run_label in runs_50pct:
    temp = pd.read_csv(path, sep=";")
    df = pd.DataFrame(
        {
            "avg_executed_it_500": temp["avg_executed"].iloc[500],
            "sample_size": run_sample_size,
            "sample_nr": 1,
            "alpha": run_alpha,
            "stuck_time": run_stuck_time,
            "global_seed": "default",
        },
        index=[run_label],
    )
    scores_50pct = pd.concat([scores_50pct, df], axis=0)



In [None]:
# Write the 50 pct score table to disk; the row labels are not exported.
scores_50pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/scores_all_50pct_samples.csv', index = False)

#### 100 pct

In [None]:
#### scores , 100 pct ####
# 100 pct run
path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765.scorestats.csv"
temp = pd.read_csv(path, sep=";")
row_100pct = {
    "avg_executed_it_500": temp["avg_executed"].iloc[500],
    "sample_size": "100-pct",
    "sample_nr": 1,
    "alpha": 1.0,
    "stuck_time": 30.0,
    "global_seed": "default",
}
scores_100pct = pd.DataFrame(row_100pct, index=[0])

# 25 pct sample quadrupled
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25.0-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711_3765/lausitz-25-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711__3765.scorestats.csv"
temp = pd.read_csv(path, sep=";")
row_quadrupled = {
    "avg_executed_it_500": temp["avg_executed"].iloc[500],
    "sample_size": "25-pct-quadrupled",
    "sample_nr": 1,
    "alpha": 1.0,
    "stuck_time": 30.0,
    "global_seed": "default",
}
scores_25_pct_quadrupled = pd.DataFrame(row_quadrupled, index=[0])

In [None]:
# Stack the score tables of all sample sizes, smallest sample first.
score_tables_all_sizes = [
    scores_1pct_all,
    scores_5pct_all,
    scores_10pct_all,
    scores_25pct,
    scores_50pct,
    scores_100pct,
    scores_25_pct_quadrupled,
]
scores_1_100 = pd.concat(score_tables_all_sizes, axis=0)

In [None]:
# Write the score table covering all sample sizes to disk; the row labels are not exported.
scores_1_100.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/scores_all_1_100pct_samples.csv', index = False) 

## 5./6. Average travel time and traveled distance & N departures

In [2]:
def calcAvgTravelTimeAndDist(pathToFile, sampleSize, sampleNr, alpha, stuckTime, globalSeed):
    """Summarise travel times and travelled distances of one trips file.

    Args:
        pathToFile: Path to a gzip-compressed, semicolon-separated CSV with
            the columns ``trav_time``, ``dep_time``, ``main_mode`` and
            ``traveled_distance``.
        sampleSize: Sample size label copied into both result tables.
        sampleNr: Sample number; also used as row label of the overall table.
        alpha: Alpha value copied into both result tables.
        stuckTime: Stuck time copied into both result tables.
        globalSeed: Global seed label copied into both result tables.

    Returns:
        A list ``[df_avg_trav_time, df_aTt_pH]``. The first frame has one row
        with the mean travel time (seconds / 60) and mean distance
        (value / 1000) over all trips and separately for the main modes
        ``car`` and ``ride``. The second frame has one row per departure hour
        0..35 with the same two means; hours without trips contain NaN.
    """
    temp = pd.read_csv(pathToFile, compression="gzip", sep=";")

    # pd.to_timedelta parses the time strings explicitly instead of relying
    # on astype('timedelta64[s]') being applied to strings.
    temp['trav_time_seconds'] = pd.to_timedelta(temp['trav_time']).dt.total_seconds()
    temp['dep_hour'] = np.floor(pd.to_timedelta(temp['dep_time']).dt.total_seconds() / 3600)

    run_labels = {
        "sample_size": sampleSize,
        "sample_nr": sampleNr,
        "alpha": alpha,
        "stuck_time": stuckTime,
        "global_seed": globalSeed,
    }

    car_trips = temp[temp['main_mode'] == "car"]
    ride_trips = temp[temp['main_mode'] == "ride"]
    overall = {
        "aTt_min": temp['trav_time_seconds'].mean() / 60,
        "aTt_car_min": car_trips['trav_time_seconds'].mean() / 60,
        "aTt_ride_min": ride_trips['trav_time_seconds'].mean() / 60,
        "aTd_km": temp['traveled_distance'].mean() / 1000,
        "aTd_car_km": car_trips['traveled_distance'].mean() / 1000,
        "aTd_ride_km": ride_trips['traveled_distance'].mean() / 1000,
    }
    df_avg_trav_time = pd.DataFrame({**overall, **run_labels}, index=[sampleNr])

    # Collect the hourly rows first and build the frame once, rather than
    # concatenating a one-row frame per hour.
    hourly_rows = []
    for h in range(36):
        trips_in_hour = temp[temp['dep_hour'] == h]
        hourly_rows.append({
            'hour': h,
            'aTt_pH_min': trips_in_hour['trav_time_seconds'].mean() / 60,
            'aTd_pH_km': trips_in_hour['traveled_distance'].mean() / 1000,
            **run_labels,
        })
    df_aTt_pH = pd.DataFrame(hourly_rows)

    return [df_avg_trav_time, df_aTt_pH]

def calcAvgTravelDistance(pathToFile, sampleSize, sampleNr, alpha, stuckTime, globalSeed):
    """Return the mean ``traveled_distance`` of one trips file as a one-row frame.

    Args:
        pathToFile: Path to a gzip-compressed, semicolon-separated CSV with a
            ``traveled_distance`` column.
        sampleSize, sampleNr, alpha, stuckTime, globalSeed: Run metadata
            copied unchanged into the result; ``sampleNr`` is also the row
            label.

    Returns:
        A DataFrame with the columns ``avg_trav_dist``, ``sample_size``,
        ``sample_nr``, ``alpha``, ``stuck_time`` and ``global_seed``.
    """
    trips = pd.read_csv(pathToFile, compression="gzip", sep=";")
    record = {
        "avg_trav_dist": trips["traveled_distance"].mean(),
        "sample_size": sampleSize,
        "sample_nr": sampleNr,
        "alpha": alpha,
        "stuck_time": stuckTime,
        "global_seed": globalSeed,
    }
    return pd.DataFrame(record, index=[sampleNr])

In [3]:
def calcDepPerHour(pathToFile, sampleSize, sampleNr, alpha, stuckTime, globalSeed):
    """Count the departures per hour in one trips file.

    The hour is the first two characters of each ``dep_time`` string.

    Args:
        pathToFile: Path to a gzip-compressed, semicolon-separated CSV with a
            ``dep_time`` column of strings.
        sampleSize, sampleNr, alpha, stuckTime, globalSeed: Run metadata
            copied unchanged into every row of the result.

    Returns:
        A DataFrame sorted by ``hour`` with the columns ``hour``,
        ``n_departues``, ``sample_size``, ``sample_nr``, ``alpha``,
        ``stuck_time`` and ``global_seed``. Row labels reflect the order in
        which the hours first appear in the file. A file without trips
        yields an empty frame with these columns.
    """
    temp = pd.read_csv(pathToFile, compression="gzip", sep=";")

    # Tally in a single pass. Dicts keep insertion order, so the hours stay
    # in first-seen order, which determines the row labels before sorting.
    departures_by_hour = {}
    for dep_time in temp['dep_time']:
        hour = dep_time[:2]
        departures_by_hour[hour] = departures_by_hour.get(hour, 0) + 1

    df_n_of_dep_per_hour = pd.DataFrame({
        'hour': list(departures_by_hour.keys()),
        'n_departues': list(departures_by_hour.values()),
    })

    df_n_of_dep_per_hour = df_n_of_dep_per_hour.sort_values(by=['hour'])
    df_n_of_dep_per_hour.insert(2, "sample_size", sampleSize)
    df_n_of_dep_per_hour.insert(3, "sample_nr", sampleNr)
    df_n_of_dep_per_hour.insert(4, 'alpha', alpha)
    df_n_of_dep_per_hour.insert(5, 'stuck_time', stuckTime)
    df_n_of_dep_per_hour.insert(6, 'global_seed', globalSeed)
    return df_n_of_dep_per_hour



#### 1 pct

In [22]:
# Collect trip statistics for the 1 % samples (sampleNr 1..10), once per
# storage capacity factor in storCapF. For every trips file three results are
# gathered: the averages frame and the second frame returned by
# calcAvgTravelTimeAndDist, and the departures per hour.
flowCapF = ["0.01"]
storCapF =  ["0.01", "0.03162"]

# initialize empty data frames
avg_travel_times_and_dist_1pct = pd.DataFrame()
#avg_travel_distances_1pct = pd.DataFrame()
dep_per_hour_1pct = pd.DataFrame()
aTt_perHour_1pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
        for sampleNr in range(1,11,1):
            # calculate adjusted stuck time: 30.0 divided by the flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0/float(flowCapF[0])
            # declare sample size as str, here "1-pct"
            sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

            # storage capacity factor equal to the flow capacity factor -> alpha = 1.0
            if ((fCf == "0.01") & (sCf == '0.01')):
                # declare alpha
                alpha = 1.0
                # paths for case 1 (default stuck time) and case 3 (adjusted stuck time)
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-1pct-"+ str(sampleNr) +"-fCf_sCF_" + sCf + "_gS_default_3765/lausitz-1pct-"+ str(sampleNr) +"-fCf_sCf_0.01_gS_default_3765.output_trips.csv.gz"
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-1-pct-" +str(sampleNr) +"-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-1-pct-" +str(sampleNr) + "-fCf_sCF_0.01_gS_4711_sT_3000.0_3765.output_trips.csv.gz"
                
                # average travel time (both runs are labelled with global seed 'default')
                aTt_case1= calcAvgTravelTimeAndDist(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                aTt_case3= calcAvgTravelTimeAndDist(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                # concatenate with existing values ([0] and [1] are the two frames returned per run)
                avg_travel_times_and_dist_1pct = pd.concat([avg_travel_times_and_dist_1pct, aTt_case1[0], aTt_case3[0]], ignore_index= True)
                aTt_perHour_1pct = pd.concat([aTt_perHour_1pct, aTt_case1[1], aTt_case3[1]], ignore_index= True)

                # number of departures per hour 
                dPh_case1 = calcDepPerHour(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                dPh_case3 = calcDepPerHour(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                # concatenate results
                dep_per_hour_1pct = pd.concat([dep_per_hour_1pct, dPh_case1, dPh_case3], ignore_index= True)


            else:
                # second storage capacity factor (0.03162) -> alpha = 0.75
                alpha = 0.75
                # paths for case 2 (default stuck time) and case 4 (adjusted stuck time)
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-1pct-" +str(sampleNr) + "-fCf_0.01_sCF_" + sCf + "_gS_default_3765/lausitz-1pct-" + str(sampleNr) + "-fCf_0.01_sCf_0.03162_gS_default_3765.output_trips.csv.gz"
                path_case4 = "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-1-pct-" + str(sampleNr) + "-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765.output_trips.csv.gz"
                
                # average travel time
                aTt_case2= calcAvgTravelTimeAndDist(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                aTt_case4= calcAvgTravelTimeAndDist(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                # concatenate with existing values
                avg_travel_times_and_dist_1pct = pd.concat([avg_travel_times_and_dist_1pct, aTt_case2[0], aTt_case4[0]], ignore_index= True)
                aTt_perHour_1pct = pd.concat([aTt_perHour_1pct, aTt_case2[1], aTt_case4[1]], ignore_index= True)

                # number of departures per hour 
                dPh_case2 = calcDepPerHour(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                dPh_case4 = calcDepPerHour(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                # concatenate results
                dep_per_hour_1pct = pd.concat([dep_per_hour_1pct, dPh_case2, dPh_case4], ignore_index= True)




In [None]:
# 1 pct rGs
# Travel time / distance statistics of 1 % sample 1 for each random global
# seed in rGs. All of these runs use alpha = 1.0 and the default stuck time
# (30.0), so the metadata is passed explicitly instead of relying on whatever
# values `alpha`, `sample_size_as_string` and `default_stuck_time` were left
# with by a previously executed cell.
avg_trav_time_and_dist_1pct_rGs = pd.DataFrame()
aTt_perHour_1pct_rgs = pd.DataFrame()
rGs = [ 4711,3254, 2306, 6384,4338, 6003, 5502, 9377, 5621, 9002 ]
for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if (seed == 4711):
        # No separate trips file is read for seed 4711: the row already held in
        # avg_travel_times_and_dist_1pct (alpha 1.0, stuck time 30.0, sample 1)
        # is reused and relabelled with the seed.
        temp = avg_travel_times_and_dist_1pct[(avg_travel_times_and_dist_1pct['alpha']== 1.0) & (avg_travel_times_and_dist_1pct['stuck_time']== 30.0)& (avg_travel_times_and_dist_1pct['sample_nr'] == 1)]
        # NOTE: the Series taken from `temp` are aligned on the index label
        # given below, i.e. this relies on the selected row carrying label 0.
        df1 = pd.DataFrame({"aTt_min": temp["aTt_min"],"aTt_car_min": temp['aTt_car_min'], "aTt_ride_min": temp['aTt_ride_min'], "aTd_km": temp['aTd_km'],"aTd_car_km": temp['aTd_car_km'], "aTd_ride_km": temp['aTd_ride_km'], "sample_size": "1-pct", "sample_nr": 1, "alpha": 1.0, "stuck_time": 30.0, "global_seed": global_seed }, index = [rGs.index(seed)])
        avg_trav_time_and_dist_1pct_rGs = pd.concat([avg_trav_time_and_dist_1pct_rGs, df1])

        # same reuse for the second result frame: copy the matching rows and
        # replace the 'default' seed label
        df2 = aTt_perHour_1pct[(aTt_perHour_1pct['sample_size'] == "1-pct")  & (aTt_perHour_1pct['alpha'] == 1) & (aTt_perHour_1pct['sample_nr'] == 1) & (aTt_perHour_1pct['stuck_time'] == 30.0)].copy()
        df2['global_seed'] = df2['global_seed'].str.replace("default", global_seed)
        aTt_perHour_1pct_rgs = pd.concat([aTt_perHour_1pct_rgs, df2], axis = 0, ignore_index = True)

    else:
        # the output of seed 3254 uses a different naming scheme than the other seeds
        if (seed == 3254):
            path ="/home/lola/math_cluster/output/output-lausitz-1pct-1-fCf_sCF_0.01_gS_3254_3765/lausitz-1pct-1-fCf_sCf_0.01_gS_3254_3765.output_trips.csv.gz"
        else:
            path ="/home/lola/math_cluster/output/output-lausitz-1.0-pct-1-fCf_sCF_0.01_gS_"+ str(seed) + "_3765/lausitz-1.0-pct-1-fCf_sCF_0.01_gS_" +str(seed) +"_3765.output_trips.csv.gz"

        # average travel time and distance
        aTt_case5= calcAvgTravelTimeAndDist(path, "1-pct", 1,  1.0, 30.0, global_seed)
        avg_trav_time_and_dist_1pct_rGs = pd.concat([avg_trav_time_and_dist_1pct_rGs, aTt_case5[0]], axis = 0, ignore_index = True)
        aTt_perHour_1pct_rgs = pd.concat([aTt_perHour_1pct_rgs, aTt_case5[1]], axis = 0, ignore_index = True)


In [26]:
# Append the random-global-seed results to the 1 % results.
# NOTE: aTt_perHour_1pct is overwritten with the extended frame, so running
# this cell a second time appends the seed rows again.
avg_travel_times_and_dist_1pct_all = pd.concat([avg_travel_times_and_dist_1pct, avg_trav_time_and_dist_1pct_rGs], axis = 0, ignore_index= True)
aTt_perHour_1pct = pd.concat([aTt_perHour_1pct, aTt_perHour_1pct_rgs], ignore_index= True)

In [27]:
# Write the combined 1 % table to CSV (row index not written).
avg_travel_times_and_dist_1pct_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_trav_time_and_dist_all_1pct_samples.csv', index = False) 


#### 5 pct

In [28]:
# Collect trip statistics for the 5 % samples (sampleNr 1..10), once per
# storage capacity factor in storCapF. For every trips file three results are
# gathered: the averages frame and the second frame returned by
# calcAvgTravelTimeAndDist, and the departures per hour.
flowCapF = ["0.05"]
storCapF =  ["0.05", "0.10574"]

# initialize empty data frames
avg_travel_times_and_dist_5pct = pd.DataFrame()
aTt_perHour_5pct = pd.DataFrame()
dep_per_hour_5pct = pd.DataFrame()


for fCf in flowCapF:
    for sCf in storCapF:
            for sampleNr in range(1,11,1):
                # calculate adjusted stuck time: 30.0 divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # declare sample size as str, here "5-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

                # declare path based on case 
                # storage capacity factor equal to the flow capacity factor -> alpha = 1.0
                if((fCf == "0.05") & (sCf == "0.05")):
                    alpha = 1.0
                    # case 1 (default stuck time); for sample 6 the trips file name carries a "-2" suffix
                    if (sampleNr == 6):
                        path_case1  = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-6-fCf_sCF_" + sCf + "_gS_4711_3765/lausitz-5.0-pct-6-fCf_sCF_0.05_gS_4711_3765-2.output_trips.csv.gz"
                    
                    else: 
                        path_case1 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr) + "-fCf_sCF_0.05_gS_4711_3765/lausitz-5.0-pct-" + str(sampleNr) + "-fCf_sCF_0.05_gS_4711_3765.output_trips.csv.gz"
                    
                    
                    # case 3 (adjusted stuck time; 600.0 is hard-coded in the path)
                    path_case3 =  "/home/lola/math_cluster/output/output-lausitz-5-pct-" +str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_sT_600.0_3765/lausitz-5-pct-" + str(sampleNr) + "-fCf_sCF_0.05_gS_4711_sT_600.0_3765.output_trips.csv.gz"

                    # average travel time (both runs are labelled with global seed 'default')
                    aTt_case1= calcAvgTravelTimeAndDist(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case3= calcAvgTravelTimeAndDist(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values ([0] and [1] are the two frames returned per run)
                    avg_travel_times_and_dist_5pct = pd.concat([avg_travel_times_and_dist_5pct, aTt_case1[0], aTt_case3[0]], ignore_index= True)
                    aTt_perHour_5pct = pd.concat([aTt_perHour_5pct, aTt_case1[1], aTt_case3[1]], ignore_index= True)

                    # number of departures per hour 
                    dPh_case1 = calcDepPerHour(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case3 = calcDepPerHour(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_5pct = pd.concat([dep_per_hour_5pct, dPh_case1, dPh_case3], ignore_index= True)
                        
                else:
                    # second storage capacity factor (0.10574) -> alpha = 0.75
                    alpha = 0.75
                    # case 2 (default stuck time); for sample 6 the trips file name carries a "-2" suffix
                    if(sampleNr == 6):
                        path_case2 =  "/home/lola/math_cluster/output/output-lausitz-5.0-pct-6-fCf_0.05_sCF_0.10574_gS_4711_3765/lausitz-5.0-pct-6-fCf_0.05_sCF_0.10574_gS_4711_3765-2.output_trips.csv.gz"
                    else:
                        path_case2 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" +str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_3765/lausitz-5.0-pct-" + str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_3765.output_trips.csv.gz"
                    
                    
                    # case 4 (adjusted stuck time; 600.0 is hard-coded in the path)
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765/lausitz-5-pct-" + str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765.output_trips.csv.gz"

                    # average travel time
                    aTt_case2= calcAvgTravelTimeAndDist(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case4= calcAvgTravelTimeAndDist(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values
                    avg_travel_times_and_dist_5pct = pd.concat([avg_travel_times_and_dist_5pct, aTt_case2[0], aTt_case4[0]], ignore_index= True)
                    aTt_perHour_5pct = pd.concat([aTt_perHour_5pct, aTt_case2[1], aTt_case4[1]], ignore_index= True)

                    # number of departures per hour 
                    dPh_case2 = calcDepPerHour(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case4 = calcDepPerHour(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_5pct = pd.concat([dep_per_hour_5pct, dPh_case2, dPh_case4], ignore_index= True)
                    

In [29]:
# 5 pct random global seed
# Travel time / distance statistics of 5 % sample 1 for each random global
# seed in rGs (alpha = 1.0, stuck time 30.0).
avg_trav_time_and_dist_5pct_rGs = pd.DataFrame()
aTt_perHour_5pct_rgs = pd.DataFrame()
rGs = [ 4711,3254, 2306, 6384,4338, 6003, 5502, 9377, 5621, 9002 ]
for seed in rGs:
    global_seed = "rnd_" + str(seed)
    if (seed == 4711):
        # No separate trips file is read for seed 4711: the row already held in
        # avg_travel_times_and_dist_5pct (alpha 1.0, stuck time 30.0, sample 1)
        # is reused and relabelled with the seed.
        temp = avg_travel_times_and_dist_5pct[(avg_travel_times_and_dist_5pct['alpha']== 1.0) & (avg_travel_times_and_dist_5pct['stuck_time']== 30.0)& (avg_travel_times_and_dist_5pct['sample_nr'] == 1)]
        # NOTE: the Series taken from `temp` are aligned on the index label
        # given below, i.e. this relies on the selected row carrying label 0.
        df1 = pd.DataFrame({"aTt_min": temp["aTt_min"],"aTt_car_min": temp['aTt_car_min'], "aTt_ride_min": temp['aTt_ride_min'], "aTd_km": temp['aTd_km'],"aTd_car_km": temp['aTd_car_km'], "aTd_ride_km": temp['aTd_ride_km'], "sample_size": "5-pct", "sample_nr": 1, "alpha": 1.0, "stuck_time": 30.0, "global_seed": global_seed }, index = [rGs.index(seed)])
        avg_trav_time_and_dist_5pct_rGs = pd.concat([avg_trav_time_and_dist_5pct_rGs, df1])

        # same reuse for the second result frame: copy the matching rows and
        # replace the 'default' seed label
        df2 = aTt_perHour_5pct[(aTt_perHour_5pct['sample_size'] == "5-pct")  & (aTt_perHour_5pct['alpha'] == 1) & (aTt_perHour_5pct['sample_nr'] == 1) & (aTt_perHour_5pct['stuck_time'] == 30.0)].copy()
        df2['global_seed'] = df2['global_seed'].str.replace("default", global_seed)
        aTt_perHour_5pct_rgs = pd.concat([aTt_perHour_5pct_rgs, df2], axis = 0, ignore_index = True)

    else:
        # all remaining seeds (including 3254) share one path pattern
        path ="/home/lola/math_cluster/output/output-lausitz-5.0-pct-1-fCf_sCF_0.05_gS_" + str(seed) + "_3765/lausitz-5.0-pct-1-fCf_sCF_0.05_gS_" + str(seed) + "_3765.output_trips.csv.gz"
        # average travel time and distance
        aTt_case5= calcAvgTravelTimeAndDist(path, "5-pct", 1,  1.0, 30.0, global_seed)

        avg_trav_time_and_dist_5pct_rGs = pd.concat([avg_trav_time_and_dist_5pct_rGs, aTt_case5[0]], axis = 0)
        aTt_perHour_5pct_rgs = pd.concat([aTt_perHour_5pct_rgs, aTt_case5[1]], axis = 0, ignore_index = True)


In [31]:
# Append the random-global-seed results to the 5 % results. The first concat
# keeps the original index labels (no ignore_index).
# NOTE: aTt_perHour_5pct is overwritten with the extended frame, so running
# this cell a second time appends the seed rows again.
avg_travel_times_and_dist_5pct_all = pd.concat([avg_travel_times_and_dist_5pct, avg_trav_time_and_dist_5pct_rGs], axis = 0)
aTt_perHour_5pct = pd.concat([aTt_perHour_5pct, aTt_perHour_5pct_rgs], ignore_index= True)

In [32]:
# Write the combined 5 % table to CSV (row index not written).
avg_travel_times_and_dist_5pct_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_trav_time_and_dist_all_5pct_samples.csv', index = False) 

#### 10 pct

In [33]:
# Collect trip statistics for the 10 % samples (sampleNr 1..10), once per
# storage capacity factor in storCapF. For every trips file three results are
# gathered: the averages frame and the second frame returned by
# calcAvgTravelTimeAndDist, and the departures per hour.
flowCapF = ["0.1"]
storCapF =  ["0.1", "0.17783"]

# initialize empty data frames
avg_travel_times_and_dist_10pct = pd.DataFrame()
aTt_perHour_10pct = pd.DataFrame()
dep_per_hour_10pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
            for sampleNr in range(1,11,1):
                # calculate adjusted stuck time: 30.0 divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # declare sample size as str, here "10-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"
                # declare path based on case 
                # storage capacity factor equal to the flow capacity factor -> alpha = 1.0
                if((fCf == "0.1") & (sCf == "0.1")):
                    alpha = 1.0
                    # case 1 (default stuck time) and case 3 (adjusted stuck time)
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" +str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_3765/lausitz-10.0-pct-" +str(sampleNr) + "-fCf_sCF_0.1_gS_4711_3765.output_trips.csv.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-10-pct-" +str(sampleNr) + "-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-10-pct-" +str(sampleNr) + "-fCf_sCF_0.1_gS_4711_sT_300.0_3765.output_trips.csv.gz"
                    
                    # average travel time (both runs are labelled with global seed 'default')
                    aTt_case1= calcAvgTravelTimeAndDist(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case3= calcAvgTravelTimeAndDist(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values ([0] and [1] are the two frames returned per run)
                    avg_travel_times_and_dist_10pct = pd.concat([avg_travel_times_and_dist_10pct, aTt_case1[0], aTt_case3[0]], ignore_index= True)
                    aTt_perHour_10pct = pd.concat([aTt_perHour_10pct, aTt_case1[1], aTt_case3[1]], ignore_index= True)


                    # number of departures per hour 
                    dPh_case1 = calcDepPerHour(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case3 = calcDepPerHour(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_10pct = pd.concat([dep_per_hour_10pct, dPh_case1, dPh_case3], ignore_index= True)
                else:
                    # second storage capacity factor (0.17783) -> alpha = 0.75
                    alpha = 0.75
                    # case 2 (default stuck time) and case 4 (adjusted stuck time)
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" + str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf +  "_gS_4711_3765/lausitz-10.0-pct-" + str(sampleNr) +"-fCf_0.1_sCF_0.17783_gS_4711_3765.output_trips.csv.gz"
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-10-pct-" +str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_"+ str(adjusted_stuck_time) + "_3765/lausitz-10-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765.output_trips.csv.gz"

                    # average travel time
                    aTt_case2= calcAvgTravelTimeAndDist(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case4= calcAvgTravelTimeAndDist(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values
                    avg_travel_times_and_dist_10pct = pd.concat([avg_travel_times_and_dist_10pct, aTt_case2[0], aTt_case4[0]], ignore_index= True)
                    aTt_perHour_10pct = pd.concat([aTt_perHour_10pct, aTt_case2[1], aTt_case4[1]], ignore_index= True)



                    # number of departures per hour 
                    dPh_case2 = calcDepPerHour(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case4 = calcDepPerHour(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_10pct = pd.concat([dep_per_hour_10pct, dPh_case2, dPh_case4], ignore_index= True)
                    
                    

  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")


In [34]:
# Write the 10 % table to CSV (row index not written).
avg_travel_times_and_dist_10pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_travel_times_and_dist_10pct_samples.csv', index = False) 

#### 25 pct

In [35]:
# Collect trip statistics for the 25 % scenario, once per storage capacity
# factor in storCapF. Only one sample is processed (range(1,2,1) yields
# sampleNr 1). For every trips file three results are gathered: the averages
# frame and the second frame returned by calcAvgTravelTimeAndDist, and the
# departures per hour.
flowCapF = ["0.25"]
storCapF =  ["0.25", "0.35355"]

# initialize empty data frames
avg_travel_times_and_dist_25pct = pd.DataFrame()
aTt_perHour_25pct = pd.DataFrame()
dep_per_hour_25pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
            for sampleNr in range(1,2,1):
                # calculate adjusted stuck time: 30.0 divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # declare sample size as str, here "25-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"
                
                # declare path based on case 
                # storage capacity factor equal to the flow capacity factor -> alpha = 1.0
                if((fCf == "0.25") & (sCf == "0.25")):
                    alpha = 1.0
                    # case 1 (default stuck time) and case 3 (adjusted stuck time)
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_" + sCf + "_gS_4711_3765/lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765.output_trips.csv.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765.output_trips.csv.gz"

                    # average travel time and distances (both runs are labelled with global seed 'default')
                    aTt_case1= calcAvgTravelTimeAndDist(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case3= calcAvgTravelTimeAndDist(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values ([0] and [1] are the two frames returned per run)
                    avg_travel_times_and_dist_25pct = pd.concat([avg_travel_times_and_dist_25pct, aTt_case1[0], aTt_case3[0]], ignore_index= True)
                    aTt_perHour_25pct = pd.concat([aTt_perHour_25pct, aTt_case1[1], aTt_case3[1]], ignore_index= True)

                    # number of departures per hour 
                    dPh_case1 = calcDepPerHour(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case3 = calcDepPerHour(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_25pct = pd.concat([dep_per_hour_25pct, dPh_case1, dPh_case3], ignore_index= True)

                else:
                    # second storage capacity factor (0.35355) -> alpha = 0.75
                    alpha = 0.75
                    # case 2 (default stuck time) and case 4 (adjusted stuck time)
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_3765/lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765.output_trips.csv.gz"
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765.output_trips.csv.gz"
                    
                    # average travel time and distances
                    aTt_case2= calcAvgTravelTimeAndDist(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case4= calcAvgTravelTimeAndDist(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values
                    avg_travel_times_and_dist_25pct = pd.concat([avg_travel_times_and_dist_25pct, aTt_case2[0], aTt_case4[0]], ignore_index= True)
                    aTt_perHour_25pct = pd.concat([aTt_perHour_25pct, aTt_case2[1], aTt_case4[1]], ignore_index= True)

                    # number of departures per hour 
                    dPh_case2 = calcDepPerHour(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case4 = calcDepPerHour(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_25pct = pd.concat([dep_per_hour_25pct, dPh_case2, dPh_case4], ignore_index= True)

In [36]:
# Write the 25 % table to CSV (row index not written).
avg_travel_times_and_dist_25pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_travel_times_and_dist_25pct_samples.csv', index = False) 


#### 50 pct

In [37]:
# Collect trip statistics for the 50 % scenario, once per storage capacity
# factor in storCapF. Only one sample is processed (range(1,2,1) yields
# sampleNr 1). For every trips file three results are gathered: the averages
# frame and the second frame returned by calcAvgTravelTimeAndDist, and the
# departures per hour.
flowCapF = ["0.5"]
storCapF =  ["0.5", "0.5946"]

# initialize empty data frames
avg_travel_times_and_dist_50pct = pd.DataFrame()
aTt_perHour_50pct = pd.DataFrame()
dep_per_hour_50pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
            for sampleNr in range(1,2,1):
                # calculate adjusted stuck time: 30.0 divided by the flow capacity factor
                default_stuck_time = 30.0
                adjusted_stuck_time = 30.0/float(flowCapF[0])
                # declare sample size as str, here "50-pct"
                sample_size_as_string = str(int(float(fCf)*100)) + "-pct"

                # declare path based on case 
                # storage capacity factor equal to the flow capacity factor -> alpha = 1.0
                if((fCf == "0.5") & (sCf == "0.5")):
                    alpha = 1.0
                    # case 1 (default stuck time) and case 3 (adjusted stuck time)
                    path_case1 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_" + sCf + "_gS_4711_3765/lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765.output_trips.csv.gz"
                    path_case3 = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765.output_trips.csv.gz"
                    
                    # average travel time (both runs are labelled with global seed 'default')
                    aTt_case1= calcAvgTravelTimeAndDist(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case3= calcAvgTravelTimeAndDist(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values ([0] and [1] are the two frames returned per run)
                    avg_travel_times_and_dist_50pct = pd.concat([avg_travel_times_and_dist_50pct, aTt_case1[0], aTt_case3[0]], ignore_index= True)
                    aTt_perHour_50pct = pd.concat([aTt_perHour_50pct, aTt_case1[1], aTt_case3[1]], ignore_index= True)


                    # number of departures per hour 
                    dPh_case1 = calcDepPerHour(path_case1, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case3 = calcDepPerHour(path_case3, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_50pct = pd.concat([dep_per_hour_50pct, dPh_case1, dPh_case3], ignore_index= True)

                else:
                    # second storage capacity factor (0.5946) -> alpha = 0.75
                    alpha = 0.75
                    # case 2 (default stuck time) and case 4 (adjusted stuck time)
                    path_case2 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_3765/lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765.output_trips.csv.gz"
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765.output_trips.csv.gz"
                    
                    # average travel time
                    aTt_case2= calcAvgTravelTimeAndDist(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    aTt_case4= calcAvgTravelTimeAndDist(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate with existing values
                    avg_travel_times_and_dist_50pct = pd.concat([avg_travel_times_and_dist_50pct, aTt_case2[0], aTt_case4[0]], ignore_index= True)
                    aTt_perHour_50pct = pd.concat([aTt_perHour_50pct, aTt_case2[1], aTt_case4[1]], ignore_index= True)


                    # number of departures per hour 
                    dPh_case2 = calcDepPerHour(path_case2, sample_size_as_string, sampleNr,  alpha, default_stuck_time, 'default')
                    dPh_case4 = calcDepPerHour(path_case4, sample_size_as_string, sampleNr,  alpha, adjusted_stuck_time, 'default')

                    # concatenate results
                    dep_per_hour_50pct = pd.concat([dep_per_hour_50pct, dPh_case2, dPh_case4], ignore_index= True)
                    



In [38]:
# 25 pct doubled
# This run is labelled "25-pct-doubled" (sample 1, alpha 1.0, stuck time 30.0,
# seed 'default') and its results are appended to the 50 pct frames.

path_case1 = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711_3765/lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711__3765.output_trips.csv.gz"
# average travel time
aTt_case1= calcAvgTravelTimeAndDist(path_case1, "25-pct-doubled", 1,  1.0, 30.0, 'default')

# concatenate with existing values
# NOTE(review): only the first returned frame is used; aTt_case1[1] is not
# added to aTt_perHour_50pct -- confirm this is intended.
avg_travel_times_and_dist_50pct = pd.concat([avg_travel_times_and_dist_50pct, aTt_case1[0]], ignore_index= True)


# number of departures per hour 
dPh_case1 = calcDepPerHour(path_case1, "25-pct-doubled", 1,  1.0, 30.0, 'default')

# concatenate results
dep_per_hour_50pct = pd.concat([dep_per_hour_50pct, dPh_case1], ignore_index= True)




In [39]:
# Write the 50 % table to CSV (row index not written).
avg_travel_times_and_dist_50pct.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_travel_times_and_dist_50pct_samples.csv', index = False) 

#### 100 pct

In [40]:
# 100 pct
# Single run labelled "100-pct" (sample 1, alpha 1.0, stuck time 30.0,
# seed 'default').
dep_per_hour_100pct = pd.DataFrame()
avg_travel_times_and_dist_100pct = pd.DataFrame()
# NOTE: avg_travel_distances_100pct is initialised but not filled in this cell.
avg_travel_distances_100pct = pd.DataFrame()
path_case1 = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765.output_trips.csv.gz"
aTt_case1= calcAvgTravelTimeAndDist(path_case1, "100-pct", 1,  1.0, 30.0, 'default')

# concatenate with existing values
avg_travel_times_and_dist_100pct = pd.concat([avg_travel_times_and_dist_100pct, aTt_case1[0]], ignore_index= True)
# the second returned frame is used directly as the 100 pct per-hour frame
aTt_perHour_100pct = aTt_case1[1]
# number of departures per hour 
dPh_case1 = calcDepPerHour(path_case1, "100-pct", 1,  1.0, 30.0, 'default')

# concatenate results
dep_per_hour_100pct = pd.concat([dep_per_hour_100pct, dPh_case1], ignore_index= True)


In [41]:
# 25 pct quadrupled
# This run is labelled "25-pct-quadrupled" (sample 1, alpha 1.0, stuck time
# 30.0, seed 'default') and its results are appended to the 100 pct frames.
path_case1 = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25.0-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711_3765/lausitz-25-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711__3765.output_trips.csv.gz"
# average travel time
aTt_case1= calcAvgTravelTimeAndDist(path_case1, "25-pct-quadrupled", 1,  1.0, 30.0, 'default')

# concatenate with existing values
# NOTE(review): only the first returned frame is used; aTt_case1[1] is not
# added to aTt_perHour_100pct -- confirm this is intended.
avg_travel_times_and_dist_100pct = pd.concat([avg_travel_times_and_dist_100pct, aTt_case1[0]], ignore_index= True)


# number of departures per hour 
dPh_case1 = calcDepPerHour(path_case1, "25-pct-quadrupled", 1,  1.0, 30.0, 'default')

# concatenate results
dep_per_hour_100pct = pd.concat([dep_per_hour_100pct, dPh_case1], ignore_index= True)


  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")
  temp = pd.read_csv(pathToFile, compression= "gzip", sep=";")


In [42]:
# concat the average travel time / distance tables of all sample sizes
# (row-wise; the original index labels are kept)
avg_trav_time_and_dist_all = pd.concat([avg_travel_times_and_dist_1pct_all, avg_travel_times_and_dist_5pct_all, avg_travel_times_and_dist_10pct,
                                avg_travel_times_and_dist_25pct, avg_travel_times_and_dist_50pct, avg_travel_times_and_dist_100pct], axis=0)

In [43]:
# Stack the per-hour frames of all sample sizes into one frame with a fresh index.
aTt_perHour_1_100 = pd.concat([aTt_perHour_1pct, aTt_perHour_5pct, aTt_perHour_10pct, aTt_perHour_25pct, aTt_perHour_50pct, aTt_perHour_100pct], ignore_index= True)

In [44]:
# Stack the departures-per-hour frames of all sample sizes into one frame with a fresh index.
dep_per_hour_all = pd.concat([dep_per_hour_1pct, dep_per_hour_5pct, dep_per_hour_10pct, dep_per_hour_25pct, dep_per_hour_50pct, dep_per_hour_100pct], axis = 0, ignore_index= True)

In [45]:
# Average the departures per hour over all rows sharing the same sample size,
# alpha, stuck time and hour, and scale the mean with the factor derived from
# the sample size label.
aggregated_departures = pd.DataFrame()
list_adj_Factors = []

for sampleSize in dep_per_hour_all['sample_size'].unique():
    rows_of_size = dep_per_hour_all[dep_per_hour_all['sample_size'] == sampleSize]

    for alpha in rows_of_size['alpha'].unique():
        rows_of_alpha = rows_of_size[rows_of_size['alpha'] == alpha]

        for stuckTime in rows_of_alpha['stuck_time'].unique():
            rows_of_stuck_time = rows_of_alpha[rows_of_alpha['stuck_time'] == stuckTime]

            for hour in rows_of_stuck_time['hour'].unique():
                # calculate adjustment factor: 2.0 for "doubled" labels,
                # 1.0 for "quadrupled" labels, otherwise 100 / sample size in percent
                if 'doubled' in sampleSize:
                    adj_Factor = 2.0
                elif 'quadrupled' in sampleSize:
                    adj_Factor = 1.0
                else:
                    sZ = sampleSize.replace("-pct", "")
                    adj_Factor = 100.0 / float(sZ)
                list_adj_Factors.append(adj_Factor)

                departures_in_hour = rows_of_stuck_time[rows_of_stuck_time['hour'] == hour]['n_departues']
                avg_departures_scaled = np.mean(departures_in_hour)*adj_Factor

                temp = pd.DataFrame(
                    {
                        'sample_size': sampleSize,
                        'alpha': alpha,
                        'stuck_time': stuckTime,
                        'hour': hour,
                        'avg_dep_scaled': avg_departures_scaled,
                    },
                    index = [0],
                )
                aggregated_departures = pd.concat([aggregated_departures, temp], axis = 0, ignore_index= True)
                

            

In [46]:
# Write the three aggregated tables (averages, scaled departures per hour,
# per-hour travel times) to CSV, each without the row index.
avg_trav_time_and_dist_all.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_trav_time_and_dist_1_to_100_pct_samples.csv', index = False)
aggregated_departures.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/aggregated_departures_1_to_100_pct_samples_already_scaled.csv', index = False)
aTt_perHour_1_100.to_csv('/home/lola/Nextcloud/Masterarbeit/03_Outputs/avg_trav_time_per_hour_1_to_100_pct.csv', index = False)

## 8. Travel Time distribution

#### Travel time categories

In [2]:
def sixMinuteCategoriesUpToTwoHours(df):
    """Count trips per six-minute travel-time class.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a column ``trav_time2`` with travel times expressed in hours
        (0.10 corresponds to six minutes).

    Returns
    -------
    pandas.DataFrame
        Columns ``category`` (label such as ``"0:06_0:12"``) and ``freq``
        (number of rows of ``df`` in that class). The frame always has the
        same 21 rows in the same order: twenty half-open six-minute classes
        ``[lower, upper)`` from 0:00 to 2:00, followed by ``">2h"``.
        Classes without any trip are reported with ``freq`` 0 so that the
        tables of different runs line up row by row.

    Notes
    -----
    ``">2h"`` is the catch-all class: besides values of two hours or more it
    also receives negative and NaN values.
    """
    n_classes = 20

    # i / 10 yields exactly the same floats as the literals 0.10, 0.20, ...
    edges = np.array([i / 10 for i in range(n_classes + 1)])

    labels = []
    for k in range(n_classes):
        lower, upper = 6 * k, 6 * (k + 1)
        labels.append(f"{lower // 60}:{lower % 60:02d}_{upper // 60}:{upper % 60:02d}")
    labels.append(">2h")

    hours = df["trav_time2"].to_numpy(dtype=float)

    # side="right" makes every class closed on the left and open on the right.
    # Values >= 2.0 (and NaN, which sorts last) end up in slot 20 on their own.
    slot = np.searchsorted(edges, hours, side="right") - 1
    slot[slot < 0] = n_classes  # negative values go to the catch-all class

    freq = np.bincount(slot, minlength=n_classes + 1)
    return pd.DataFrame({"category": labels, "freq": freq})


    

In [3]:
def calculateSixMinCat(pathToFile, SampleNr):
    """Read a gzipped trips CSV and return its six-minute travel-time frequencies.

    The file is read with ';' as separator. Its ``trav_time`` column is parsed
    as a timedelta and converted to hours before the trips are classified.
    The frequency column of the result is named ``freq_<SampleNr>``.
    """
    trips = pd.read_csv(pathToFile, sep=";", compression="gzip")
    trips["trav_time"] = pd.to_timedelta(trips["trav_time"])

    one_hour = pd.Timedelta(minutes=60)
    trips["trav_time2"] = trips["trav_time"].astype('timedelta64[s]') / one_hour

    frequencies = sixMinuteCategoriesUpToTwoHours(trips)
    return frequencies.rename(columns={"freq": "freq_" + str(SampleNr)})
    

#### 1pct

In [4]:
flowCapF = ["0.01"]
storCapF = ["0.01", "0.03162"]

# One list of per-sample frequency tables for each of the four 1-pct cases:
#   case 1 / case 3: storage capacity factor 0.01    (default / adjusted stuck time)
#   case 2 / case 4: storage capacity factor 0.03162 (default / adjusted stuck time)
avg_travel_time_dist_1pct_case1 = []
avg_travel_time_dist_1pct_case2 = []
avg_travel_time_dist_1pct_case3 = []
avg_travel_time_dist_1pct_case4 = []

output_root = "/home/lola/math_cluster/output/"

for fCf in flowCapF:
    for sCf in storCapF:
        for sampleNr in range(1, 11):
            # the adjusted stuck time is the default divided by the flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0 / float(flowCapF[0])
            # sample size label, e.g. "1-pct"
            sample_size_as_string = str(int(float(fCf) * 100)) + "-pct"

            equal_capacity_factors = fCf == "0.01" and sCf == '0.01'
            if equal_capacity_factors:
                # cases 1 and 3
                path_case1 = (
                    f"{output_root}output-lausitz-1pct-{sampleNr}-fCf_sCF_{sCf}_gS_default_3765"
                    f"/lausitz-1pct-{sampleNr}-fCf_sCf_0.01_gS_default_3765.output_trips.csv.gz"
                )
                path_case3 = (
                    f"{output_root}output-lausitz-1-pct-{sampleNr}-fCf_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765"
                    f"/lausitz-1-pct-{sampleNr}-fCf_sCF_0.01_gS_4711_sT_3000.0_3765.output_trips.csv.gz"
                )
                avg_travel_time_dist_1pct_case1.append(calculateSixMinCat(path_case1, sampleNr))
                avg_travel_time_dist_1pct_case3.append(calculateSixMinCat(path_case3, sampleNr))
            else:
                # cases 2 and 4
                path_case2 = (
                    f"{output_root}output-lausitz-1pct-{sampleNr}-fCf_0.01_sCF_{sCf}_gS_default_3765"
                    f"/lausitz-1pct-{sampleNr}-fCf_0.01_sCf_0.03162_gS_default_3765.output_trips.csv.gz"
                )
                path_case4 = (
                    f"{output_root}output-lausitz-1-pct-{sampleNr}-fCf_{fCf}_sCF_{sCf}_gS_4711_sT_{adjusted_stuck_time}_3765"
                    f"/lausitz-1-pct-{sampleNr}-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765.output_trips.csv.gz"
                )
                avg_travel_time_dist_1pct_case2.append(calculateSixMinCat(path_case2, sampleNr))
                avg_travel_time_dist_1pct_case4.append(calculateSixMinCat(path_case4, sampleNr))




In [5]:
def _join_samples_on_category(sample_tables):
    """Combine the first ten per-sample frequency tables into one wide table.

    Every table contributes its ``freq_<n>`` column. Rows are matched on the
    category label (left join on the first sample) rather than on row
    position, so a sample that lacks a class gets NaN in that row instead of
    having its remaining values shifted into the wrong class. The label
    column is returned as ``sixMinCategories``.
    """
    joined = sample_tables[0]
    for sample_table in sample_tables[1:10]:
        joined = pd.merge(joined, sample_table, on=['category'], how='left')
    return joined.rename(columns={'category': 'sixMinCategories'})


# resulting column layout per case: sixMinCategories, freq_1, ..., freq_10
df_avg_travel_time_dist_1pct_case1 = _join_samples_on_category(avg_travel_time_dist_1pct_case1)
df_avg_travel_time_dist_1pct_case2 = _join_samples_on_category(avg_travel_time_dist_1pct_case2)
df_avg_travel_time_dist_1pct_case3 = _join_samples_on_category(avg_travel_time_dist_1pct_case3)
df_avg_travel_time_dist_1pct_case4 = _join_samples_on_category(avg_travel_time_dist_1pct_case4)

In [6]:
# Mean frequency per category over the ten sample columns (positions 1..10).
# Computed row-wise for however many category rows each table has, instead of
# assuming exactly 21 rows.
mean_case1 = list(df_avg_travel_time_dist_1pct_case1.iloc[:, 1:11].mean(axis=1))
mean_case2 = list(df_avg_travel_time_dist_1pct_case2.iloc[:, 1:11].mean(axis=1))
mean_case3 = list(df_avg_travel_time_dist_1pct_case3.iloc[:, 1:11].mean(axis=1))
mean_case4 = list(df_avg_travel_time_dist_1pct_case4.iloc[:, 1:11].mean(axis=1))

In [7]:
# Add the per-category mean as column 11 (right after freq_10) of each case table.
for _case_table, _case_means in (
    (df_avg_travel_time_dist_1pct_case1, mean_case1),
    (df_avg_travel_time_dist_1pct_case2, mean_case2),
    (df_avg_travel_time_dist_1pct_case3, mean_case3),
    (df_avg_travel_time_dist_1pct_case4, mean_case4),
):
    _case_table.insert(11, 'mean', _case_means)

In [8]:
# Case 1: append the run configuration as constant columns 12..15.
_case1_settings = [('sample_size', "1-pct"), ('alpha', 1.0), ('stuck_time', 30.0), ('global_seed', "default")]
for _offset, (_label, _setting) in enumerate(_case1_settings):
    df_avg_travel_time_dist_1pct_case1.insert(12 + _offset, _label, _setting)

In [9]:
# Case 2: append the run configuration as constant columns 12..15.
_case2_settings = [('sample_size', "1-pct"), ('alpha', 0.75), ('stuck_time', 30.0), ('global_seed', "default")]
for _offset, (_label, _setting) in enumerate(_case2_settings):
    df_avg_travel_time_dist_1pct_case2.insert(12 + _offset, _label, _setting)

In [10]:
# Case 3: append the run configuration as constant columns 12..15.
_case3_settings = [('sample_size', "1-pct"), ('alpha', 1.0), ('stuck_time', 3000.0), ('global_seed', "default")]
for _offset, (_label, _setting) in enumerate(_case3_settings):
    df_avg_travel_time_dist_1pct_case3.insert(12 + _offset, _label, _setting)

In [11]:
# Case 4: append the run configuration as constant columns 12..15.
_case4_settings = [('sample_size', "1-pct"), ('alpha', 0.75), ('stuck_time', 3000.0), ('global_seed', "default")]
for _offset, (_label, _setting) in enumerate(_case4_settings):
    df_avg_travel_time_dist_1pct_case4.insert(12 + _offset, _label, _setting)

In [12]:
# Stack the four 1-pct case tables row-wise (case 1, 2, 3, 4).
_one_pct_case_tables = [
    df_avg_travel_time_dist_1pct_case1,
    df_avg_travel_time_dist_1pct_case2,
    df_avg_travel_time_dist_1pct_case3,
    df_avg_travel_time_dist_1pct_case4,
]
df_avg_travel_time_dist_1pct_all = pd.concat(_one_pct_case_tables, axis=0)

In [13]:
# Write the combined 1-pct table (cases 1-4) without the index column.
_one_pct_csv = '/home/lola/Nextcloud/Masterarbeit/03_Outputs/trav_time_categories_1pct_cases_1_4.csv'
df_avg_travel_time_dist_1pct_all.to_csv(_one_pct_csv, index=False)

#### 5 pct

In [14]:
# One six-minute frequency table per 5-pct sample.
trav_time_5pct_categories = []
for elem in range(1,11,1):
    run_name = "lausitz-5.0-pct-" + str(elem) + "-fCf_sCF_0.05_gS_4711_3765"
    # the trips file of sample 6 carries an extra "-2" in its name
    file_stem = run_name + "-2" if elem == 6 else run_name
    path = "/home/lola/math_cluster/output/output-" + run_name + "/" + file_stem + ".output_trips.csv.gz"
    trav_time_5pct_categories.append(calculateSixMinCat(path, elem))

# left join all ten samples on the category label
trav_time_5pct_cat_all = trav_time_5pct_categories[0]
for sample_table in trav_time_5pct_categories[1:10]:
    trav_time_5pct_cat_all = pd.merge(trav_time_5pct_cat_all, sample_table, on = ['category'], how='left')

# Use the same label column name as the other sample sizes; otherwise the final
# concat keeps 'category' and 'sixMinCategories' as two half-empty columns.
trav_time_5pct_cat_all = trav_time_5pct_cat_all.rename(columns={"category": "sixMinCategories"})

# mean over freq_1 ... freq_10, then the run configuration as constant columns
trav_time_5pct_cat_all['mean'] = trav_time_5pct_cat_all.iloc[:,1:11].mean(axis = 1)
trav_time_5pct_cat_all.insert(12, 'sample_size', "5-pct")
trav_time_5pct_cat_all.insert(13, 'alpha', 1.0)
trav_time_5pct_cat_all.insert(14, 'stuck_time', 30.0)
trav_time_5pct_cat_all.insert(15,'global_seed', "default")

#### 10pct

In [15]:
# One six-minute frequency table per 10-pct sample.
trav_time_10pct_categories = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" +str(elem) + "-fCf_sCF_0.1_gS_4711_3765/lausitz-10.0-pct-" +str(elem) + "-fCf_sCF_0.1_gS_4711_3765.output_trips.csv.gz"
    trav_time_10pct_categories.append(calculateSixMinCat(path, elem))

# left join all ten samples on the category label
trav_time_10pct_cat_all = trav_time_10pct_categories[0]
for sample_table in trav_time_10pct_categories[1:10]:
    trav_time_10pct_cat_all = pd.merge(trav_time_10pct_cat_all, sample_table, on = ['category'], how='left')

# Use the same label column name as the other sample sizes; otherwise the final
# concat keeps 'category' and 'sixMinCategories' as two half-empty columns.
trav_time_10pct_cat_all = trav_time_10pct_cat_all.rename(columns={"category": "sixMinCategories"})

# mean over freq_1 ... freq_10, then the run configuration as constant columns
trav_time_10pct_cat_all['mean'] = trav_time_10pct_cat_all.iloc[:,1:11].mean(axis = 1)
trav_time_10pct_cat_all.insert(12, 'sample_size', "10-pct")
trav_time_10pct_cat_all.insert(13, 'alpha', 1.0)
trav_time_10pct_cat_all.insert(14, 'stuck_time', 30.0)
trav_time_10pct_cat_all.insert(15,'global_seed', "default")

  temp = pd.read_csv(path, compression= "gzip", sep=";")


#### 25 pct

In [19]:
# 25 pct: a single run, so its frequencies are stored directly in the 'mean' column.
path = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765/lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765.output_trips.csv.gz"
t_25pct = pd.read_csv(path, sep=";", compression="gzip")
t_25pct["trav_time"] = pd.to_timedelta(t_25pct["trav_time"])
# travel time in hours
t_25pct["trav_time2"] = t_25pct["trav_time"].astype('timedelta64[s]') / pd.Timedelta(minutes=60)

trav_time_25pct_categories = sixMinuteCategoriesUpToTwoHours(t_25pct).rename(
    columns={"category": "sixMinCategories", "freq": "mean"}
)
_settings_25pct = [('sample_size', "25-pct"), ('alpha', 1.0), ('stuck_time', 30.0), ('global_seed', "default")]
for _position, (_label, _setting) in enumerate(_settings_25pct, start=2):
    trav_time_25pct_categories.insert(_position, _label, _setting)

#### 50 pct

In [21]:
# 50 pct: a single run, so its frequencies are stored directly in the 'mean' column.
path = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765.output_trips.csv.gz"
t_50pct = pd.read_csv(path, sep=";", compression="gzip")
t_50pct["trav_time"] = pd.to_timedelta(t_50pct["trav_time"])
# travel time in hours
t_50pct["trav_time2"] = t_50pct["trav_time"].astype('timedelta64[s]') / pd.Timedelta(minutes=60)

trav_time_50pct_categories = sixMinuteCategoriesUpToTwoHours(t_50pct).rename(
    columns={"category": "sixMinCategories", "freq": "mean"}
)
_settings_50pct = [('sample_size', "50-pct"), ('alpha', 1.0), ('stuck_time', 30.0), ('global_seed', "default")]
for _position, (_label, _setting) in enumerate(_settings_50pct, start=2):
    trav_time_50pct_categories.insert(_position, _label, _setting)

In [22]:
# 25 pct doubled: a single run, so its frequencies are stored directly in the 'mean' column.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711_3765/lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711__3765.output_trips.csv.gz"
t_25pct_doubled = pd.read_csv(path, sep=";", compression="gzip")
t_25pct_doubled["trav_time"] = pd.to_timedelta(t_25pct_doubled["trav_time"])
# travel time in hours
t_25pct_doubled["trav_time2"] = t_25pct_doubled["trav_time"].astype('timedelta64[s]') / pd.Timedelta(minutes=60)

trav_time_25pct_doubled_categories = sixMinuteCategoriesUpToTwoHours(t_25pct_doubled).rename(
    columns={"category": "sixMinCategories", "freq": "mean"}
)
_settings_25pct_doubled = [('sample_size', "25-pct-doubled"), ('alpha', 1.0), ('stuck_time', 30.0), ('global_seed', "default")]
for _position, (_label, _setting) in enumerate(_settings_25pct_doubled, start=2):
    trav_time_25pct_doubled_categories.insert(_position, _label, _setting)



#### 100 pct

In [23]:
# 100 pct: a single run, so its frequencies are stored directly in the 'mean' column.
path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765.output_trips.csv.gz"
t_100pct = pd.read_csv(path, sep=";", compression="gzip")
t_100pct["trav_time"] = pd.to_timedelta(t_100pct["trav_time"])
# travel time in hours
t_100pct["trav_time2"] = t_100pct["trav_time"].astype('timedelta64[s]') / pd.Timedelta(minutes=60)

trav_time_100pct_categories = sixMinuteCategoriesUpToTwoHours(t_100pct).rename(
    columns={"category": "sixMinCategories", "freq": "mean"}
)
_settings_100pct = [('sample_size', "100-pct"), ('alpha', 1.0), ('stuck_time', 30.0), ('global_seed', "default")]
for _position, (_label, _setting) in enumerate(_settings_100pct, start=2):
    trav_time_100pct_categories.insert(_position, _label, _setting)

In [24]:
# 25 pct quadrupled: a single run, so its frequencies are stored directly in the 'mean' column.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25.0-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711_3765/lausitz-25-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711__3765.output_trips.csv.gz"
t_25pct_quadrupled = pd.read_csv(path, sep=";", compression="gzip")
t_25pct_quadrupled["trav_time"] = pd.to_timedelta(t_25pct_quadrupled["trav_time"])
# travel time in hours
t_25pct_quadrupled["trav_time2"] = t_25pct_quadrupled["trav_time"].astype('timedelta64[s]') / pd.Timedelta(minutes=60)

trav_time_25pct_quadrupled_categories = sixMinuteCategoriesUpToTwoHours(t_25pct_quadrupled).rename(
    columns={"category": "sixMinCategories", "freq": "mean"}
)
_settings_25pct_quadrupled = [('sample_size', "25-pct-quadrupled"), ('alpha', 1.0), ('stuck_time', 30.0), ('global_seed', "default")]
for _position, (_label, _setting) in enumerate(_settings_25pct_quadrupled, start=2):
    trav_time_25pct_quadrupled_categories.insert(_position, _label, _setting)


  t_25pct_quadrupled = pd.read_csv(path, compression= "gzip", sep=";")


#### Concat and write csv

In [25]:
# Stack the travel-time category tables of all sample sizes row-wise and write them to disk.
_all_category_tables = [
    df_avg_travel_time_dist_1pct_all,
    trav_time_5pct_cat_all,
    trav_time_10pct_cat_all,
    trav_time_25pct_categories,
    trav_time_50pct_categories,
    trav_time_25pct_doubled_categories,
    trav_time_100pct_categories,
    trav_time_25pct_quadrupled_categories,
]
trav_time_categories_all = pd.concat(_all_category_tables, axis=0)
trav_time_categories_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/trav_time_categories_1_100.csv',
    index=False,
)

## 7. Network Congestion Index

#### 1 pct

# 1 pct, alpha 1.0, stuck time 30 s: one congestion-index table per sample
nci_1pct = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-1pct-"+ str(elem) + "-fCf_sCF_0.01_gS_default_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path, sep=",")
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_1pct.append(temp)

# left join all ten samples on (road_type, hour)
nci_1pct_df = pd.merge(nci_1pct[0], nci_1pct[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_1pct_df = pd.merge(nci_1pct_df, nci_1pct[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_1pct_df.columns if col.startswith("congestion_index_")]
nci_1pct_df['congestion_index_mean'] = nci_1pct_df[sample_columns].mean(axis=1)
nci_1pct_df.insert(13, 'sample_size', "1-pct")
nci_1pct_df.insert(14, 'alpha', 1.0)
nci_1pct_df.insert(15, 'stuck_time', 30.0)
nci_1pct_df.insert(16,'global_seed', "default")



# 1 pct, alpha 0.75 (storage capacity factor 0.03162), stuck time 30 s
nci_1pct_sCf = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-1pct-"+ str(elem) + "-fCf_0.01_sCF_0.03162_gS_default_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path, sep=",")
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_1pct_sCf.append(temp)

# left join all ten samples on (road_type, hour)
nci_1pct_sCf_df = pd.merge(nci_1pct_sCf[0], nci_1pct_sCf[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_1pct_sCf_df = pd.merge(nci_1pct_sCf_df, nci_1pct_sCf[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_1pct_sCf_df.columns if col.startswith("congestion_index_")]
nci_1pct_sCf_df['congestion_index_mean'] = nci_1pct_sCf_df[sample_columns].mean(axis=1)
nci_1pct_sCf_df.insert(13, 'sample_size', "1-pct")
nci_1pct_sCf_df.insert(14, 'alpha', 0.75)
nci_1pct_sCf_df.insert(15, 'stuck_time', 30.0)
nci_1pct_sCf_df.insert(16,'global_seed', "default")


# random global seed: sample 1 of the 1-pct runs, simulated with ten different seeds
nci_1pct_rGs = []
rGs = [ 4711,3254, 2306, 6384,4338, 6003, 5502, 9377, 5621, 9002 ]
for seed_nr, seed in enumerate(rGs, start=1):
    global_seed = "rnd_" + str(seed)
    if (seed == 4711):
        # the seed-4711 column is taken from sample 1 of the table built above
        temp = nci_1pct_df[["road_type", "hour", "congestion_index_1"]].copy()
        nci_1pct_rGs.append(temp)
        continue
    if (seed == 3254):
        path ="/home/lola/math_cluster/output/output-lausitz-1pct-1-fCf_sCF_0.01_gS_3254_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    else:
        path = "/home/lola/math_cluster/output/output-lausitz-1.0-pct-1-fCf_sCF_0.01_gS_"+str(seed) + "_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    temp = pd.read_csv(path, sep = ",")
    # the columns are numbered by the position of the seed in rGs
    temp.rename(columns={"congestion_index": "congestion_index_" + str(seed_nr)}, inplace = True)
    nci_1pct_rGs.append(temp)

# left join all ten seeds on (road_type, hour)
nci_1pct_rGs_df = pd.merge(nci_1pct_rGs[0], nci_1pct_rGs[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_1pct_rGs_df = pd.merge(nci_1pct_rGs_df, nci_1pct_rGs[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-seed column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth seed.
sample_columns = [col for col in nci_1pct_rGs_df.columns if col.startswith("congestion_index_")]
nci_1pct_rGs_df['congestion_index_mean'] = nci_1pct_rGs_df[sample_columns].mean(axis=1)
nci_1pct_rGs_df.insert(13, 'sample_size', "1-pct")
nci_1pct_rGs_df.insert(14, 'alpha', 1.0)
nci_1pct_rGs_df.insert(15, 'stuck_time', 30.0)
nci_1pct_rGs_df.insert(16,'global_seed', "rnd")



# 1 pct, alpha 1.0, stuck time 3000 s
nci_1pct_sT = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(elem) + "-fCf_sCF_0.01_gS_4711_sT_3000.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path, sep=",")
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_1pct_sT.append(temp)

# left join all ten samples on (road_type, hour)
nci_1pct_sT_df = pd.merge(nci_1pct_sT[0], nci_1pct_sT[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_1pct_sT_df = pd.merge(nci_1pct_sT_df, nci_1pct_sT[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_1pct_sT_df.columns if col.startswith("congestion_index_")]
nci_1pct_sT_df['congestion_index_mean'] = nci_1pct_sT_df[sample_columns].mean(axis=1)
nci_1pct_sT_df.insert(13, 'sample_size', "1-pct")
nci_1pct_sT_df.insert(14, 'alpha', 1.0)
nci_1pct_sT_df.insert(15, 'stuck_time', 3000.0)
nci_1pct_sT_df.insert(16,'global_seed', "default")


# 1 pct, alpha 0.75 (storage capacity factor 0.03162), stuck time 3000 s
nci_1pct_sT_sCf = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(elem) + "-fCf_0.01_sCF_0.03162_gS_4711_sT_3000.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path, sep=",")
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_1pct_sT_sCf.append(temp)

# left join all ten samples on (road_type, hour)
nci_1pct_sT_sCf_df = pd.merge(nci_1pct_sT_sCf[0], nci_1pct_sT_sCf[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_1pct_sT_sCf_df = pd.merge(nci_1pct_sT_sCf_df, nci_1pct_sT_sCf[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_1pct_sT_sCf_df.columns if col.startswith("congestion_index_")]
nci_1pct_sT_sCf_df['congestion_index_mean'] = nci_1pct_sT_sCf_df[sample_columns].mean(axis=1)
nci_1pct_sT_sCf_df.insert(13, 'sample_size', "1-pct")
nci_1pct_sT_sCf_df.insert(14, 'alpha', 0.75)
nci_1pct_sT_sCf_df.insert(15, 'stuck_time', 3000.0)
nci_1pct_sT_sCf_df.insert(16,'global_seed', "default")

# Stack all 1-pct congestion-index tables row-wise and write them to disk.
_nci_1pct_tables = [
    nci_1pct_df,
    nci_1pct_sCf_df,
    nci_1pct_rGs_df,
    nci_1pct_sT_df,
    nci_1pct_sT_sCf_df,
]
nci_1pct_all = pd.concat(_nci_1pct_tables)

nci_1pct_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/nci_all_1pct_samples.csv',
    index=False,
)

#### 5 pct

# 5 pct, alpha 1.0, stuck time 30 s: one congestion-index table per sample.
# Sample 6 follows the same path pattern as the others, so no special case is needed.
nci_5pct = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-"+ str(elem) + "-fCf_sCF_0.05_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path, sep=",")
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_5pct.append(temp)

# left join all ten samples on (road_type, hour)
nci_5pct_df = pd.merge(nci_5pct[0], nci_5pct[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_5pct_df = pd.merge(nci_5pct_df, nci_5pct[elem], on = ['road_type', 'hour'], how='left')    

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_5pct_df.columns if col.startswith("congestion_index_")]
nci_5pct_df['congestion_index_mean'] = nci_5pct_df[sample_columns].mean(axis=1)
nci_5pct_df.insert(13, 'sample_size', "5-pct")
nci_5pct_df.insert(14, 'alpha', 1.0)
nci_5pct_df.insert(15, 'stuck_time', 30.0)
nci_5pct_df.insert(16,'global_seed', "default")


# 5 pct, alpha 0.75 (storage capacity factor 0.10574), stuck time 30 s.
# Sample 6 follows the same path pattern as the others, so no special case is needed.
nci_5pct_sCf = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-"+ str(elem) + "-fCf_0.05_sCF_0.10574_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path, sep=",")
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_5pct_sCf.append(temp)

# left join all ten samples on (road_type, hour)
nci_5pct_sCf_df = pd.merge(nci_5pct_sCf[0], nci_5pct_sCf[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_5pct_sCf_df = pd.merge(nci_5pct_sCf_df, nci_5pct_sCf[elem], on = ['road_type', 'hour'], how='left')    

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_5pct_sCf_df.columns if col.startswith("congestion_index_")]
nci_5pct_sCf_df['congestion_index_mean'] = nci_5pct_sCf_df[sample_columns].mean(axis=1)
nci_5pct_sCf_df.insert(13, 'sample_size', "5-pct")
nci_5pct_sCf_df.insert(14, 'alpha',0.75)
nci_5pct_sCf_df.insert(15, 'stuck_time', 30.0)
nci_5pct_sCf_df.insert(16,'global_seed', "default")

# random global seed: sample 1 of the 5-pct runs, simulated with different seeds
nci_5pct_rGs = []

rGs = [ 4711,3254, 2306, 6384,4338, 6003, 5502, 9377, 5621, 9002 ]
for seed_nr, seed in enumerate(rGs, start=1):
    if (seed == 4711):
        global_seed = "rnd_" + str(seed)
        # the seed-4711 column is taken from sample 1 of the table built above
        temp = nci_5pct_df[["road_type", "hour", "congestion_index_1"]].copy()
        nci_5pct_rGs.append(temp)
        continue
    if (seed == 6384 or seed == 6003):
        # these two seeds are left out, so only eight tables are collected
        continue
    global_seed  = "rnd_" + str(seed)
    path = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-1-fCf_sCF_0.05_gS_" + str(seed) + "_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    temp = pd.read_csv(path, sep = ",")
    # the columns are numbered by the position of the seed in rGs
    temp.rename(columns={"congestion_index": "congestion_index_" + str(seed_nr)}, inplace = True)
    nci_5pct_rGs.append(temp)


# left join the eight collected seeds on (road_type, hour)
nci_5pct_rGs_df = pd.merge(nci_5pct_rGs[0], nci_5pct_rGs[1], on=['road_type','hour'], how='left')
for elem in range(2,8,1):
    nci_5pct_rGs_df = pd.merge(nci_5pct_rGs_df, nci_5pct_rGs[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-seed column, selected by name: a positional slice
# ending at column 9 stops one column short and drops the last seed.
sample_columns = [col for col in nci_5pct_rGs_df.columns if col.startswith("congestion_index_")]
nci_5pct_rGs_df['congestion_index_mean'] = nci_5pct_rGs_df[sample_columns].mean(axis=1)
nci_5pct_rGs_df.insert(11, 'sample_size', "5-pct")
nci_5pct_rGs_df.insert(12, 'alpha', 1.0)
nci_5pct_rGs_df.insert(13, 'stuck_time', 30.0)
nci_5pct_rGs_df.insert(14,'global_seed', "rnd")


# 5 pct, alpha 1.0, sT scaled (stuck time 600 s)
nci_5pct_sT = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(elem) + "-fCf_sCF_0.05_gS_4711_sT_600.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    temp = pd.read_csv(path)
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_5pct_sT.append(temp)

# left join all ten samples on (road_type, hour)
nci_5pct_sT_df = pd.merge(nci_5pct_sT[0], nci_5pct_sT[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_5pct_sT_df = pd.merge(nci_5pct_sT_df, nci_5pct_sT[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_5pct_sT_df.columns if col.startswith("congestion_index_")]
nci_5pct_sT_df['congestion_index_mean'] = nci_5pct_sT_df[sample_columns].mean(axis=1)
nci_5pct_sT_df.insert(13, 'sample_size', "5-pct")
nci_5pct_sT_df.insert(14, 'alpha',1.0)
nci_5pct_sT_df.insert(15, 'stuck_time', 600.0)
nci_5pct_sT_df.insert(16,'global_seed', "default")



# 5 pct, alpha 0.75 (storage capacity factor 0.10574), stuck time 600 s
nci_5pct_sT_sCf = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(elem) + "-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    temp = pd.read_csv(path)
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_5pct_sT_sCf.append(temp)

# left join all ten samples on (road_type, hour)
nci_5pct_sT_sCf_df = pd.merge(nci_5pct_sT_sCf[0], nci_5pct_sT_sCf[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_5pct_sT_sCf_df = pd.merge(nci_5pct_sT_sCf_df, nci_5pct_sT_sCf[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_5pct_sT_sCf_df.columns if col.startswith("congestion_index_")]
nci_5pct_sT_sCf_df['congestion_index_mean'] = nci_5pct_sT_sCf_df[sample_columns].mean(axis=1)
nci_5pct_sT_sCf_df.insert(13, 'sample_size', "5-pct")
nci_5pct_sT_sCf_df.insert(14, 'alpha',0.75)
nci_5pct_sT_sCf_df.insert(15, 'stuck_time', 600.0)
nci_5pct_sT_sCf_df.insert(16,'global_seed', "default")


# Stack all 5-pct congestion-index tables row-wise and write them to disk.
_nci_5pct_tables = [
    nci_5pct_df,
    nci_5pct_sCf_df,
    nci_5pct_rGs_df,
    nci_5pct_sT_df,
    nci_5pct_sT_sCf_df,
]
nci_5pct_all = pd.concat(_nci_5pct_tables, axis=0)

nci_5pct_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/nci_all_5pct_samples.csv',
    index=False,
)

#### 10 pct

# NOTE(review): this loop looks like an unfinished generic reader for the four
# 10-pct cases. The tables it reads are not stored anywhere, and nci_10pct is
# re-created as a list by the per-case code that follows. Kept for reference.
flowCapF = ["0.1"]
storCapF =  ["0.1", "0.17783"]
stuckTimes = ["30.0", "300.0"]

nci_10pct = pd.DataFrame()

counter = 0
for fCf in flowCapF:
    for sCf in storCapF:
        for sT in stuckTimes:
            for sampleNr in range(1,11,1):
                # declare path based on case 
                if fCf == "0.1" and sCf == "0.1" and sT == "30.0":
                    path = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" +str(sampleNr) + "-fCf_sCF_0.1_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"       
                elif fCf == "0.1" and sCf ==  "0.17783" and sT == "30.0":
                    path = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
                elif fCf == "0.1" and sCf == "0.1" and sT == "300.0":
                    path = "/home/lola/math_cluster/output/output-lausitz-10-pct-" + str(sampleNr) + "-fCf_sCF_0.1_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
                elif fCf == "0.1" and sCf ==  "0.17783" and sT == "300.0":
                    path = "/home/lola/math_cluster/output/output-lausitz-10-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"  
                else: 
                    print("case not found")
                    break
                # The traffic statistics are plain comma-separated files (the other
                # cells read them that way), not gzipped ';'-separated ones.
                temp = pd.read_csv(path, sep=",")
                # number the column by the sample of this iteration, not by a
                # loop variable left over from an earlier cell
                temp.rename(columns={"congestion_index": "congestion_index_" + str(sampleNr)}, inplace = True)
                
                

# 10 pct, alpha 1.0, stuck time 30 s: one congestion-index table per sample
nci_10pct = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-"+ str(elem) + "-fCf_sCF_0.1_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path)
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_10pct.append(temp)

# left join all ten samples on (road_type, hour)
nci_10pct_df = pd.merge(nci_10pct[0], nci_10pct[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_10pct_df = pd.merge(nci_10pct_df, nci_10pct[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_10pct_df.columns if col.startswith("congestion_index_")]
nci_10pct_df['congestion_index_mean'] = nci_10pct_df[sample_columns].mean(axis=1)
nci_10pct_df.insert(13, 'sample_size', "10-pct")
nci_10pct_df.insert(14, 'alpha',1.0)
nci_10pct_df.insert(15, 'stuck_time', 30.0)
nci_10pct_df.insert(16,'global_seed', "default")



# 10 pct, alpha 0.75 (storage capacity factor 0.17783), stuck time 30 s
nci_10pct_sCf = []
for elem in range(1,11,1):
    path = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-"+ str(elem) + "-fCf_0.1_sCF_0.17783_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv" 
    temp = pd.read_csv(path)
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace = True)
    nci_10pct_sCf.append(temp)

# left join all ten samples on (road_type, hour)
nci_10pct_sCf_df = pd.merge(nci_10pct_sCf[0], nci_10pct_sCf[1], on=['road_type','hour'], how='left')
for elem in range(2,10,1):
    nci_10pct_sCf_df = pd.merge(nci_10pct_sCf_df, nci_10pct_sCf[elem], on = ['road_type', 'hour'], how='left')

# Average over every per-sample column, selected by name: a positional slice
# ending at column 11 stops at congestion_index_9 and drops the tenth sample.
sample_columns = [col for col in nci_10pct_sCf_df.columns if col.startswith("congestion_index_")]
nci_10pct_sCf_df['congestion_index_mean'] = nci_10pct_sCf_df[sample_columns].mean(axis=1)
nci_10pct_sCf_df.insert(13, 'sample_size', "10-pct")
nci_10pct_sCf_df.insert(14, 'alpha',0.75)
nci_10pct_sCf_df.insert(15, 'stuck_time', 30.0)
nci_10pct_sCf_df.insert(16,'global_seed', "default")


# Congestion index by road type and hour for the ten 10 % samples of the run
# labelled alpha = 1.0 / stuck time 300.0 below.
nci_10pct_sT = []
for elem in range(1, 11):
    path = "/home/lola/math_cluster/output/output-lausitz-10-pct-"+ str(elem) + "-fCf_sCF_0.1_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    temp = pd.read_csv(path, sep=",")
    # Each sample gets its own column name so the frames can be joined side by side.
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace=True)
    nci_10pct_sT.append(temp)

# Left-join every further sample onto the first one, keyed by road type and hour.
nci_10pct_sT_df = nci_10pct_sT[0]
for sample_df in nci_10pct_sT[1:]:
    nci_10pct_sT_df = pd.merge(nci_10pct_sT_df, sample_df, on=['road_type', 'hour'], how='left')

# Average over ALL per-sample columns, selected by name. The former positional
# slice iloc[:, 2:11] is end-exclusive and therefore left congestion_index_10
# out of the mean.
sample_cols = ["congestion_index_" + str(nr) for nr in range(1, len(nci_10pct_sT) + 1)]
nci_10pct_sT_df['congestion_index_mean'] = nci_10pct_sT_df[sample_cols].mean(axis=1)

# Scenario labels are appended after the mean column.
nci_10pct_sT_df['sample_size'] = "10-pct"
nci_10pct_sT_df['alpha'] = 1.0
nci_10pct_sT_df['stuck_time'] = 300.0
nci_10pct_sT_df['global_seed'] = "default"


# Congestion index by road type and hour for the ten 10 % samples of the run
# labelled alpha = 0.75 / stuck time 300.0 below.
nci_10pct_sT_sCf = []
for elem in range(1, 11):
    if elem == 2:
        # Sample 2 is read from the Nextcloud folder instead of the cluster output folder.
        path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-10-pct-2-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    else:
        path = "/home/lola/math_cluster/output/output-lausitz-10-pct-" +str(elem) + "-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
    temp = pd.read_csv(path, sep=",")
    # Each sample gets its own column name so the frames can be joined side by side.
    temp.rename(columns={"congestion_index": "congestion_index_" + str(elem)}, inplace=True)
    nci_10pct_sT_sCf.append(temp)

# Left-join every loaded sample onto the first one, keyed by road type and hour.
# The former merge loop (range(2, 9)) stopped after nine frames although ten are
# loaded above, so sample 10 never reached the result.
nci_10pct_sT_sCf_df = nci_10pct_sT_sCf[0]
for sample_df in nci_10pct_sT_sCf[1:]:
    nci_10pct_sT_sCf_df = pd.merge(nci_10pct_sT_sCf_df, sample_df, on=['road_type', 'hour'], how='left')

# Average over ALL per-sample columns, selected by name. The former positional
# slice iloc[:, 2:10] is end-exclusive and additionally dropped the last merged
# sample from the mean.
sample_cols = ["congestion_index_" + str(nr) for nr in range(1, len(nci_10pct_sT_sCf) + 1)]
nci_10pct_sT_sCf_df['congestion_index_mean'] = nci_10pct_sT_sCf_df[sample_cols].mean(axis=1)

# Scenario labels are appended after the mean column.
nci_10pct_sT_sCf_df['sample_size'] = "10-pct"
nci_10pct_sT_sCf_df['alpha'] = 0.75
nci_10pct_sT_sCf_df['stuck_time'] = 300.0
nci_10pct_sT_sCf_df['global_seed'] = "default"




# Stack the four 10 % scenario tables row-wise and store them as one CSV (row index not written).
nci_10pct_all = pd.concat(
    [
        nci_10pct_df,
        nci_10pct_sCf_df,
        nci_10pct_sT_df,
        nci_10pct_sT_sCf_df,
    ]
)

nci_10pct_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/nci_10pct_samples_some_missing.csv',
    index=False,
)

#### 25 pct

# 25 % run labelled alpha = 1.0 / stuck time 30.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_25pct = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_25pct.insert(loc=3, column='sample_size', value="25-pct")
nci_25pct.insert(loc=4, column='alpha', value=1.0)
nci_25pct.insert(loc=5, column='stuck_time', value=30.0)
nci_25pct.insert(loc=6, column='global_seed', value="default")




# 25 % run labelled alpha = 0.75 / stuck time 30.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_25pct_sCf = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_25pct_sCf.insert(loc=3, column='sample_size', value="25-pct")
nci_25pct_sCf.insert(loc=4, column='alpha', value=0.75)
nci_25pct_sCf.insert(loc=5, column='stuck_time', value=30.0)
nci_25pct_sCf.insert(loc=6, column='global_seed', value="default")

# 25 % run labelled alpha = 1.0 / stuck time 120.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_25pct_sT = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_25pct_sT.insert(loc=3, column='sample_size', value="25-pct")
nci_25pct_sT.insert(loc=4, column='alpha', value=1.0)
nci_25pct_sT.insert(loc=5, column='stuck_time', value=120.0)
nci_25pct_sT.insert(loc=6, column='global_seed', value="default")


# 25 % run labelled alpha = 0.75 / stuck time 120.0 (read from the Nextcloud folder):
# load the congestion index per road type and hour and tag it with its scenario labels.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
nci_25pct_sT_sCf = pd.read_csv(path, sep=",")
# Use the same value-column name as the sibling 25 % tables. Without this rename
# the row-wise concat with them yields a separate 'congestion_index' column and
# leaves 'congestion_index_mean' empty for this scenario.
nci_25pct_sT_sCf.rename(columns={"congestion_index": "congestion_index_mean"}, inplace=True)
nci_25pct_sT_sCf.insert(3, 'sample_size', "25-pct")
# NOTE(review): this is the only 25 % table that carries a 'sample_nr' column;
# it is kept as in the original - confirm whether it is wanted in the combined file.
nci_25pct_sT_sCf.insert(4, 'sample_nr', 1)
nci_25pct_sT_sCf.insert(5, 'alpha', 0.75)
nci_25pct_sT_sCf.insert(6, 'stuck_time', 120.0)
nci_25pct_sT_sCf.insert(7, 'global_seed', "default")




# Stack the four 25 % scenario tables row-wise and store them as one CSV (row index not written).
nci_25pct_all = pd.concat(
    [
        nci_25pct,
        nci_25pct_sCf,
        nci_25pct_sT,
        nci_25pct_sT_sCf,
    ]
)

nci_25pct_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/nci_25pct_samples_sCf_sT_missing.csv',
    index=False,
)

#### 50 pct

# 50 % run labelled alpha = 1.0 / stuck time 30.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_50pct = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_50pct.insert(loc=3, column='sample_size', value="50-pct")
nci_50pct.insert(loc=4, column='alpha', value=1.0)
nci_50pct.insert(loc=5, column='stuck_time', value=30.0)
nci_50pct.insert(loc=6, column='global_seed', value="default")


# 50 % run labelled alpha = 0.75 / stuck time 30.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
nci_50pct_sCf = pd.read_csv(path, sep=",")
# Rename the value column of THIS table. The original statement renamed
# nci_25pct_sCf (copy-paste slip), so this table kept 'congestion_index' and
# ended up with an empty 'congestion_index_mean' after the row-wise concat.
nci_50pct_sCf.rename(columns={"congestion_index": "congestion_index_mean"}, inplace=True)
nci_50pct_sCf.insert(3, 'sample_size', "50-pct")
nci_50pct_sCf.insert(4, 'alpha', 0.75)
nci_50pct_sCf.insert(5, 'stuck_time', 30.0)
nci_50pct_sCf.insert(6, 'global_seed', "default")


# 50 % run labelled alpha = 1.0 / stuck time 60.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_50pct_sT = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_50pct_sT.insert(loc=3, column='sample_size', value="50-pct")
nci_50pct_sT.insert(loc=4, column='alpha', value=1.0)
nci_50pct_sT.insert(loc=5, column='stuck_time', value=60.0)
nci_50pct_sT.insert(loc=6, column='global_seed', value="default")

# 50 % run labelled alpha = 0.75 / stuck time 60.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_50pct_sT_sCf = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_50pct_sT_sCf.insert(loc=3, column='sample_size', value="50-pct")
nci_50pct_sT_sCf.insert(loc=4, column='alpha', value=0.75)
nci_50pct_sT_sCf.insert(loc=5, column='stuck_time', value=60.0)
nci_50pct_sT_sCf.insert(loc=6, column='global_seed', value="default")



# Run labelled "25-pct-doubled" (alpha = 1.0 / stuck time 30.0), read from the
# Nextcloud folder: load the congestion index per road type and hour and tag it.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-doubled-fCf_0.5_sCF_0.5_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_25pct_doubled = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_25pct_doubled.insert(loc=3, column='sample_size', value="25-pct-doubled")
nci_25pct_doubled.insert(loc=4, column='alpha', value=1.0)
nci_25pct_doubled.insert(loc=5, column='stuck_time', value=30.0)
nci_25pct_doubled.insert(loc=6, column='global_seed', value="default")

# Stack the four 50 % scenario tables plus the "25-pct-doubled" table row-wise
# and store them as one CSV (row index not written).
nci_50pct_all = pd.concat(
    [
        nci_50pct,
        nci_50pct_sCf,
        nci_50pct_sT,
        nci_50pct_sT_sCf,
        nci_25pct_doubled,
    ]
)

nci_50pct_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/nci_50pct_samples.csv',
    index=False,
)

#### 100 pct

# 100 % run labelled alpha = 1.0 / stuck time 30.0: load the congestion index per
# road type and hour and tag it with its scenario labels.
path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_100pct = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_100pct.insert(loc=3, column='sample_size', value="100-pct")
nci_100pct.insert(loc=4, column='alpha', value=1.0)
nci_100pct.insert(loc=5, column='stuck_time', value=30.0)
nci_100pct.insert(loc=6, column='global_seed', value="default")

# Run labelled "25-pct-quadrupled" (alpha = 1.0 / stuck time 30.0), read from the
# Nextcloud folder: load the congestion index per road type and hour and tag it.
path = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25.0-pct-quadrupled-fCf_1.0_sCF_1.0_gS_4711_3765/analysis/traffic/traffic_stats_by_road_type_and_hour.csv"
# There is only one run here, so its value is stored directly under the "mean" column name.
nci_25pct_quadrupled = pd.read_csv(path, sep=",").rename(columns={"congestion_index": "congestion_index_mean"})
nci_25pct_quadrupled.insert(loc=3, column='sample_size', value="25-pct-quadrupled")
nci_25pct_quadrupled.insert(loc=4, column='alpha', value=1.0)
nci_25pct_quadrupled.insert(loc=5, column='stuck_time', value=30.0)
nci_25pct_quadrupled.insert(loc=6, column='global_seed', value="default")

#### concat and write csv

# Stack the congestion-index tables of all sample sizes row-wise and store them
# as one CSV (row index not written).
nci_all = pd.concat(
    [
        nci_1pct_all,
        nci_5pct_all,
        nci_10pct_all,
        nci_25pct_all,
        nci_50pct_all,
        nci_100pct,
        nci_25pct_quadrupled,
    ]
)

nci_all.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/nci_100pct_only_5pct_rGs_6384_6003_missing.csv',
    index=False,
)

### Data on one motorway, primary and residential link

In [None]:
def readStatsByLinkAndReturnValue(pathToFile, LinkId, sampleSize, sampleNr, alpha, stuckTime ):
    """Return the rows of one link from a per-link statistics CSV, scaled to 100 %.

    Args:
        pathToFile: Path of the CSV file to read. It must contain the columns
            ``link_id``, ``road_capacity_utilization``,
            ``simulated_traffic_volume`` and ``vol_car`` and at least nine
            columns in total (the labels are inserted at positions 9-12).
        LinkId: Id of the link to select; compared as text, so ``"123"`` and
            ``123`` select the same link.
        sampleSize: Sample size in percent (e.g. ``10.0``); the three volume
            columns are multiplied by ``100.0 / sampleSize``.
        sampleNr: Value written to the ``sample_nr`` column.
        alpha: Value written to the ``alpha`` column.
        stuckTime: Value written to the ``stuck_time`` column.

    Returns:
        A new DataFrame with the matching rows (empty if the link does not
        occur), the scaled columns and the four label columns. ``link_id`` is
        returned as text.
    """
    scaling_factor_to_100pct = 100.0 / sampleSize
    # Read link ids as text. With dtype inference an all-numeric id column is
    # parsed as integers and a string LinkId then silently matches nothing.
    df = pd.read_csv(pathToFile, dtype={'link_id': str})
    temp = df[df['link_id'] == str(LinkId)].copy()
    for column in ('road_capacity_utilization', 'simulated_traffic_volume', 'vol_car'):
        temp[column] = temp[column] * scaling_factor_to_100pct
    temp.insert(9, 'sample_nr', sampleNr)
    # The label is built from the numeric sample size as given, e.g. 10.0 -> "10.0-pct".
    temp.insert(10, 'sample_size', str(sampleSize) + "-pct")
    temp.insert(11, 'alpha', alpha)
    temp.insert(12, 'stuck_time', stuckTime)
    return temp


In [None]:
def readStatsByLinkAndReturnShortCongLinks(pathToFile, sampleSize, sampleNr, alpha, stuckTime, cI_value, link_length):
    """Return all links at or below a congestion-index and a lane-km threshold, scaled to 100 %.

    Rows of the CSV at ``pathToFile`` are kept when ``congestion_index`` is at
    most ``cI_value`` and ``lane_km`` is at most ``link_length``. The columns
    ``road_capacity_utilization``, ``simulated_traffic_volume`` and ``vol_car``
    are multiplied by ``100.0 / sampleSize``; ``sample_nr``, ``sample_size``
    (``str(sampleSize) + "-pct"``), ``alpha`` and ``stuck_time`` are inserted at
    column positions 9 to 12. A new DataFrame is returned.
    """
    upscale = 100.0 / sampleSize
    stats = pd.read_csv(pathToFile)

    low_congestion_index = stats['congestion_index'] <= cI_value
    low_lane_km = stats['lane_km'] <= link_length
    selected = stats.loc[low_congestion_index & low_lane_km].copy()

    for column in ('road_capacity_utilization', 'simulated_traffic_volume', 'vol_car'):
        selected[column] = selected[column] * upscale

    labels = (
        ('sample_nr', sampleNr),
        ('sample_size', str(sampleSize) + "-pct"),
        ('alpha', alpha),
        ('stuck_time', stuckTime),
    )
    for position, (label, value) in enumerate(labels, start=9):
        selected.insert(position, label, value)
    return selected


##### 1 pct

In [None]:
flowCapF = ["0.01"]
storCapF =  ["0.01", "0.03162"]

# Ids of the three links that are extracted individually (the original cell
# labelled them mw / pr / rs, in this order).
link_ids = ["314328566", "314040202", "130268155"]

# initialize empty data frames
df_3_links_mw_pr_res_1pct = pd.DataFrame()
df_link_cong_len_leq_100m_1pct = pd.DataFrame()

# Per-run frames are collected here and concatenated once after the loops
# instead of re-copying the growing result on every iteration.
link_frames = []
short_cong_frames = []

for fCf in flowCapF:
    # These values do not depend on the storage factor or the sample number.
    default_stuck_time = 30.0
    adjusted_stuck_time = 30.0/float(flowCapF[0])
    sample_size = float(fCf)*100

    for sCf in storCapF:
        for sampleNr in range(1, 11):
            if fCf == "0.01" and sCf == "0.01":
                alpha = 1.0
                # case 1: default stuck time, case 3: adjusted stuck time
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-1pct-"+ str(sampleNr) +"-fCf_sCF_" + sCf + "_gS_default_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-1-pct-" +str(sampleNr) +"-fCf_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = [(path_case1, default_stuck_time), (path_case3, adjusted_stuck_time)]
            else:
                alpha = 0.75
                # case 2: default stuck time, case 4: adjusted stuck time
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-1pct-" +str(sampleNr) + "-fCf_0.01_sCF_" + sCf + "_gS_default_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case4 = "/home/lola/math_cluster/output/output-lausitz-1-pct-" + str(sampleNr) + "-fCf_" + fCf + "_sCF_" + sCf + "_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = [(path_case2, default_stuck_time), (path_case4, adjusted_stuck_time)]

            # Same row order as before: per link, default-stuck-time run first.
            for link_id in link_ids:
                for run_path, stuck_time in runs:
                    link_frames.append(readStatsByLinkAndReturnValue(pathToFile=run_path, LinkId=link_id, sampleSize=sample_size, sampleNr=sampleNr, alpha=alpha, stuckTime=stuck_time))

            # Links with congestion_index <= 0.5 and lane_km <= 0.1 in each run.
            for run_path, stuck_time in runs:
                short_cong_frames.append(readStatsByLinkAndReturnShortCongLinks(run_path, sample_size, sampleNr, alpha, stuck_time, 0.5, 0.1))

if link_frames:
    df_3_links_mw_pr_res_1pct = pd.concat(link_frames, ignore_index=True)
if short_cong_frames:
    df_link_cong_len_leq_100m_1pct = pd.concat(short_cong_frames, ignore_index=True)


##### 5 pct

In [None]:
flowCapF = ["0.05"]
storCapF =  ["0.05", "0.10574"]

# Ids of the three links that are extracted individually (the original cell
# labelled them mw / pr / rs, in this order).
link_ids = ["314328566", "314040202", "130268155"]

df_3_links_mw_pr_res_5pct = pd.DataFrame()
df_link_cong_len_leq_100m_5pct = pd.DataFrame()

# Per-run frames are collected here and concatenated once after the loops
# instead of re-copying the growing result on every iteration.
link_frames = []
short_cong_frames = []

for fCf in flowCapF:
    # These values do not depend on the storage factor or the sample number.
    default_stuck_time = 30.0
    adjusted_stuck_time = 30.0/float(flowCapF[0])
    sample_size = float(fCf)*100

    for sCf in storCapF:
        for sampleNr in range(1, 11):
            # The former special branches for sample 6 built exactly the same
            # paths as the general branches, so one path pattern per case is enough.
            if fCf == "0.05" and sCf == "0.05":
                alpha = 1.0
                # case 1: default stuck time, case 3: adjusted stuck time
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr) + "-fCf_sCF_0.05_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                # The stray "path =" alias of the original assignment is dropped;
                # it only overwrote the unrelated global `path`.
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(sampleNr) + "-fCf_sCF_0.05_gS_4711_sT_" + str(adjusted_stuck_time) + "_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = [(path_case1, default_stuck_time), (path_case3, adjusted_stuck_time)]
            else:
                alpha = 0.75
                # case 2: default stuck time, case 4: adjusted stuck time
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-5.0-pct-" + str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case4 = "/home/lola/math_cluster/output/output-lausitz-5-pct-" + str(sampleNr) + "-fCf_0.05_sCF_0.10574_gS_4711_sT_600.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = [(path_case2, default_stuck_time), (path_case4, adjusted_stuck_time)]

            # Same row order as before: per link, default-stuck-time run first.
            for link_id in link_ids:
                for run_path, stuck_time in runs:
                    link_frames.append(readStatsByLinkAndReturnValue(pathToFile=run_path, LinkId=link_id, sampleSize=sample_size, sampleNr=sampleNr, alpha=alpha, stuckTime=stuck_time))

            # Links with congestion_index <= 0.5 and lane_km <= 0.1 in each run.
            for run_path, stuck_time in runs:
                short_cong_frames.append(readStatsByLinkAndReturnShortCongLinks(run_path, sample_size, sampleNr, alpha, stuck_time, 0.5, 0.1))

if link_frames:
    df_3_links_mw_pr_res_5pct = pd.concat(link_frames, ignore_index=True)
if short_cong_frames:
    df_link_cong_len_leq_100m_5pct = pd.concat(short_cong_frames, ignore_index=True)


##### 10 pct

In [None]:
flowCapF = ["0.1"]
storCapF =  ["0.1", "0.17783"]

# Ids of the three links that are extracted individually (the original cell
# labelled them mw / pr / rs, in this order).
link_ids = ["314328566", "314040202", "130268155"]

df_3_links_mw_pr_res_10pct = pd.DataFrame()
df_link_cong_len_leq_100m_10pct = pd.DataFrame()

# Per-run frames are collected here and concatenated once after the loops
# instead of re-copying the growing result on every iteration.
link_frames = []
short_cong_frames = []

for fCf in flowCapF:
    # These values do not depend on the storage factor or the sample number.
    default_stuck_time = 30.0
    adjusted_stuck_time = 30.0/float(flowCapF[0])
    sample_size = float(fCf)*100

    for sCf in storCapF:
        for sampleNr in range(1, 11):
            if fCf == "0.1" and sCf == "0.1":
                alpha = 1.0
                # case 1: default stuck time, case 3: adjusted stuck time
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" +str(sampleNr) + "-fCf_sCF_0.1_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-10-pct-" + str(sampleNr) + "-fCf_sCF_0.1_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = [(path_case1, default_stuck_time), (path_case3, adjusted_stuck_time)]
                # Row order as before: per link, case 1 followed by case 3.
                link_calls = [(link_id, run) for link_id in link_ids for run in runs]
            else:
                alpha = 0.75
                # case 4 (adjusted stuck time): sample 2 is read from the
                # Nextcloud folder, all other samples from the cluster output folder.
                if sampleNr == 2:
                    path_case4 = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-10-pct-2-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                else:
                    path_case4 = "/home/lola/math_cluster/output/output-lausitz-10-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_sT_300.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                # case 2 (default stuck time)
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-10.0-pct-" + str(sampleNr) + "-fCf_0.1_sCF_0.17783_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = [(path_case4, adjusted_stuck_time), (path_case2, default_stuck_time)]
                # Row order as before: all three links of case 4, then of case 2.
                link_calls = [(link_id, run) for run in runs for link_id in link_ids]

            for link_id, (run_path, stuck_time) in link_calls:
                link_frames.append(readStatsByLinkAndReturnValue(pathToFile=run_path, LinkId=link_id, sampleSize=sample_size, sampleNr=sampleNr, alpha=alpha, stuckTime=stuck_time))

            # Links with congestion_index <= 0.5 and lane_km <= 0.1, added exactly
            # once per run. The original appended the case-4 frame a second time
            # together with case 2, which double-counted those links.
            for run_path, stuck_time in runs:
                short_cong_frames.append(readStatsByLinkAndReturnShortCongLinks(run_path, sample_size, sampleNr, alpha, stuck_time, 0.5, 0.1))

if link_frames:
    df_3_links_mw_pr_res_10pct = pd.concat(link_frames, ignore_index=True)
if short_cong_frames:
    df_link_cong_len_leq_100m_10pct = pd.concat(short_cong_frames, ignore_index=True)


               

##### 25 pct

In [None]:
# Collect the daily per-link traffic statistics of the 25 % sample runs:
#   * df_3_links_mw_pr_res_25pct      - rows for three fixed link ids
#   * df_link_cong_len_leq_100m_25pct - rows returned by the short-congested-link helper
flowCapF = ["0.25"]
# second storage factor looks like 0.25 ** 0.75 (alpha = 0.75) - TODO confirm
storCapF = ["0.25", "0.35355"]

df_3_links_mw_pr_res_25pct = pd.DataFrame()
df_link_cong_len_leq_100m_25pct = pd.DataFrame()
counter = 0  # not read anywhere in this cell

for fCf in flowCapF:
    for sCf in storCapF:
        # only sample number 1 is evaluated for this sample size
        for sampleNr in range(1, 2, 1):
            # stuck time: default value vs. value scaled by 1 / flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0 / float(flowCapF[0])
            # sample size as label ("25-pct", not read in this cell) and as number (25.0)
            sample_size_as_string = str(int(float(fCf) * 100)) + "-pct"
            sample_size = float(fCf) * 100

            # choose the two run outputs (default / adjusted stuck time) for this factor pair
            if fCf == "0.25" and sCf == "0.25":
                alpha = 1.0
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_sCF_0.25_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-25-pct-1-fCf_sCF_0.25_gS_4711_sT_120.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = ((path_case1, default_stuck_time), (path_case3, adjusted_stuck_time))
            else:
                alpha = 0.75
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-25.0-pct-fCf_0.25_sCF_0.35355_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                # NOTE(review): this run is read from the Nextcloud folder, not from math_cluster - confirm this is intended
                path_case4 = "/home/lola/Nextcloud/Masterarbeit/03_Outputs_From_RunsLausitz/output-lausitz-25-pct-1-fCf_0.25_sCF_0.35355_gS_4711_sT_120.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = ((path_case2, default_stuck_time), (path_case4, adjusted_stuck_time))

            # one frame per (link, run); links are visited in the order mw, pr, rs
            link_frames = [
                readStatsByLinkAndReturnValue(
                    pathToFile=run_path,
                    LinkId=link_id,
                    sampleSize=sample_size,
                    sampleNr=sampleNr,
                    alpha=alpha,
                    stuckTime=run_stuck_time,
                )
                for link_id in ("314328566", "314040202", "130268155")
                for run_path, run_stuck_time in runs
            ]
            # concatenate with existing values
            df_3_links_mw_pr_res_25pct = pd.concat(
                [df_3_links_mw_pr_res_25pct, *link_frames], ignore_index=True
            )

            # congested links shorter than 100 m; 0.5 / 0.1 are presumably the
            # congestion-index and length thresholds of the helper - TODO confirm
            short_link_frames = [
                readStatsByLinkAndReturnShortCongLinks(
                    run_path, sample_size, sampleNr, alpha, run_stuck_time, 0.5, 0.1
                )
                for run_path, run_stuck_time in runs
            ]
            df_link_cong_len_leq_100m_25pct = pd.concat(
                [df_link_cong_len_leq_100m_25pct, *short_link_frames], ignore_index=True
            )


##### 50 pct

In [None]:
# Collect the daily per-link traffic statistics of the 50 % sample runs:
#   * df_3_links_mw_pr_res_50pct      - rows for three fixed link ids
#   * df_link_cong_len_leq_100m_50pct - rows returned by the short-congested-link helper
# Both tables are filled for BOTH storage-capacity settings (alpha = 1.0 and 0.75).
flowCapF = ["0.5"]
# second storage factor looks like 0.5 ** 0.75 (alpha = 0.75) - TODO confirm
storCapF = ["0.5", "0.5946"]

df_3_links_mw_pr_res_50pct = pd.DataFrame()
df_link_cong_len_leq_100m_50pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
        # only sample number 1 is evaluated for this sample size
        for sampleNr in range(1, 2, 1):
            # stuck time: default value vs. value scaled by 1 / flow capacity factor
            default_stuck_time = 30.0
            adjusted_stuck_time = 30.0 / float(flowCapF[0])
            # sample size as a number (50.0)
            sample_size = float(fCf) * 100

            # choose the two run outputs (default / adjusted stuck time) for this factor pair
            if fCf == "0.5" and sCf == "0.5":
                alpha = 1.0
                path_case1 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_sCF_0.5_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case3 = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_sCF_0.5_gS_4711_sT_60.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = ((path_case1, default_stuck_time), (path_case3, adjusted_stuck_time))
            else:
                alpha = 0.75
                path_case2 = "/home/lola/math_cluster/output/output-lausitz-50.0-pct-fCf_0.5_sCF_0.5946_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                path_case4 = "/home/lola/math_cluster/output/output-lausitz-50-pct-1-fCf_0.5_sCF_0.5946_gS_4711_sT_60.0_3765/analysis/traffic/traffic_stats_by_link_daily.csv"
                runs = ((path_case2, default_stuck_time), (path_case4, adjusted_stuck_time))

            # one frame per (link, run); links are visited in the order mw, pr, rs
            link_frames = [
                readStatsByLinkAndReturnValue(
                    pathToFile=run_path,
                    LinkId=link_id,
                    sampleSize=sample_size,
                    sampleNr=sampleNr,
                    alpha=alpha,
                    stuckTime=run_stuck_time,
                )
                for link_id in ("314328566", "314040202", "130268155")
                for run_path, run_stuck_time in runs
            ]
            # concatenate with existing values
            df_3_links_mw_pr_res_50pct = pd.concat(
                [df_3_links_mw_pr_res_50pct, *link_frames], ignore_index=True
            )

            # Congested links shorter than 100 m. This now runs for the alpha = 0.75
            # runs as well; previously only the alpha = 1.0 branch appended its rows,
            # so the 50 % table was missing half of its cases.
            # 0.5 / 0.1 are presumably the congestion-index and length thresholds
            # of the helper - TODO confirm
            short_link_frames = [
                readStatsByLinkAndReturnShortCongLinks(
                    run_path, sample_size, sampleNr, alpha, run_stuck_time, 0.5, 0.1
                )
                for run_path, run_stuck_time in runs
            ]
            df_link_cong_len_leq_100m_50pct = pd.concat(
                [df_link_cong_len_leq_100m_50pct, *short_link_frames], ignore_index=True
            )



##### 100 pct

In [None]:
# Collect the daily per-link traffic statistics of the 100 % run:
#   * df_3_links_mw_pr_res_100pct      - rows for three fixed link ids
#   * df_link_cong_len_leq_100m_100pct - rows returned by the short-congested-link helper
# Only one configuration is read here (alpha = 1.0, default stuck time).
flowCapF = ["1.0"]
storCapF = ["1.0"]

df_3_links_mw_pr_res_100pct = pd.DataFrame()
df_link_cong_len_leq_100m_100pct = pd.DataFrame()

for fCf in flowCapF:
    for sCf in storCapF:
        # only sample number 1 is evaluated for this sample size
        for sampleNr in range(1, 2, 1):
            alpha = 1.0
            # defined here so the cell does not depend on a previously executed cell
            default_stuck_time = 30.0
            sample_size = float(fCf) * 100
            path = "/home/lola/math_cluster/output/output-lausitz-100.0-pct-fCf_sCF_1.0_gS_4711_3765/analysis/traffic/traffic_stats_by_link_daily.csv"

            # Read from `path` (the 100 % output). The earlier version passed the
            # stale global `path_case1`, i.e. whatever file the last executed cell
            # had assigned to it.
            link_frames = [
                readStatsByLinkAndReturnValue(
                    pathToFile=path,
                    LinkId=link_id,
                    sampleSize=sample_size,
                    sampleNr=sampleNr,
                    alpha=alpha,
                    stuckTime=default_stuck_time,
                )
                # links are visited in the order mw, pr, rs
                for link_id in ("314328566", "314040202", "130268155")
            ]
            df_3_links_mw_pr_res_100pct = pd.concat(
                [df_3_links_mw_pr_res_100pct, *link_frames], ignore_index=True
            )

            # congested links shorter than 100 m; 0.5 / 0.1 are presumably the
            # congestion-index and length thresholds of the helper - TODO confirm
            temp_case1_C100m = readStatsByLinkAndReturnShortCongLinks(
                path, sample_size, sampleNr, alpha, default_stuck_time, 0.5, 0.1
            )
            df_link_cong_len_leq_100m_100pct = pd.concat(
                [df_link_cong_len_leq_100m_100pct, temp_case1_C100m], ignore_index=True
            )

##### concat and write output

In [None]:
# Stack the three-link tables of all sample sizes (1 % ... 100 %) into one frame
# with a fresh running index.
df_3_links_mw_pr_res_1_100 = pd.concat(
    [
        df_3_links_mw_pr_res_1pct,
        df_3_links_mw_pr_res_5pct,
        df_3_links_mw_pr_res_10pct,
        df_3_links_mw_pr_res_25pct,
        df_3_links_mw_pr_res_50pct,
        df_3_links_mw_pr_res_100pct,
    ],
    ignore_index=True,
)

In [None]:
# Write the combined three-link table to disk; index=False leaves the row index out of the file.
df_3_links_mw_pr_res_1_100.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/traffic_stats_by_link_314328566_314040202_130268155.csv',
    index=False,
)

In [None]:
# Stack the short-congested-link tables of all sample sizes (1 % ... 100 %) into one frame
# with a fresh running index.
df_link_cong_len_leq_100m_1_100 = pd.concat(
    [
        df_link_cong_len_leq_100m_1pct,
        df_link_cong_len_leq_100m_5pct,
        df_link_cong_len_leq_100m_10pct,
        df_link_cong_len_leq_100m_25pct,
        df_link_cong_len_leq_100m_50pct,
        df_link_cong_len_leq_100m_100pct,
    ],
    ignore_index=True,
)

In [None]:
# Sanity check: number of rows in the 1 % table with alpha == 1.0, stuck_time == 30.0
# and sample_nr == 10.0 (the value is displayed as the cell output).
_check_1pct = df_link_cong_len_leq_100m_1pct
len(
    _check_1pct[
        (_check_1pct["alpha"] == 1.0)
        & (_check_1pct["stuck_time"] == 30.0)
        & (_check_1pct["sample_nr"] == 10.0)
    ]
)

57

In [None]:
# Write the combined short-congested-link table to disk; index=False leaves the row index out of the file.
df_link_cong_len_leq_100m_1_100.to_csv(
    '/home/lola/Nextcloud/Masterarbeit/03_Outputs/traffic_stats_by_link_with_cI_leq0.5_and_length_leq100m_1_100pct.csv',
    index=False,
)