## 2RPFS Problem (Cmax objective) - Data treatment of result files 

In [119]:
import pandas as pd
import numpy as np
import os, fnmatch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import glob

%matplotlib inline

In [120]:
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

### List files in the output folder 

In [121]:
rootfolder = os.getcwd()
file_list = glob.glob(os.path.join(rootfolder, 'output') + '/*.csv', recursive=True)
file_list

['C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_100 jobs-2.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_100 jobs-3.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_100 jobs-4.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_100 jobs.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_100 jobs_a40_RB1004005.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_150 jobs-2.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_150 jobs.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_200 jobs-2.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\separation_200 jobs-3.csv',
 'C:\\Users\\czt0\\Documents\\doutorado_files\\2RPFS_Cmax_Budget\\output\\

### Read all the CSV files 

In [122]:
# Alternative script to treat files with incorrect number of coluns or faulty lines
def alternative_csv_reader(filename, delimiter=',', header=0, names=None):
    with open(filename, 'r') as file:
        lines = file.readlines() 
        count = 1
        line_list = []
        num_columns = 20
        for line in lines:  # Strips the newline character 
            #print("line{}: {}".format(count, line.strip())) 
            nc = len(line.split(','))
            if 'executionId,' in line:
                #num_columns = nc
                print('Detected {0} columns in CSV file.'.format(nc))
            else:
                if 'none,' in line:
                    if nc == num_columns:
                        line_list.append(line)
                    elif nc > num_columns:  # treat strange truncated lines
                        line = line[line.rfind('none,'):]
                        nc = len(line.split(','))
                        if nc == num_columns:
                            print('WARN: truncating line {0}, for having more columns than expected.'.format(count))
                            line_list.append(line)
                        else:
                            print('WARN: Ignoring line {0}, since it has {1} columns, instead of {2}: '.format(count, nc, num_columns), line)
                    else:  # Ignore line
                        print('WARN: Ignoring line {0}: '.format(count), line)    
                elif len(line_list[-1].split(',')) < num_columns:  # current line is a continuation of the previous one
                    line_list[-1] = line_list[-1].replace('\n', '') + line
                    print('*** Treated line {0}: '.format(count), line_list[-1])
                else:  # Ignore line
                    print('WARN: Ignoring line {0}: '.format(count), line)
            count += 1
        # assert all lines have the same number of columns
        count = 1
        for line in line_list:
            nc = len(line.split(','))
            if nc != num_columns:
                print('ERROR: Line {0} has {1} columns, instead of {2}: '.format(count, nc, num_columns), line)
            count += 1
        text_data = StringIO(''.join(line_list))
        #print('line_list: ', str(line_list))
        #print('text_data: ', text_data)
        df = pd.read_csv(text_data, delimiter=delimiter, header=header, names=names)
        return df

### Process all CSV files and append all data to a single dataframe (one per solution method: Wilson, Wagner) 

In [123]:
%%time

df_wilson = pd.DataFrame()
df_wagner = pd.DataFrame()
for filename in file_list:
    print('Processing file ', filename)
    try:
        df_ = pd.read_csv(filename, delimiter=',', header=0, names=['executionId','ub_name','instance_name','alpha','n','m','budget_Gamma','cmax','permutation','time_spent','time_to_best_sol','iterations','num_visited_solutions','num_improvements','is_optimal','validated','gap','lb','cost','cmax_dp'])
    except:  # try alternative method to read csv lines
        df_ = alternative_csv_reader(filename, delimiter=',', header=0, names=['executionId','ub_name','instance_name','alpha','n','m','budget_Gamma','cmax','permutation','time_spent','time_to_best_sol','iterations','num_visited_solutions','num_improvements','is_optimal','validated','gap','lb','cost','cmax_dp'])
    if 'wagner' not in filename:  # Wilson result file
        df_wilson = df_wilson.append(df_.copy())
    else:  # Wagner result file
        df_wagner = df_wagner.append(df_.copy())

Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_100 jobs-2.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_100 jobs-3.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_100 jobs-4.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_100 jobs.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_100 jobs_a40_RB1004005.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_150 jobs-2.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_150 jobs.csv
Processing file  C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\output\separation_200 jobs-2.csv
Detected 20 columns in CSV file.
Detected 20 columns in CSV file.
Detected 20 columns in CSV file.
Detected 20 columns in CSV fi

In [124]:
df_wilson

Unnamed: 0,executionId,ub_name,instance_name,alpha,n,m,budget_Gamma,cmax,permutation,time_spent,time_to_best_sol,iterations,num_visited_solutions,num_improvements,is_optimal,validated,gap,lb,cost,cmax_dp
0,none,mip_separation,RB1005001.txt,50,100,2,20 20,3718.5,95 45 93 40 49 80 98 87 9 74 72 81 18 47 42 2...,7494.75,7494.75,39,39,38,false,true,0.0587984,3512,3718.5,3718.5
1,none,mip_separation,RB1005001.txt,50,100,2,20 40,4009,95 45 93 40 49 80 98 87 9 74 72 81 18 47 42 2...,7470.19,7470.19,37,37,36,false,true,0.0457806,3833.5,4009,4009
2,none,mip_separation,RB1005001.txt,50,100,2,20 60,4258.5,95 45 93 40 49 80 98 87 9 74 72 81 18 47 42 2...,7500.44,7500.44,39,39,38,false,true,0.040562,4092.5,4258.5,4258.5
3,none,mip_separation,RB1005001.txt,50,100,2,20 80,4393,21 22 66 17 55 27 7 76 82 32 12 69 49 72 4 1 ...,7492.5,7492.5,40,40,39,false,true,0.0229363,4294.5,4393,4393
4,none,mip_separation,RB1005001.txt,50,100,2,20 100,4518,21 28 89 1 82 88 95 4 34 40 43 50 55 42 9 91 ...,5187.12,5187.12,35,35,34,true,true,0,4518,4518,4518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,none,mip_separation,RB0505010.txt,50,50,2,100 20,2196,27 11 3 30 21 9 34 42 2 38 47 29 33 26 49 14 ...,10.9374,10.9374,5,5,4,true,true,0,2196,2196,2196
996,none,mip_separation,RB0505010.txt,50,50,2,100 40,2196,20 44 30 18 41 8 37 49 27 35 1 29 13 24 42 17...,16.2523,16.2523,6,6,5,true,true,0,2196,2196,2196
997,none,mip_separation,RB0505010.txt,50,50,2,100 60,2196,27 25 13 20 12 46 41 40 24 2 42 31 9 49 6 11 ...,8.65535,8.65535,4,4,3,true,true,0,2196,2196,2196
998,none,mip_separation,RB0505010.txt,50,50,2,100 80,2196,14 41 27 35 1 42 21 11 28 8 18 9 49 20 12 47 ...,12.9265,12.9265,5,5,4,true,true,0,2196,2196,2196


In [125]:
df_wagner.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4759 entries, 0 to 999
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   executionId            4759 non-null   object
 1   ub_name                4759 non-null   object
 2   instance_name          4759 non-null   object
 3   alpha                  4759 non-null   object
 4   n                      4759 non-null   object
 5   m                      4759 non-null   object
 6   budget_Gamma           4759 non-null   object
 7   cmax                   4759 non-null   object
 8   permutation            4759 non-null   object
 9   time_spent             4759 non-null   object
 10  time_to_best_sol       4759 non-null   object
 11  iterations             4759 non-null   object
 12  num_visited_solutions  4759 non-null   object
 13  num_improvements       4759 non-null   object
 14  is_optimal             4759 non-null   object
 15  validated             

### Remove duplicated header rows from both dataframes 

In [126]:
def find_invalid_values(df):
    all_invalid_values = set()
    for col in df:
        if col not in ['executionId','ub_name','instance_name','budget_Gamma','permutation','is_optimal','validated']:
            # 'alpha','n','m','cmax','time_spent','time_to_best_sol','iterations','num_visited_solutions','num_improvements','gap','lb','cost','cmax_dp'
            a = pd.to_numeric(df[col], errors='coerce')
            idx = a.isna()
            invalid_values = df.loc[idx][col].unique()
            all_invalid_values.update(invalid_values)
        #elif col in ['is_optimal','validated']
    print('Invalid values:', all_invalid_values)
    return all_invalid_values

In [127]:
find_invalid_values(df_wagner)

Invalid values: {'num_visited_solutions', 'cost', 'time_spent', 'alpha', 'num_improvements', 'iterations', 'gap', 'm', 'cmax_dp', 'time_to_best_sol', 'lb', 'cmax', 'n'}


{'alpha',
 'cmax',
 'cmax_dp',
 'cost',
 'gap',
 'iterations',
 'lb',
 'm',
 'n',
 'num_improvements',
 'num_visited_solutions',
 'time_spent',
 'time_to_best_sol'}

In [128]:
def filter_invalid_values(df):
    # IMPORTANT: AVOID FILTERING 'NAN' VALUES
    for invalid_value in find_invalid_values(df):
        if isinstance(invalid_value, str):  # Evita filtrar os nan
            df = df[~(df == invalid_value).any(axis=1)]
    return df

In [129]:
df_wilson = filter_invalid_values(df_wilson)
find_invalid_values(df_wilson)

Invalid values: {'num_visited_solutions', 'cost', 'time_spent', 'alpha', 'num_improvements', 'iterations', 'gap', 'm', 'cmax_dp', 'time_to_best_sol', 'lb', 'cmax', 'n'}
Invalid values: set()


set()

In [130]:
df_wagner = filter_invalid_values(df_wagner)
find_invalid_values(df_wagner)

Invalid values: {'num_visited_solutions', 'cost', 'time_spent', 'alpha', 'num_improvements', 'iterations', 'gap', 'm', 'cmax_dp', 'time_to_best_sol', 'lb', 'cmax', 'n'}
Invalid values: set()


set()

### Convert column types from object 

In [131]:
def convert_column_types(df):
    for col in df:
        if col in ['alpha','n','m','cmax','time_spent','time_to_best_sol','iterations','num_visited_solutions','num_improvements','gap','lb','cost','cmax_dp']:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        elif col in ['is_optimal','validated']:
            df[col] = df[col].astype('bool')
    return df

In [132]:
%%time
df_wilson = convert_column_types(df_wilson)
df_wagner = convert_column_types(df_wagner)
df_wagner.dtypes

Wall time: 70.3 ms


executionId               object
ub_name                   object
instance_name             object
alpha                      int64
n                          int64
m                          int64
budget_Gamma              object
cmax                     float64
permutation               object
time_spent               float64
time_to_best_sol         float64
iterations                 int64
num_visited_solutions      int64
num_improvements           int64
is_optimal                  bool
validated                   bool
gap                      float64
lb                       float64
cost                     float64
cmax_dp                  float64
dtype: object

### Trim existing string columns 

In [133]:
def trim_all_columns(df):
    """
    Trim whitespace from ends of each value across all series in dataframe
    """
    trim_strings = lambda x: x.strip() if isinstance(x, str) else x
    return df.applymap(trim_strings)

In [134]:
df_wilson = trim_all_columns(df_wilson)
df_wagner = trim_all_columns(df_wagner)

### Include a column with the name of the underlying C&CG MILP Model

In [135]:
df_wilson['model'] = 'Wilson'
df_wagner['model'] = 'Wagner'

### Concatenate dataframes 

In [136]:
df = df_wilson.append(df_wagner)
df.head(4)

Unnamed: 0,executionId,ub_name,instance_name,alpha,n,m,budget_Gamma,cmax,permutation,time_spent,...,iterations,num_visited_solutions,num_improvements,is_optimal,validated,gap,lb,cost,cmax_dp,model
0,none,mip_separation,RB1005001.txt,50,100,2,20 20,3718.5,95 45 93 40 49 80 98 87 9 74 72 81 18 47 42 25...,7494.746535,...,39,39,38,True,True,0.058798,3512.0,3718.5,3718.5,Wilson
1,none,mip_separation,RB1005001.txt,50,100,2,20 40,4009.0,95 45 93 40 49 80 98 87 9 74 72 81 18 47 42 25...,7470.192566,...,37,37,36,True,True,0.045781,3833.5,4009.0,4009.0,Wilson
2,none,mip_separation,RB1005001.txt,50,100,2,20 60,4258.5,95 45 93 40 49 80 98 87 9 74 72 81 18 47 42 25...,7500.439856,...,39,39,38,True,True,0.040562,4092.5,4258.5,4258.5,Wilson
3,none,mip_separation,RB1005001.txt,50,100,2,20 80,4393.0,21 22 66 17 55 27 7 76 82 32 12 69 49 72 4 1 8...,7492.503556,...,40,40,39,True,True,0.022936,4294.5,4393.0,4393.0,Wilson


### Split the column budget_Gamma into Gamma1 and Gamma2 

In [137]:
# new data frame with split value columns 
new = df["budget_Gamma"].str.split(" ", n = 1, expand = True) 
# making separate first name column from new data frame 
df["Gamma1"]= new[0] 
# making separate last name column from new data frame 
df["Gamma2"]= new[1] 
# convert Gamma columns to numeric
df["Gamma1"] = pd.to_numeric(df["Gamma1"], errors='coerce')
df["Gamma2"] = pd.to_numeric(df["Gamma2"], errors='coerce')

### Round columns containing time (in seconds) 

In [138]:
df['time_spent'] = df['time_spent'].round(2)
df['time_to_best_sol'] = df['time_to_best_sol'].round(2)

### Sort data according to model, instance_name, alpha, n, m, Gamma1 and Gamma2 and set index

In [139]:
print('Sorting dataset...')
df = df.sort_values(['model', 'n', 'm', 'alpha', 'instance_name', 'Gamma1', 'Gamma2'])
display(df.dtypes)
df = df.set_index(['model', 'n', 'm', 'alpha', 'instance_name', 'Gamma1', 'Gamma2'])
display(df.head(6))

Sorting dataset...


executionId               object
ub_name                   object
instance_name             object
alpha                      int64
n                          int64
m                          int64
budget_Gamma              object
cmax                     float64
permutation               object
time_spent               float64
time_to_best_sol         float64
iterations                 int64
num_visited_solutions      int64
num_improvements           int64
is_optimal                  bool
validated                   bool
gap                      float64
lb                       float64
cost                     float64
cmax_dp                  float64
model                     object
Gamma1                     int64
Gamma2                     int64
dtype: object

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,executionId,ub_name,budget_Gamma,cmax,permutation,time_spent,time_to_best_sol,iterations,num_visited_solutions,num_improvements,is_optimal,validated,gap,lb,cost,cmax_dp
model,n,m,alpha,instance_name,Gamma1,Gamma2,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Wagner,50,2,20,RB0504001.txt,20,20,none,mip_separation,20 20,1648.6,36 47 13 5 8 20 23 11 12 1 40 2 24 9 33 45 10 ...,10.08,10.08,8,8,7,True,True,-1.379192e-16,1648.6,1648.6,1648.6
Wagner,50,2,20,RB0504001.txt,20,40,none,mip_separation,20 40,1648.6,8 22 11 32 28 25 42 48 45 13 46 16 3 41 15 24 ...,59.92,59.92,35,35,34,True,True,0.0,1648.6,1648.6,1648.6
Wagner,50,2,20,RB0504001.txt,20,60,none,mip_separation,20 60,1694.4,36 22 12 1 44 41 3 50 49 47 19 28 27 31 20 16 ...,155.21,155.21,58,58,57,True,True,-2.683825e-16,1694.4,1694.4,1694.4
Wagner,50,2,20,RB0504001.txt,20,80,none,mip_separation,20 80,1733.8,36 22 9 5 16 12 15 38 20 50 34 49 28 25 11 14 ...,2.11,2.11,6,6,5,True,True,-1.311418e-16,1733.8,1733.8,1733.8
Wagner,50,2,20,RB0504001.txt,20,100,none,mip_separation,20 100,1758.0,36 22 45 7 31 8 27 17 1 20 42 48 18 44 32 39 1...,0.85,0.85,3,3,2,True,True,-2.586731e-16,1758.0,1758.0,1758.0
Wagner,50,2,20,RB0504001.txt,40,20,none,mip_separation,40 20,1729.0,36 47 45 46 31 14 38 37 1 3 23 19 33 40 20 43 ...,0.87,0.87,3,3,2,True,True,0.0,1729.0,1729.0,1729.0


### Find missing results, for a given value of alpha, n and m

For a given group of alpha, n, m and budget_Gamma, there should be 10 results.

First we will build a dataframe with the instances list and all required budget values.

In [142]:
data = []
rootfolder = os.getcwd()
jobs_folders = glob.glob(os.path.join(rootfolder, 'instances', 'robust', 'ying', 'data', '*/'), recursive=False)
for job_path in jobs_folders:
    alpha_folders = glob.glob(os.path.join(job_path, '*/'), recursive=False)
    n = job_path[job_path.find('data')+5:job_path.rfind(' jobs')]
    #print('n: {}'.format(n))
    for alpha_path in alpha_folders:
        alpha = alpha_path[alpha_path.find('jobs')+5:alpha_path.rfind('%')]
        #print('alpha: {}'.format(alpha))
        instance_paths = glob.glob(os.path.join(alpha_path, '*'), recursive=False)
        for instance_path in instance_paths:
            instance_name = instance_path[instance_path.find('%')+2:]
            #print(instance_name)
            for gamma1 in [20, 40, 60, 80, 100]:
                for gamma2 in [20, 40, 60, 80, 100]:
                    for model in ['Wilson', 'Wagner']:
                        data.append([model, instance_name, alpha, n, 2, gamma1, gamma2])
df_instances = pd.DataFrame(data, columns=['model', 'instance_name', 'alpha', 'n', 'm', 'Gamma1', 'Gamma2'])
for col in df_instances:
    if col in ['alpha','n','m','Gamma1','Gamma2']:
        df_instances[col] = pd.to_numeric(df_instances[col], errors='coerce')
display(df_instances.dtypes)
df_instances = df_instances.set_index(['model', 'n', 'm', 'alpha', 'instance_name', 'Gamma1', 'Gamma2'])
display(df_instances)

model            object
instance_name    object
alpha             int64
n                 int64
m                 int64
Gamma1            int64
Gamma2            int64
dtype: object

model,n,m,alpha,instance_name,Gamma1,Gamma2
Wilson,10,2,10,RB0103001.txt,20,20
Wagner,10,2,10,RB0103001.txt,20,20
Wilson,10,2,10,RB0103001.txt,20,40
Wagner,10,2,10,RB0103001.txt,20,40
Wilson,10,2,10,RB0103001.txt,20,60
...,...,...,...,...,...,...
Wagner,50,2,50,RB0505010.txt,100,60
Wilson,50,2,50,RB0505010.txt,100,80
Wagner,50,2,50,RB0505010.txt,100,80
Wilson,50,2,50,RB0505010.txt,100,100


Now, lets join the instances dataframe with the results one (left join).

In [148]:
df_joined = df_instances.join(df, how='left')
df_joined

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,executionId,ub_name,budget_Gamma,cmax,permutation,time_spent,time_to_best_sol,iterations,num_visited_solutions,num_improvements,is_optimal,validated,gap,lb,cost,cmax_dp
model,n,m,alpha,instance_name,Gamma1,Gamma2,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Wagner,10,2,10,RB0103001.txt,20,20,,,,,,,,,,,,,,,,
Wagner,10,2,10,RB0103001.txt,20,40,,,,,,,,,,,,,,,,
Wagner,10,2,10,RB0103001.txt,20,60,,,,,,,,,,,,,,,,
Wagner,10,2,10,RB0103001.txt,20,80,,,,,,,,,,,,,,,,
Wagner,10,2,10,RB0103001.txt,20,100,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wilson,200,2,50,RB2005010.txt,100,20,,,,,,,,,,,,,,,,
Wilson,200,2,50,RB2005010.txt,100,40,,,,,,,,,,,,,,,,
Wilson,200,2,50,RB2005010.txt,100,60,,,,,,,,,,,,,,,,
Wilson,200,2,50,RB2005010.txt,100,80,,,,,,,,,,,,,,,,


Now we will export to CSV a list with all rows with NaN values (missing experimental results).

In [153]:
missing_df = df_joined[df_joined.isnull().any(axis=1)].reset_index()[['model', 'n', 'm', 'alpha', 'instance_name', 'Gamma1', 'Gamma2']]
print('Saving file on folder: ' + rootfolder)
fname = os.path.join(rootfolder, '2RPFS_Cmax_missing_results.csv')
missing_df.to_csv(fname, sep=';')
print('Saved: ' + fname)

Saving file on folder: C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget
Saved: C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\2RPFS_Cmax_missing_results.csv


In [23]:
df_grouped = df.groupby(['alpha', 'n', 'm', 'budget_Gamma']).agg({'executionId' : ['count']}).reset_index()
df_grouped.columns = [ ' '.join(str(i) for i in col) for col in df_grouped.columns]
#df_grouped.reset_index(inplace=True)
df_grouped

Unnamed: 0,alpha,n,m,budget_Gamma,executionId count
0,10,50,2,100 100,10
1,10,50,2,100 20,10
2,10,50,2,100 40,10
3,10,50,2,100 60,10
4,10,50,2,100 80,10
...,...,...,...,...,...
495,50,200,2,80 100,19
496,50,200,2,80 20,19
497,50,200,2,80 40,19
498,50,200,2,80 60,19


In [24]:
table = pd.pivot_table(df, values='executionId', index=['alpha', 'n'], columns=['Gamma1', 'Gamma2'], aggfunc='count', fill_value=0)
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    display(table)

Unnamed: 0_level_0,Gamma1,20,20,20,20,20,40,40,40,40,40,60,60,60,60,60,80,80,80,80,80,100,100,100,100,100
Unnamed: 0_level_1,Gamma2,20,40,60,80,100,20,40,60,80,100,20,40,60,80,100,20,40,60,80,100,20,40,60,80,100
alpha,n,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2
10,50,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10
10,100,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
10,150,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
10,200,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19
20,50,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
20,100,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
20,150,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
20,200,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
30,50,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
30,100,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20


### Export the dataset to CSV file 

In [25]:
%%time

print('Saving file on folder: ' + rootfolder)
fname = os.path.join(rootfolder, '2RPFS_Cmax_all_results.csv')
df.to_csv(fname, sep=';')
print('Saved: ' + fname)

Saving file on folder: C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget
Saved: C:\Users\czt0\Documents\doutorado_files\2RPFS_Cmax_Budget\2RPFS_Cmax_all_results.csv
Wall time: 273 ms
