In [1]:
# rank the obtained results using the *.log files
import os
import pandas as pd
import numpy as np

In [2]:
source = "3c"
targetdir = '../../data/' + source + "/"
filelist = sorted(os.listdir(targetdir))

In [3]:
filelist

['1.data', '2.data', '3.data', '4.data', '5.data', '6.data', '7.data']

In [4]:
# Create dataframe from files
df = pd.DataFrame()

for file in filelist:
    filename = targetdir+file
    col_name = [file]
    temp_df = pd.read_csv(filename,names=col_name)    
    df = pd.concat([df, temp_df], axis=1)
    
# Look at the data
df.head()

Unnamed: 0,1.data,2.data,3.data,4.data,5.data,6.data,7.data
0,1.0,1.0,1.0,1.0,0.73,1.0,1.0
1,1.06,1.16,0.0,1.2,0.73,0.0,0.0
2,1.12,1.22,0.0,1.33,0.73,0.0,0.0
3,0.99,0.83,0.91,0.83,0.63,0.91,1.0
4,0.99,0.83,0.91,0.83,0.63,0.0,0.0


In [5]:
# Clip values > 1 with 1 and ignore 0s
df.mask(df > 1, 1, inplace=True)
df.mask(df <= 0, np.NaN, inplace=True)

In [6]:
# Count NaN values
df.isna().sum()

1.data     18
2.data      0
3.data    324
4.data    288
5.data      0
6.data    261
7.data    324
dtype: int64

In [7]:
df

Unnamed: 0,1.data,2.data,3.data,4.data,5.data,6.data,7.data
0,1.00,1.00,1.00,1.00,0.73,1.00,1.0
1,1.00,1.00,,1.00,0.73,,
2,1.00,1.00,,1.00,0.73,,
3,0.99,0.83,0.91,0.83,0.63,0.91,1.0
4,0.99,0.83,0.91,0.83,0.63,,
...,...,...,...,...,...,...,...
724,1.00,0.87,,,0.65,0.88,1.0
725,1.00,0.87,,,0.65,,
726,1.00,0.74,0.99,1.00,0.54,0.50,1.0
727,1.00,0.74,0.99,1.00,0.54,0.50,1.0


In [8]:
# Save processed dataframe as csv file
df.to_csv("../../data/processed/asymmetric/" + source +".csv",index=False)

In [9]:
# Creating ranked dataframe
ranked_df = pd.DataFrame()
stats_df = pd.DataFrame()

In [10]:
# going through every column
for column in df:  
    wwtp = column[0]
    
    # In every column, drop na values
    asym_column = df[column].dropna()
    
    # and calculate individual tao
    tao = len(asym_column)
    
    # calculate mean 
    avg_eff = round(asym_column.mean(),3)
    
    # calculate max
    max_eff = round(asym_column.max(),3)
    
    # calculate min
    min_eff = round(asym_column.min(),3)
    
    # calculate amplitude 
    amplitude = round((max_eff - min_eff)*100,2)
    
    amp_str = "Amplitude (max-min)(%)"
    
    # print stats results
    print("WWTP",wwtp,
          "\nMean =",avg_eff,"Maximum =",max_eff,"Minimum =",min_eff, amp_str,"=",amplitude)
    
    stats_df = stats_df.append({ 'WWTP': wwtp,
        "Mean": avg_eff, "Maximum" : max_eff, "Minimum": min_eff, amp_str: amplitude},ignore_index=True)
    
    # Calculating Sk sum of factors
    Sk = round(asym_column.sum(),3)    
    
    # Calculating ek sum of factors of 1 (or above if errors in calculation)
    ek = asym_column >= 1
    ek = ek.sum()
        
    # Calculating R1k ek/tao
    R1k = round(ek/tao,3)
    
    # Calculate R2k
    if tao != ek:
        R2k = (Sk - ek)/(tao - ek)
    elif R1k == 1:
        R2k = 0
        
    R2k = round(R2k,3)
    
    # Printing results    
    print("tao =",tao,"| ek =",ek,"| R1k =",R1k, "| Sk =",Sk, "| R2k =",R2k,"\n")    
    
    # Populate ranking dataframe using pd.df.append
    # Using unicode to name columns with super and subscripts
    R1k_col = 'R\u00B9\u2096\u2080'
    R2k_col = 'R\u00B2\u2096\u2080'
    ranked_df = ranked_df.append({ R2k_col:R2k, R1k_col: R1k,'WWTP': wwtp},ignore_index=True)  

WWTP 1 
Mean = 0.992 Maximum = 1.0 Minimum = 0.95 Amplitude (max-min)(%) = 5.0
tao = 711 | ek = 460 | R1k = 0.647 | Sk = 705.53 | R2k = 0.978 

WWTP 2 
Mean = 0.859 Maximum = 1.0 Minimum = 0.7 Amplitude (max-min)(%) = 30.0
tao = 729 | ek = 223 | R1k = 0.306 | Sk = 625.95 | R2k = 0.796 

WWTP 3 
Mean = 0.919 Maximum = 1.0 Minimum = 0.76 Amplitude (max-min)(%) = 24.0
tao = 405 | ek = 102 | R1k = 0.252 | Sk = 372.33 | R2k = 0.892 

WWTP 4 
Mean = 0.885 Maximum = 1.0 Minimum = 0.68 Amplitude (max-min)(%) = 32.0
tao = 441 | ek = 144 | R1k = 0.327 | Sk = 390.24 | R2k = 0.829 

WWTP 5 
Mean = 0.64 Maximum = 0.79 Minimum = 0.51 Amplitude (max-min)(%) = 28.0
tao = 729 | ek = 0 | R1k = 0.0 | Sk = 466.74 | R2k = 0.64 

WWTP 6 
Mean = 0.68 Maximum = 1.0 Minimum = 0.48 Amplitude (max-min)(%) = 52.0
tao = 468 | ek = 61 | R1k = 0.13 | Sk = 318.11 | R2k = 0.632 

WWTP 7 
Mean = 0.995 Maximum = 1.0 Minimum = 0.97 Amplitude (max-min)(%) = 3.0
tao = 405 | ek = 291 | R1k = 0.719 | Sk = 403.14 | R2k = 0.98

## Ranking of WWTP

In [11]:
# Reorder columns to be usable as a results table
ranked_df = ranked_df.reindex(columns=['WWTP',R1k_col, R2k_col])

In [12]:
ranked_df

Unnamed: 0,WWTP,R¹ₖ₀,R²ₖ₀
0,1,0.647,0.978
1,2,0.306,0.796
2,3,0.252,0.892
3,4,0.327,0.829
4,5,0.0,0.64
5,6,0.13,0.632
6,7,0.719,0.984


In [13]:
import os

path = "../../results/" + source + "/asymmetric"

# Save rankings dataframe as csv file

try:
    ranked_df.to_csv(path + "/ranking.csv",index=False)
    print("Save succesful")
except:
    print("Creating folder and saving")
    os.mkdir(path)
    ranked_df.to_csv(path + "/ranking.csv",index=False)

Creating folder and saving


## Calculate Descriptive Statistics

In [14]:
# Calculate the mean of every column
mean_mean = round(stats_df.Mean.mean(),3)
mean_max = round(stats_df.Maximum.mean(),3)
mean_min = round(stats_df.Minimum.mean(),3)
mean_amp = round(stats_df[amp_str].mean(),3)

In [15]:
# Add means to stats dataframe
stats_df = stats_df.append({ 'WWTP': "Mean", "Mean" : mean_mean, "Maximum" : mean_max, 
                            "Minimum" : mean_min, amp_str : mean_amp},ignore_index=True)

In [16]:
# Calculate the standard deviation of every column
sd_mean = round(stats_df.Mean.std(),3)
sd_max = round(stats_df.Maximum.std(),3)
sd_min = round(stats_df.Minimum.std(),3)
sd_amp = round(stats_df[amp_str].std(),3)

In [17]:
# Add means to stats dataframe
stats_df = stats_df.append({ 'WWTP': "SD", "Mean" : sd_mean, "Maximum" : sd_max, 
                            "Minimum" : sd_min, amp_str : sd_amp},ignore_index=True)

In [18]:
# Reorder columns
stats_df = stats_df.reindex(columns=["WWTP", "Mean", "Maximum", "Minimum", amp_str])

In [19]:
stats_df

Unnamed: 0,WWTP,Mean,Maximum,Minimum,Amplitude (max-min)(%)
0,1,0.992,1.0,0.95,5.0
1,2,0.859,1.0,0.7,30.0
2,3,0.919,1.0,0.76,24.0
3,4,0.885,1.0,0.68,32.0
4,5,0.64,0.79,0.51,28.0
5,6,0.68,1.0,0.48,52.0
6,7,0.995,1.0,0.97,3.0
7,Mean,0.853,0.97,0.721,24.857
8,SD,0.131,0.073,0.178,15.569


In [20]:
# Save statistics dataframe as csv file
stats_df.to_csv(path + "/statistics.csv",index=False)

In [21]:
# Convert Jupyter Notebook to PDF LaTeX file
!jupyter-nbconvert --to pdf "clip-max-ignore-zeros-custom.ipynb" --output-dir "../../results/3c/asymmetric"

[NbConvertApp] Converting notebook clip-max-ignore-zeros-custom.ipynb to pdf
[NbConvertApp] Writing 45170 bytes to ./notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: ['xelatex', './notebook.tex', '-quiet']
[NbConvertApp] Running bibtex 1 time: ['bibtex', './notebook']
[NbConvertApp] PDF successfully created
[NbConvertApp] Writing 47154 bytes to ../../results/3b/asymmetric/clip-max-ignore-zeros-custom.pdf
