In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join

# Collect the names of the raw result files (regular files only, no
# sub-directories) found directly inside each algorithm's output directory.

def _plain_files(directory):
    """Return the names of the regular files located directly in `directory`."""
    return [entry for entry in listdir(directory) if isfile(join(directory, entry))]


grad_path = "./raw_files/GRAD/"
gradl_path = "./raw_files/GRAD_L/"
gradh5_path = "./raw_files/GRAD_H5/"

grad_files = _plain_files(grad_path)
gradl_files = _plain_files(gradl_path)
gradh5_files = _plain_files(gradh5_path)

# Parse the raw result files of each algorithm into one DataFrame per algorithm.


def _dataset_label(n_attributes):
    """Map the attribute count reported in a result file to a data-set label.

    98 attributes -> "C2K", 9 attributes -> "UCI", anything else -> "".
    """
    if n_attributes == 98:
        return "C2K"
    if n_attributes == 9:
        return "UCI"
    return ""


def _load_results(dir_path, file_names, columns, n_rows, row_of):
    """Parse every result file in `file_names` into a single DataFrame.

    Each file is read as `key: value` lines; only the value column of the
    first `n_rows` lines is used.  Rows 0, 1 and 2 hold the run-time, the
    memory and the algorithm name for every algorithm; the remaining fields
    sit at algorithm-specific rows given by `row_of`.

    Parameters
    ----------
    dir_path : str
        Directory that contains the files.
    file_names : list of str
        File names (relative to `dir_path`) to parse.
    columns : list of str
        Columns (and their order) of the returned frame.
    n_rows : int
        Number of leading lines to read from each file.
    row_of : dict
        Row index of the fields "attributes", "support", "size" and
        "patterns", plus optionally "chunk" (chunk size).

    Returns
    -------
    pandas.DataFrame
        One row per file; an empty frame with `columns` if there are no files.
    """
    # Rows are collected in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0, and growing a frame row by
    # row copies it on every iteration.
    records = []
    for file_name in file_names:
        raw = pd.read_csv(join(dir_path, file_name), sep=': ', header=None,
                          engine='python', nrows=n_rows)
        values = raw[1]  # the text after ': ' on each line

        # NOTE(review): as in the original code, only the first 6 characters
        # of the run-time and the first 5 of the memory field are parsed.
        # Presumably this strips a trailing unit, but it also truncates longer
        # numbers - confirm against the raw file format.
        record = {
            "Data-set": _dataset_label(int(values[row_of["attributes"]])),
            "Size": int(values[row_of["size"]]),
            "Algorithm": values[2],
            "Support": float(values[row_of["support"]]),
            "Run-time": float(values[0][0:6]),
            "Memory": float(values[1][0:5]),
            "Patterns": float(values[row_of["patterns"]]),
        }
        if "chunk" in row_of:
            record["Chunk-size"] = int(values[row_of["chunk"]])
        records.append(record)

    return pd.DataFrame(records, columns=columns)


# GRAD Algorithm

column_names = ["Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory", "Patterns"]

df_grad = _load_results(
    grad_path, grad_files, column_names, n_rows=8,
    row_of={"attributes": 3, "size": 4, "support": 5, "patterns": 7},
)

# GRAD-H5 Algorithm (same columns as GRAD, different row layout)

df_gradh5 = _load_results(
    gradh5_path, gradh5_files, column_names, n_rows=11,
    row_of={"support": 3, "attributes": 5, "size": 6, "patterns": 10},
)

# GRAD-L Algorithm (additionally reports the chunk size)

column_names = ["Data-set", "Size", "Algorithm", "Support", "Chunk-size", "Run-time", "Memory", "Patterns"]

df_gradl = _load_results(
    gradl_path, gradl_files, column_names, n_rows=12,
    row_of={"support": 3, "attributes": 5, "size": 6, "chunk": 7, "patterns": 11},
)

In [None]:
# df_grad

In [2]:
# Stack the three per-algorithm result tables on top of each other.
# Columns missing from a table (e.g. Chunk-size) are filled with NaN.

frames = [df_grad, df_gradh5, df_gradl]
df_res = pd.concat(objs=frames, axis=0)
# df_res

# Describing the results

# df_res.groupby(["Data-set", "Support", "Algorithm"]).describe().to_excel("stats.xlsx", sheet_name="Stats")
# df_res.groupby(["Data-set", "Size", "Algorithm"]).describe(percentiles=[])

In [3]:
# Summarise run-time, pattern count and memory per (data set, size, algorithm).
# 'Support' and 'Chunk-size' are deliberately left out of the summary.
summary_columns = ['Data-set', 'Size', 'Algorithm', 'Run-time', 'Patterns', 'Memory']
df_res2 = df_res[summary_columns]

df = df_res2.groupby(["Data-set", "Size", "Algorithm"])

# One aggregated table per statistic, placed side by side under a new outer
# column level that carries the statistic's name.
stat_tables = {'min': df.min(), 'mean': df.mean(), 'max': df.max(), 'std': df.std()}
df2 = pd.concat(list(stat_tables.values()), keys=list(stat_tables), axis=1)

# Put the measure (Run-time / Patterns / Memory) on the outer level and the
# statistic on the inner one, then order the columns in descending order.
df2.columns = df2.columns.swaplevel(0, 1)
df2 = df2.sort_index(axis=1, level=0, ascending=False)
df2.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Run-time,Run-time,Run-time,Run-time,Patterns,Patterns,Patterns,Patterns,Memory,Memory,Memory,Memory
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,std,min,mean,max,std,min,mean,max,std,min,mean,max
Data-set,Size,Algorithm,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
C2K,3943,ACO-GRAANK (v4.0),24.125,677.15,702.536,729.63,0.0,2.0,2.0,2.0,2.044,168.9,172.089,174.5
C2K,3943,ACO-GRAANK BD (v8.0),0.653,15.149,15.821,16.668,0.894,0.0,1.4,2.0,38.643,435.5,501.2,536.1
C2K,3943,ACO-GRAANK HDF5 (v7.0),12.162,3777.7,3786.3,3794.9,0.0,2.0,2.0,2.0,4.313,494.4,497.45,500.5
UCI,10000,ACO-GRAANK (v4.0),1.448,49.527,51.682,53.323,0.408,1.0,1.833,2.0,0.564,108.9,109.617,110.6
UCI,10000,ACO-GRAANK BD (v8.0),0.63,4.31,5.017,6.058,0.516,1.0,1.333,2.0,1.089,289.8,291.35,292.6
UCI,10000,ACO-GRAANK HDF5 (v7.0),98.794,6.274,47.162,248.82,0.0,2.0,2.0,2.0,118.713,122.9,172.383,414.7
UCI,116203,ACO-GRAANK BD (v8.0),63.772,454.74,524.787,579.48,0.0,2.0,2.0,2.0,15.312,258.7,276.367,285.8
UCI,116203,ACO-GRAANK HDF5 (v7.0),143.543,33108.0,33209.5,33311.0,0.0,2.0,2.0,2.0,0.566,427.2,427.6,428.0
UCI,523104,ACO-GRAANK BD (v8.0),1716.374,9213.8,10947.6,12646.0,1.0,0.0,1.0,2.0,22.228,262.2,287.8,302.2
UCI,1000000,ACO-GRAANK BD (v8.0),367.723,39200.0,39460.333,39881.0,0.577,1.0,1.667,2.0,1.386,348.8,350.4,351.2


In [4]:
# Compact summary: only the mean and the standard deviation of each measure
# per (data set, size, algorithm).
df = df_res2.groupby(["Data-set", "Size", "Algorithm"])

stat_tables = {'mean': df.mean(), 'std': df.std()}
df2 = pd.concat(list(stat_tables.values()), keys=list(stat_tables), axis=1)

# Measure on the outer column level, statistic on the inner one, columns in
# descending order.
df2.columns = df2.columns.swaplevel(0, 1)
df2 = df2.sort_index(axis=1, level=0, ascending=False)
df2.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Run-time,Run-time,Patterns,Patterns,Memory,Memory
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,std,mean,std,mean,std,mean
Data-set,Size,Algorithm,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
C2K,3943,ACO-GRAANK (v4.0),24.125,702.536,0.0,2.0,2.044,172.089
C2K,3943,ACO-GRAANK BD (v8.0),0.653,15.821,0.894,1.4,38.643,501.2
C2K,3943,ACO-GRAANK HDF5 (v7.0),12.162,3786.3,0.0,2.0,4.313,497.45
UCI,10000,ACO-GRAANK (v4.0),1.448,51.682,0.408,1.833,0.564,109.617
UCI,10000,ACO-GRAANK BD (v8.0),0.63,5.017,0.516,1.333,1.089,291.35
UCI,10000,ACO-GRAANK HDF5 (v7.0),98.794,47.162,0.0,2.0,118.713,172.383
UCI,116203,ACO-GRAANK BD (v8.0),63.772,524.787,0.0,2.0,15.312,276.367
UCI,116203,ACO-GRAANK HDF5 (v7.0),143.543,33209.5,0.0,2.0,0.566,427.6
UCI,523104,ACO-GRAANK BD (v8.0),1716.374,10947.6,1.0,1.0,22.228,287.8
UCI,1000000,ACO-GRAANK BD (v8.0),367.723,39460.333,0.577,1.667,1.386,350.4
