In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join

# Locate the raw result files (txt) produced by each algorithm


def _regular_files(directory):
    """Return the names of the entries in `directory` that are regular files."""
    return [entry for entry in listdir(directory) if isfile(join(directory, entry))]


grad_path = "./raw_files/GRAD/"
grad_files = _regular_files(grad_path)

gradl_path = "./raw_files/GRAD_L/"
gradl_files = _regular_files(gradl_path)

gradh5_path = "./raw_files/GRAD_H5/"
gradh5_files = _regular_files(gradh5_path)

# GRAD Algorithm
# Parse every GRAD result file into one row of `df_grad`.

column_names = ["Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory", "Patterns"]

# Rows are collected in a plain list and the frame is built once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop copies the whole frame on every iteration.
grad_records = []

for file in grad_files:
    f_path = join(grad_path, file)
    # Only the first 8 lines are read; each line is split on ': ' and the
    # text after the separator ends up in column 1.
    res = pd.read_csv(f_path, sep=': ', header=None, engine='python', nrows=8)

    # NOTE(review): only the first 6 (run-time) / 5 (memory) characters of the
    # value are parsed - presumably to cut off a trailing unit. A wider number
    # would be silently truncated; confirm against the raw file format.
    run = float(res[1][0][0:6])  # run-time
    mem = float(res[1][1][0:5])  # memory
    pat = float(res[1][7])  # patterns
    alg = res[1][2]  # algorithm
    att = int(res[1][3])  # number of attributes in the data set
    sup = float(res[1][5])  # minimum support
    size = int(res[1][4])  # data set size

    # The data set is recognised by its number of attributes; anything else
    # gets an empty label.
    if att == 98:
        col = "C2K"
    elif att == 9:
        col = "UCI"
    else:
        col = ""

    grad_records.append({"Data-set": col, "Size": size, "Algorithm": alg, "Support": sup, "Run-time": run, "Memory": mem, "Patterns": pat})

# Passing `columns` keeps the column order (and the columns themselves when no
# file was found).
df_grad = pd.DataFrame(grad_records, columns=column_names)
    
    
    
# GRAD-H5 Algorithm
# Parse every GRAD-H5 result file into one row of `df_gradh5`.

# Stated explicitly so this section does not depend on whichever value
# `column_names` was last given elsewhere.
column_names = ["Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory", "Patterns"]

# Rows are collected in a plain list and the frame is built once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop copies the whole frame on every iteration.
gradh5_records = []

for file in gradh5_files:
    f_path = join(gradh5_path, file)
    # Only the first 11 lines are read; each line is split on ': ' and the
    # text after the separator ends up in column 1.
    res = pd.read_csv(f_path, sep=': ', header=None, engine='python', nrows=11)

    # NOTE(review): only the first 6 (run-time) / 5 (memory) characters of the
    # value are parsed - presumably to cut off a trailing unit. A wider number
    # would be silently truncated; confirm against the raw file format.
    run = float(res[1][0][0:6])  # run-time
    mem = float(res[1][1][0:5])  # memory
    pat = float(res[1][10])  # patterns
    alg = res[1][2]  # algorithm
    att = int(res[1][5])  # number of attributes in the data set
    sup = float(res[1][3])  # minimum support
    size = int(res[1][6])  # data set size

    # The data set is recognised by its number of attributes; anything else
    # gets an empty label.
    if att == 98:
        col = "C2K"
    elif att == 9:
        col = "UCI"
    else:
        col = ""

    gradh5_records.append({"Data-set": col, "Size": size, "Algorithm": alg, "Support": sup, "Run-time": run, "Memory": mem, "Patterns": pat})

# Passing `columns` keeps the column order (and the columns themselves when no
# file was found).
df_gradh5 = pd.DataFrame(gradh5_records, columns=column_names)


    
# GRAD-L Algorithm
# Parse every GRAD-L result file into one row of `df_gradl`.

column_names = ["Data-set", "Size", "Algorithm", "Support", "Chunk-size", "Run-time", "Memory", "Patterns"]

# Rows are collected in a plain list and the frame is built once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop copies the whole frame on every iteration.
gradl_records = []

for file in gradl_files:
    f_path = join(gradl_path, file)
    # Only the first 12 lines are read; each line is split on ': ' and the
    # text after the separator ends up in column 1.
    res = pd.read_csv(f_path, sep=': ', header=None, engine='python', nrows=12)

    # NOTE(review): only the first 6 (run-time) / 5 (memory) characters of the
    # value are parsed - presumably to cut off a trailing unit. A wider number
    # would be silently truncated; confirm against the raw file format.
    run = float(res[1][0][0:6])  # run-time
    mem = float(res[1][1][0:5])  # memory
    pat = float(res[1][11])  # patterns
    alg = res[1][2]  # algorithm
    att = int(res[1][5])  # number of attributes in the data set
    sup = float(res[1][3])  # minimum support
    size = int(res[1][6])  # data set size
    chk = int(res[1][7])  # chunk size

    # The data set is recognised by its number of attributes; anything else
    # gets an empty label.
    if att == 98:
        col = "C2K"
    elif att == 9:
        col = "UCI"
    else:
        col = ""

    gradl_records.append({"Data-set": col, "Size": size, "Algorithm": alg, "Support": sup, "Chunk-size": chk, "Run-time": run, "Memory": mem, "Patterns": pat})

# Passing `columns` keeps the column order (and the columns themselves when no
# file was found).
df_gradl = pd.DataFrame(gradl_records, columns=column_names)

In [None]:
# df_grad

In [None]:
# df_gradh5

In [None]:
# df_gradl

In [2]:
# Stack the three per-algorithm tables row-wise into a single data-frame.
# Each table keeps its own row labels, so labels repeat in the result.

frames = [df_grad, df_gradh5, df_gradl]
df_res = pd.concat(objs=frames, axis=0)
df_res

Unnamed: 0,Data-set,Size,Algorithm,Support,Run-time,Memory,Patterns,Chunk-size
0,C2K,3943,ACO-GRAANK (v4.0),0.1,677.15,173.9,2.0,
1,C2K,3943,ACO-GRAANK (v4.0),0.1,727.18,169.6,2.0,
2,C2K,3943,ACO-GRAANK (v4.0),0.1,729.63,170.3,2.0,
0,C2K,3943,ACO-GRAANK HDF5 (v7.0),0.1,3777.7,500.5,2.0,
1,UCI,116203,ACO-GRAANK HDF5 (v7.0),0.1,33108.0,427.2,2.0,
0,UCI,523104,ACO-GRAANK BD (v8.0),0.1,9213.8,299.0,1.0,100000.0
1,UCI,2075259,ACO-GRAANK BD (v8.0),0.1,187705.0,372.8,2.0,100000.0
2,C2K,3943,ACO-GRAANK BD (v8.0),0.1,15.503,507.0,1.0,100000.0
3,UCI,1000000,ACO-GRAANK BD (v8.0),0.1,39200.0,351.2,2.0,100000.0
4,UCI,116203,ACO-GRAANK BD (v8.0),0.1,454.74,284.6,2.0,100000.0


In [None]:
# Descriptive statistics per (data set, size, algorithm) group,
# requested with an empty percentile list.

# Alternatives kept for reference:
# df_res.describe()
# df_res.groupby(["Data-set", "Support", "Algorithm"]).describe().to_excel("stats.xlsx", sheet_name="Stats")
grouped_results = df_res.groupby(by=["Data-set", "Size", "Algorithm"])
grouped_results.describe(percentiles=[])

In [3]:
# Optional: leave these columns out of the summary
# df_res = df_res.drop(['Support', 'Chunk-size'], axis=1)

# Group the runs by data set, size and algorithm
df = df_res.groupby(["Data-set", "Size", "Algorithm"])  # .describe(percentiles=[])

# Put min / mean / max / std side by side, move the measure name to the outer
# column level, then order the column labels in descending order.
df2 = (
    pd.concat(
        [df.min(), df.mean(), df.max(), df.std()],
        keys=['min', 'mean', 'max', 'std'],
        axis=1,
    )
    .swaplevel(0, 1, axis=1)
    .sort_index(axis=1, level=0, ascending=False)
)
# df2.filter(like="Run-time")
df2.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Run-time,Run-time,Run-time,Run-time,Patterns,Patterns,Patterns,Patterns,Memory,Memory,Memory,Memory
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,std,min,mean,max,std,min,mean,max,std,min,mean,max
Data-set,Size,Algorithm,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
C2K,3943,ACO-GRAANK (v4.0),29.617,677.15,711.32,729.63,0.0,2.0,2.0,2.0,2.307,169.6,171.267,173.9
C2K,3943,ACO-GRAANK BD (v8.0),0.795,15.149,15.773,16.668,1.0,0.0,1.0,2.0,45.679,435.5,487.667,520.5
C2K,3943,ACO-GRAANK HDF5 (v7.0),,3777.7,3777.7,3777.7,,2.0,2.0,2.0,,500.5,500.5,500.5
UCI,116203,ACO-GRAANK BD (v8.0),63.772,454.74,524.787,579.48,0.0,2.0,2.0,2.0,15.312,258.7,276.367,285.8
UCI,116203,ACO-GRAANK HDF5 (v7.0),,33108.0,33108.0,33108.0,,2.0,2.0,2.0,,427.2,427.2,427.2
UCI,523104,ACO-GRAANK BD (v8.0),1251.013,9213.8,10098.4,10983.0,0.707,0.0,0.5,1.0,26.022,262.2,280.6,299.0
UCI,1000000,ACO-GRAANK BD (v8.0),481.54,39200.0,39540.5,39881.0,0.0,2.0,2.0,2.0,0.0,351.2,351.2,351.2
UCI,2075259,ACO-GRAANK BD (v8.0),22113.287,145958.0,162616.667,187705.0,0.577,1.0,1.333,2.0,5.605,361.6,367.333,372.8
