In [None]:
import pandas as pd
from os import listdir
from os.path import isfile, join

# Gather the names of the raw result files for each algorithm.
# Only regular files are kept; sub-directories inside a folder are skipped.

grad_path = "./raw_files/GRAD/"
grad_files = list(filter(lambda name: isfile(join(grad_path, name)), listdir(grad_path)))

gradl_path = "./raw_files/GRAD_L/"
gradl_files = list(filter(lambda name: isfile(join(gradl_path, name)), listdir(gradl_path)))

gradh5_path = "./raw_files/GRAD_H5/"
gradh5_files = list(filter(lambda name: isfile(join(gradh5_path, name)), listdir(gradh5_path)))

# Parse the raw result files of every algorithm into one DataFrame each.
#
# Rows are collected as plain dicts and each DataFrame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row is
# quadratic. Building from records also lets pandas infer numeric dtypes.


def _dataset_label(att):
    """Return the data-set name for an attribute count ("" when it is not recognised)."""
    if att == 98:
        return "C2K"
    if att == 9:
        return "UCI"
    return ""


def _parse_result_file(f_path, nrows, att_row, sup_row, size_row, pat_row, chk_row=None):
    """Read one raw result file and return its measurements as a record (dict).

    The first ``nrows`` lines of the file are split on ': ' and the text after
    the separator is looked up by row number.

    Parameters
    ----------
    f_path : str
        Path of the result file.
    nrows : int
        Number of leading lines to read from the file.
    att_row, sup_row, size_row, pat_row : int
        Row numbers holding the attribute count, the minimum support, the
        data-set size and the number of patterns.
    chk_row : int, optional
        Row number holding the chunk size; when given, a "Chunk-size" entry is
        added to the record.

    Returns
    -------
    dict
        Keys "Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory",
        "Patterns" and, if ``chk_row`` is given, "Chunk-size".
    """
    res = pd.read_csv(f_path, sep=': ', header=None, engine='python', nrows=nrows)
    values = res[1]  # column 1 is the text after the ': ' separator

    record = {
        "Data-set": _dataset_label(int(values[att_row])),
        "Size": int(values[size_row]),
        "Algorithm": values[2],
        "Support": float(values[sup_row]),
        # NOTE(review): only the first 6 / 5 characters are converted, presumably
        # to drop a trailing unit; longer numbers would be truncated -- confirm
        # against the raw file format.
        "Run-time": float(values[0][0:6]),
        "Memory": float(values[1][0:5]),
        "Patterns": float(values[pat_row]),
    }
    if chk_row is not None:
        record["Chunk-size"] = int(values[chk_row])
    return record


# GRAD Algorithm

column_names = ["Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory", "Patterns"]

grad_records = [
    _parse_result_file(join(grad_path, file), nrows=8, att_row=3, sup_row=5, size_row=4, pat_row=7)
    for file in grad_files
]
# Passing `columns` fixes the column order and keeps the header when no file was found
df_grad = pd.DataFrame(grad_records, columns=column_names)


# GRAD-H5 Algorithm (same columns as GRAD, different row layout in the files)

gradh5_records = [
    _parse_result_file(join(gradh5_path, file), nrows=11, att_row=5, sup_row=3, size_row=6, pat_row=10)
    for file in gradh5_files
]
df_gradh5 = pd.DataFrame(gradh5_records, columns=column_names)


# GRAD-L Algorithm (additionally records the chunk size)

column_names = ["Data-set", "Size", "Algorithm", "Support", "Chunk-size", "Run-time", "Memory", "Patterns"]

gradl_records = [
    _parse_result_file(join(gradl_path, file), nrows=12, att_row=5, sup_row=3, size_row=6, pat_row=11, chk_row=7)
    for file in gradl_files
]
df_gradl = pd.DataFrame(gradl_records, columns=column_names)

In [None]:
# df_grad

In [None]:
# Combine the results of all three algorithms into one data-frame.
# Each input frame is indexed from 0, so ignore_index=True is used to give the
# combined frame a fresh, unique index instead of repeated labels.

frames = [df_grad, df_gradh5, df_gradl]
df_res = pd.concat(frames, ignore_index=True)
# df_res

# Describing the results (optional exploration / export, kept for reference)

# df_res.groupby(["Data-set", "Support", "Algorithm"]).describe().to_excel("stats.xlsx", sheet_name="Stats")
# df_res.groupby(["Data-set", "Size", "Algorithm"]).describe(percentiles=[])

In [None]:
# Summary statistics per (data set, size, algorithm).
# 'Support' and 'Chunk-size' are left out: only the grouping keys and the
# three measured metrics are kept.
df_res2 = df_res.loc[:, ['Data-set', 'Size', 'Algorithm', 'Run-time', 'Patterns', 'Memory']]

df = df_res2.groupby(["Data-set", "Size", "Algorithm"])

# One aggregated frame per statistic, placed side by side under a
# (statistic, metric) column index
summaries = {
    'min': df.min(),
    'mean': df.mean(),
    'max': df.max(),
    'std': df.std(),
}
df2 = pd.concat(list(summaries.values()), keys=list(summaries.keys()), axis=1)

# Flip the column levels to (metric, statistic) and sort them in descending order
df2 = df2.swaplevel(0, 1, axis=1).sort_index(axis=1, level=0, ascending=False)
df2.round(3)