# Import libraries

In [3]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# 1. Reading the results

We read every result text file in `./spaces/` and collect the parsed values into one pandas DataFrame per algorithm for analysis.

In [23]:
# Directory that holds the raw result files; only regular files are kept
# (sub-directories are ignored).
path = "./spaces/"
files = list(filter(lambda name: isfile(join(path, name)), listdir(path)))

# Columns shared by every algorithm's result frame.
_common_cols = [
    "Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory",
    "Patterns", "Pattern-count", "Invalid-Patterns",
]
# Trailing columns shared by the GA / PSO / PLS / PRS frames.
_budget_cols = ["Max-Iteration", "Max-Evaluation", "Candidates"]

# One empty frame per algorithm; rows are added while the files are parsed.
df_graank = pd.DataFrame(columns=list(_common_cols))
df_lcm = pd.DataFrame(columns=list(_common_cols))
df_aco = pd.DataFrame(columns=_common_cols + ["E-Factor"])
df_ga = pd.DataFrame(
    columns=_common_cols
    + ["Search-Space", "N-pop", "PC", "Gamma", "Mu", "Sigma"]
    + _budget_cols
)
df_pso = pd.DataFrame(
    columns=_common_cols
    + ["Search-Space", "N-particle", "Velocity", "Coef-P", "Coef-G"]
    + _budget_cols
)
df_pls = pd.DataFrame(
    columns=_common_cols + ["Search-Space", "Step-Size"] + _budget_cols
)
df_prs = pd.DataFrame(columns=_common_cols + ["Search-Space"] + _budget_cols)

# Raw algorithm labels, exactly as they are read from column 'B' of a result
# file (surrounding spaces included), mapped to
# (key of the frame the row belongs to, short name stored in "Algorithm").
_ALGORITHMS = {
    " GRAANK ": ("graank", "GRAANK"),
    " LCM-GRAD (1.0) ": ("lcm", "LcmGRAD"),
    " ACO-GRAANK (v4.0)": ("aco", "AcoGRAD"),
    " GA-GRAANK (v1.0)": ("ga", "GaGRAD-BM"),
    " GA-GRAANK (v2.0)": ("ga", "GaGRAD-NU"),
    " PSO-GRAANK (v1.0)": ("pso", "PsoGRAD-BM"),
    " PSO-GRAANK (v2.0)": ("pso", "PsoGRAD-NU"),
    " PLS-GRAANK (v1.0)": ("pls", "PlsGRAD-BM"),
    " PLS-GRAANK (v2.0)": ("pls", "PlsGRAD-NU"),
    " PRS-GRAANK (v1.0)": ("prs", "PrsGRAD-BM"),
    " PRS-GRAANK (v2.0)": ("prs", "PrsGRAD-NU"),
}


def _dataset_label(att, size):
    """Return the short data-set code for an (attribute count, size) pair.

    Attribute counts 11 and 21 are each shared by two data sets, which are
    told apart by their size.  Unknown counts yield "** Check **".
    """
    if att == 11:
        return "OMD" if size == 1999 else "HCV"
    if att == 21:
        return "DIR" if size == 8074 else "CPX"
    by_attributes = {98: "C2K", 15: "AQY", 10: "BCR", 170: "APS", 9: "HPC"}
    return by_attributes.get(att, "** Check **")


def _append_rows(frame, rows):
    """Return `frame` extended by `rows` (a list of column -> value dicts).

    Replaces the removed ``DataFrame.append``: all rows are added in a single
    step, in the column order of `frame`.  `frame` is returned unchanged when
    there is nothing to add.
    """
    if not rows:
        return frame
    new_rows = pd.DataFrame(rows, columns=frame.columns)
    if frame.empty:
        # Nothing to keep from the empty frame; this also avoids
        # concatenating an empty frame.
        return new_rows
    return pd.concat([frame, new_rows], ignore_index=True)


# Parsed rows, grouped by the frame they will be added to after the loop.
_pending = {key: [] for key in ("graank", "lcm", "aco", "ga", "pso", "pls", "prs")}

for file in files:
    f_path = join(path, file)
    try:
        res = pd.read_csv(f_path, names=["A", "B"], sep=':', header=None, engine='python')
    except Exception:
        # Report the unreadable file and skip it; continuing would either
        # fail on an undefined `res` or re-use the previous file's data.
        print(f_path)
        continue

    val = res['B']
    run = float(val[0][0:6])  # run-time
    mem = float(val[1][0:5])  # memory
    alg = val[2]  # algorithm

    if alg not in _ALGORITHMS:
        # Row positions are only known for the labels above; skip the file
        # rather than parse it with values left over from the previous one.
        print(f"{f_path}: unrecognised algorithm {alg!r}")
        continue
    family, label = _ALGORITHMS[alg]

    if family in ("graank", "lcm"):
        att = int(val[3])  # number of attributes in the data set
        size = int(val[4])  # data set size

        sup = float(val[5])  # minimum support
        pat = int(val[7])  # patterns
        inv = int(val[8])  # invalid patterns count
        extra = {}
    elif family == "aco":
        att = int(val[3])  # number of attributes in the data set
        size = int(val[4])  # data set size

        evp = float(val[5])  # evaporation-factor / erasure-probability

        sup = float(val[7])  # minimum support
        pat = int(val[9])  # patterns
        inv = int(val[10])  # invalid patterns count
        extra = {"E-Factor": evp}
    else:
        ssp = val[3]  # search space
        att = int(val[4])  # number of attributes in the data set
        size = int(val[5])  # data set size

        if family == "ga":
            pop = int(val[6])  # Initial population
            pc = float(val[7])  # children proportion
            gam = float(val[8])  # Gamma
            mu = float(val[9])  # Mu
            sig = float(val[10])  # Sigma
            mit = int(val[11])  # Maximum iteration count
            mev = int(val[12])  # Maximum evaluation count
            can = str(val[13])  # Candidates

            sup = float(val[14])  # minimum support
            pat = int(val[16])  # patterns
            inv = int(val[17])  # invalid patterns count
            extra = {"N-pop": pop, "PC": pc, "Gamma": gam, "Mu": mu, "Sigma": sig}
        elif family == "pso":
            pop = int(val[6])  # Initial particle population
            vel = float(val[7])  # velocity
            co1 = float(val[8])  # Personal coefficient
            co2 = float(val[9])  # Global coefficient
            mit = int(val[10])  # Maximum iteration count
            mev = int(val[11])  # Maximum evaluation count
            can = str(val[12])  # Candidates

            sup = float(val[13])  # minimum support
            pat = int(val[15])  # patterns
            inv = int(val[16])  # invalid patterns count
            extra = {"N-particle": pop, "Velocity": vel, "Coef-P": co1, "Coef-G": co2}
        elif family == "pls":
            step = float(val[6])  # Step size
            mit = int(val[8])  # Maximum iteration count
            mev = int(val[9])  # Maximum evaluation count
            can = str(val[10])  # Candidates

            sup = float(val[11])  # minimum support
            pat = int(val[13])  # patterns
            inv = int(val[14])  # invalid patterns count
            extra = {"Step-Size": step}
        else:  # "prs"
            mit = int(val[7])  # Maximum iteration count
            mev = int(val[8])  # Maximum evaluation count
            can = str(val[9])  # Candidates

            sup = float(val[10])  # minimum support
            pat = int(val[12])  # patterns
            inv = int(val[13])  # invalid patterns count
            extra = {}

        extra.update({"Search-Space": ssp, "Max-Iteration": mit, "Max-Evaluation": mev, "Candidates": can})

        # Fetch best-cost iteration values: the `mit` rows that follow the
        # ' Best Cost' marker.  They are parsed but not stored in any frame.
        it_st = res.loc[val == ' Best Cost'].index[0] + 1  # iterations
        its = [float(val[it_st + i]) for i in range(mit)]  # cost

    # Fetch patterns: the `pat` rows that follow the 'Pattern ' marker, each
    # kept as [text from column A, numeric value from column B].
    gp_pos = res.loc[res['A'] == 'Pattern '].index[0] + 1  # 1st position
    gps = [[res['A'][gp_pos + i], float(val[gp_pos + i])] for i in range(pat)]

    col = _dataset_label(att, size)
    # print(str(att) + ': ' + str(size) + ' - ' + str(col))

    row = {"Data-set": col, "Size": size, "Algorithm": label, "Support": sup, "Run-time": run, "Memory": mem, "Patterns": gps, "Pattern-count": pat, "Invalid-Patterns": inv}
    row.update(extra)
    _pending[family].append(row)

# Add the collected rows to each frame once, instead of re-copying the frame
# and re-scanning its "Algorithm" column for every file.
df_graank = _append_rows(df_graank, _pending["graank"])
df_lcm = _append_rows(df_lcm, _pending["lcm"])
df_aco = _append_rows(df_aco, _pending["aco"])
df_ga = _append_rows(df_ga, _pending["ga"])
df_pso = _append_rows(df_pso, _pending["pso"])
df_pls = _append_rows(df_pls, _pending["pls"])
df_prs = _append_rows(df_prs, _pending["prs"])

# Bare expressions used to inspect the result frames. NOTE(review): in a
# notebook cell only the value of the last expression (df_lcm) is rendered;
# the earlier names are evaluated and discarded -- presumably they are
# reordered or commented out by hand to pick the frame to display.
df_ga
#df_pso
#df_pls
#df_prs
df_graank
df_aco
df_lcm

Unnamed: 0,Data-set,Size,Algorithm,Support,Run-time,Memory,Patterns,Pattern-count,Invalid-Patterns
0,BCR,116,LcmGRAD,0.5,15.04,9746.0,"[[['8-', '4+'] , 0.8879310344827587], [['7-', ...",19,0
