# Import libraries

In [3]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# 1. Reading the results

We read each result file in `./spaces/` (plain text, one `key: value` entry per line) and collect the parsed values into pandas DataFrames for analysis.

In [4]:
# Directory holding one plain-text result file per algorithm run.
path = "./spaces/"
files = [f for f in listdir(path) if isfile(join(path, f))]

# Columns shared by every per-algorithm results table; the algorithm-specific
# parameter columns are appended to this list below.
base_cols = ["Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory", "Patterns", "Pattern-count", "Invalid-Patterns"]
ga_cols = base_cols + ["Search-Space", "N-pop", "PC", "Gamma", "Mu", "Sigma", "Max-Iteration", "Max-Evaluation", "Candidates"]

df_graank = pd.DataFrame(columns=base_cols)
df_aco = pd.DataFrame(columns=base_cols + ["E-Factor"])
df_lcm = pd.DataFrame(columns=base_cols)
df_pso = pd.DataFrame(columns=base_cols + ["Search-Space", "N-particle", "Velocity", "Coef-P", "Coef-G", "Max-Iteration", "Max-Evaluation", "Candidates"])
df_pls = pd.DataFrame(columns=base_cols + ["Search-Space", "Step-Size", "Max-Iteration", "Max-Evaluation", "Candidates"])
df_prs = pd.DataFrame(columns=base_cols + ["Search-Space", "Max-Iteration", "Max-Evaluation", "Candidates"])

# GA rows are gathered in a plain list and turned into a DataFrame once after
# the loop: DataFrame.append was removed in pandas 2.0 and re-copied the whole
# frame on every call.
ga_rows = []

for file in files:
    f_path = join(path, file)
    try:
        # Every line is split at ':' into a key column "A" and a value column "B".
        res = pd.read_csv(f_path, names=["A", "B"], sep=':', header=None, engine='python')
    except Exception:
        # Best-effort: report the unreadable file and move on. Without the
        # `continue`, the code below would silently re-use the previous
        # file's `res` (or fail with NameError on the very first file).
        print(f_path)
        continue

    # The first three value rows are common to all algorithms. The slices keep
    # only the leading characters of the value (the rest is presumably a unit
    # suffix -- TODO confirm against the result-file writer).
    run = float(res['B'][0][0:6])  # run-time
    mem = float(res['B'][1][0:5])  # memory
    alg = res['B'][2]  # algorithm label, compared verbatim (including spaces)

    if alg == " GRAANK " or alg == " LCM-GRAD (1.0)":
        # GRAANK and LCM-GRAD share the same row layout.
        att = int(res['B'][3])  # number of attributes in the data set
        size = int(res['B'][4])  # data set size

        sup = float(res['B'][5])  # minimum support
        pat = int(res['B'][7])  # patterns
        inv = int(res['B'][8])  # invalid patterns count
    elif alg == " ACO-GRAANK (v4.0)":
        att = int(res['B'][3])  # number of attributes in the data set
        size = int(res['B'][4])  # data set size

        evp = float(res['B'][5])  # evaporation-factor / erasure-probability

        sup = float(res['B'][7])  # minimum support
        pat = int(res['B'][9])  # patterns
        inv = int(res['B'][10])  # invalid patterns count
    else:
        # Remaining files carry an extra "search space" row before the
        # data-set description, followed by algorithm-specific parameters.
        ssp = res['B'][3]  # search space
        att = int(res['B'][4])  # number of attributes in the data set
        size = int(res['B'][5])  # data set size

        if alg == " GA-GRAANK (v2.0)" or alg == " GA-GRAANK (v1.0)":
            pop = int(res['B'][6])  # Initial population
            pc = float(res['B'][7])  # children proportion
            gam = float(res['B'][8])  # Gamma
            mu = float(res['B'][9])  # Mu
            sig = float(res['B'][10])  # Sigma
            mit = int(res['B'][11])  # Maximum iteration count
            mev = int(res['B'][12])  # Maximum evaluation count
            can = str(res['B'][13])  # Candidates

            sup = float(res['B'][14])  # minimum support
            pat = int(res['B'][16])  # patterns
            inv = int(res['B'][17])  # invalid patterns count
        elif alg == " PSO-GRAANK (v2.0)" or alg == " PSO-GRAANK (v1.0)":
            pop = int(res['B'][6])  # Initial particle population
            vel = float(res['B'][7])  # velocity
            co1 = float(res['B'][8])  # Personal coefficient
            co2 = float(res['B'][9])  # Global coefficient
            mit = int(res['B'][10])  # Maximum iteration count
            mev = int(res['B'][11])  # Maximum evaluation count
            can = str(res['B'][12])  # Candidates

            sup = float(res['B'][13])  # minimum support
            pat = int(res['B'][15])  # patterns
            inv = int(res['B'][16])  # invalid patterns count
        elif alg == " PLS-GRAANK (v2.0)" or alg == " PLS-GRAANK (v1.0)":
            step = float(res['B'][6])  # Step size
            # Row 7 is not read for this algorithm.
            mit = int(res['B'][8])  # Maximum iteration count
            mev = int(res['B'][9])  # Maximum evaluation count
            can = str(res['B'][10])  # Candidates

            sup = float(res['B'][11])  # minimum support
            pat = int(res['B'][13])  # patterns
            inv = int(res['B'][14])  # invalid patterns count
        elif alg == " PRS-GRAANK (v2.0)" or alg == " PRS-GRAANK (v1.0)":
            # Row 6 is not read for this algorithm.
            mit = int(res['B'][7])  # Maximum iteration count
            mev = int(res['B'][8])  # Maximum evaluation count
            can = str(res['B'][9])  # Candidates

            sup = float(res['B'][10])  # minimum support
            pat = int(res['B'][12])  # patterns
            inv = int(res['B'][13])  # invalid patterns count
        else:
            # Unknown label: the row layout is not known, so `mit`, `pat`, ...
            # would otherwise be stale values from an earlier file.
            print("Skipping (unrecognised algorithm " + repr(alg) + "): " + f_path)
            continue

        # Fetch best-cost iteration values: the `mit` rows that follow the
        # row whose value column is ' Best Cost'.
        it_st = res.loc[res['B'] == ' Best Cost'].index[0] + 1  # iterations
        its = [float(res['B'][it_st + i]) for i in range(mit)]  # cost

    # Fetch patterns: the `pat` rows that follow the 'Pattern ' header row,
    # each stored as [pattern text, support value].
    gp_pos = res.loc[res['A'] == 'Pattern '].index[0] + 1  # 1st position
    gps = [[res['A'][pos], float(res['B'][pos])] for pos in range(gp_pos, gp_pos + pat)]

    # Map (attribute count, size) to the short data-set label used in tables.
    if att == 11:
        if size == 1999:
            col = "OMD"
        else:
            col = "HCV"
    elif att == 98:
        col = "C2K"
    elif att == 15:
        col = "AQY"
    elif att == 10:
        col = "BCR"
    elif att == 21:
        if size == 8074:
            col = "DIR"
        else:
            col = "CPX"
    elif att == 170:
        col = "APS"
    elif att == 9:
        col = "HPC"
    else:
        col = "** Check **"
    # print(str(att) + ': ' + str(size) + ' - ' + str(col))

    # Only GA-GRAANK results are collected here; files of every other
    # algorithm are parsed above but not stored.
    if alg == " GA-GRAANK (v2.0)" or alg == " GA-GRAANK (v1.0)":
        ga_rows.append({"Data-set": col, "Size": size, "Algorithm": alg, "Support": sup, "Run-time": run, "Memory": mem, "Patterns": gps, "Pattern-count": pat, "Invalid-Patterns": inv, "Search-Space": ssp, "N-pop": pop, "PC": pc, "Gamma": gam, "Mu": mu, "Sigma": sig, "Max-Iteration": mit, "Max-Evaluation": mev, "Candidates": can})

# Build the GA table in one step (an empty list still yields the right columns).
df_ga = pd.DataFrame(ga_rows, columns=ga_cols)
# Rename the raw algorithm labels once, by assignment, instead of calling
# replace(..., inplace=True) on a selected column inside the loop.
df_ga['Algorithm'] = df_ga['Algorithm'].replace({' GA-GRAANK (v1.0)': 'GaGRAD-BM', ' GA-GRAANK (v2.0)': 'GaGRAD-NU'})

df_ga

Unnamed: 0,Data-set,Size,Algorithm,Support,Run-time,Memory,Patterns,Pattern-count,Invalid-Patterns,Search-Space,N-pop,PC,Gamma,Mu,Sigma,Max-Iteration,Max-Evaluation,Candidates
0,AQY,9357,GaGRAD-NU,0.5,260.000,285.0,"[[['2+', '3+', '5+', '6+', '7+', '9+'] , 0.505...",3,46,Numeric,20,0.5,547.0,0.741,0.654,20,400,"[16777215, 0, 16777215, 0, 16777215, 0, 16777..."
1,DIR,8074,GaGRAD-NU,0.5,113.900,279.0,"[[['2+', '4+'] , 0.519], [['2+', '5+', '8+', '...",2,78,Numeric,8,0.5,746.0,0.842,0.681,20,160,"[4294967295, 0, 2949662255, 0, 0, 4294967295,..."
2,C2K,3942,GaGRAD-NU,0.5,88.050,301.0,"[[['0+', '23+'] , 0.505], [['0+', '1+'] , 0.518]]",2,399,Numeric,20,0.5,547.0,0.741,0.654,20,400,"[0, 1329227995784915872903807060280344575, 0,..."
3,DIR,8074,GaGRAD-BM,0.5,373.600,278.0,"[[['2+', '6+'] , 0.538]]",1,9,Bitmap,8,0.5,746.0,0.842,0.681,20,160,"[1704564397.0, 2794805598.0, 1704564393.0, 27..."
4,CPX,522,GaGRAD-BM,0.5,1.382,283.0,"[[['3+', '13+', '18+'] , 0.542]]",1,165,Bitmap,12,0.5,529.0,0.856,0.209,20,240,"[440055800902.0, 660261918793.0, 439518668118..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,HPC,10001,GaGRAD-NU,0.5,111.800,268.0,"[[['2+', '4-'] , 0.758]]",1,97,Numeric,17,0.5,527.0,0.649,0.700,20,320,"[0, 1023, 0, 1023, 0, 1023, 0, 1023, 0, 1023,..."
68,APS,2473,GaGRAD-NU,0.5,101.600,352.0,"[[['0+', '2+', '16+', '22+', '23+', '62+', '67...",1,225,Numeric,20,0.5,547.0,0.741,0.654,20,400,[71775952304054037207029499748799384310440130...
69,OMD,1999,GaGRAD-BM,0.5,5.475,270.0,"[[['3-', '4-'] , 0.557]]",1,3,Bitmap,8,0.5,746.0,0.842,0.681,20,160,"[26278.0, 22938.0, 26278.0, 22938.0, 38517.0,..."
70,HPC,10001,GaGRAD-NU,0.5,118.300,267.0,"[[['2+', '4-'] , 0.758]]",1,88,Numeric,17,0.5,527.0,0.649,0.700,20,320,"[1023, 0, 233, 0, 0, 1023, 0, 1023, 1023, 0, ..."
