# Import libraries

In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# 1. Reading the results

We read the results from the text files in `./spaces/` into pandas DataFrames for analysis.

In [None]:
# Directory that contains the result text files.
path = "./spaces/"
# Keep regular files only; sub-directories inside `path` are ignored.
files = list(filter(lambda name: isfile(join(path, name)), listdir(path)))

# Columns common to every algorithm's (initially empty) result table.
_BASE_COLS = [
    "Data-set", "Size", "Algorithm", "Support", "Run-time", "Memory",
    "Pattern-count", "Max-iteration", "Invalid-Patterns",
]
# Trailing columns shared by the GA / PSO / PLS / PRS tables.
# Note: "Max-Iteration" here and "Max-iteration" above are distinct labels.
_SEARCH_COLS = ["Max-Iteration", "Max-Evaluation", "Candidates"]

# One empty table per algorithm; algorithm-specific parameters sit between
# the common prefix and the shared suffix.
df_graank = pd.DataFrame(columns=list(_BASE_COLS))
df_aco = pd.DataFrame(columns=_BASE_COLS + ["E-Factor"])
df_lcm = pd.DataFrame(columns=list(_BASE_COLS))
df_ga = pd.DataFrame(
    columns=_BASE_COLS + ["N-pop", "PC", "Gamma", "Mu", "Sigma"] + _SEARCH_COLS
)
df_pso = pd.DataFrame(
    columns=_BASE_COLS + ["N-particle", "Velocity", "Coef-P", "Coef-G"] + _SEARCH_COLS
)
df_pls = pd.DataFrame(columns=_BASE_COLS + ["Step-Size"] + _SEARCH_COLS)
df_prs = pd.DataFrame(columns=_BASE_COLS + _SEARCH_COLS)

for file in files:
    f_path = join(path, file)
    try:
        # Split every line on ':' into two columns: 'A' (left part) and
        # 'B' (right part). All values below are read from column 'B'.
        res = pd.read_csv(f_path, names=["A","B"], sep = ':', header=None, engine='python')
    except Exception:
        # Report the unreadable file and move on to the next one. Without the
        # `continue`, the code below would run on the previous file's `res`
        # (or fail with NameError if this is the first file).
        print(f_path)
        continue

    # Only a fixed-width prefix of the first two values is parsed as a number
    # (presumably to drop a trailing unit suffix -- verify against a result file).
    run = float(res['B'][0][0:6]) # run-time
    mem = float(res['B'][1][0:5]) # memory
    # Compared verbatim below, including its surrounding spaces.
    alg = res['B'][2] # algorithm
    
    # Pull the per-algorithm fields out of column 'B'. The row positions are
    # hard-coded per algorithm because each one is parsed with a different
    # number of parameter rows before the support / pattern rows.
    if alg in (" GRAANK ", " LCM-GRAANK "):
        # GRAANK and LCM-GRAANK files are parsed with identical row positions.
        att = int(res['B'][3]) # number of attributes in the data set
        size = int(res['B'][4])  # data set size

        sup = float(res['B'][5]) # minimum support
        pat = int(res['B'][7]) # patterns
        inv = int(res['B'][8]) # invalid patterns count
    elif alg == " ACO-GRAANK ":
        att = int(res['B'][3]) # number of attributes in the data set
        size = int(res['B'][4])  # data set size

        evp = float(res['B'][5]) # evaporation-factor / erasure-probability

        sup = float(res['B'][7]) # minimum support
        pat = int(res['B'][9]) # patterns
        inv = int(res['B'][10]) # invalid patterns count
    elif alg == " GA-GRAANK ":
        ssp = res['B'][3] # search space
        att = int(res['B'][4]) # number of attributes in the data set
        size = int(res['B'][5])  # data set size

        pop = int(res['B'][6]) # Initial population
        pc = float(res['B'][7]) # children proportion
        gam = float(res['B'][8]) # Gamma
        mu = float(res['B'][9]) # Mu
        sig = float(res['B'][10]) # Sigma
        mit = int(res['B'][11]) # Maximum iteration count
        mev = int(res['B'][12]) # Maximum evaluation count
        can = str(res['B'][13]) # Candidates

        sup = float(res['B'][14]) # minimum support
        pat = float(res['B'][16]) # patterns
        inv = int(res['B'][17]) # invalid patterns count
    elif alg == " PSO-GRAANK ":
        ssp = res['B'][3] # search space
        att = int(res['B'][4]) # number of attributes in the data set
        size = int(res['B'][5])  # data set size

        pop = int(res['B'][6]) # Initial particle population
        vel = float(res['B'][7]) # velocity
        co1 = float(res['B'][8]) # Personal coefficient
        co2 = float(res['B'][9]) # Global coefficient
        mit = int(res['B'][10]) # Maximum iteration count
        mev = int(res['B'][11]) # Maximum evaluation count
        can = str(res['B'][12]) # Candidates

        sup = float(res['B'][13]) # minimum support
        pat = float(res['B'][15]) # patterns
        inv = int(res['B'][16]) # invalid patterns count
    elif alg == " PLS-GRAANK ":
        ssp = res['B'][3] # search space
        att = int(res['B'][4]) # number of attributes in the data set
        size = int(res['B'][5])  # data set size

        step = float(res['B'][6]) # Step size
        mit = int(res['B'][8]) # Maximum iteration count
        mev = int(res['B'][9]) # Maximum evaluation count
        can = str(res['B'][10]) # Candidates

        sup = float(res['B'][11]) # minimum support
        # NOTE(review): this previously read row 14, the same row as `inv`.
        # Every other branch reads patterns two rows after support and the
        # invalid count three rows after, so row 13 is used here -- confirm
        # against a PLS-GRAANK result file.
        pat = float(res['B'][13]) # patterns
        inv = int(res['B'][14]) # invalid patterns count
    elif alg == " PRS-GRAANK ":
        ssp = res['B'][3] # search space
        att = int(res['B'][4]) # number of attributes in the data set
        size = int(res['B'][5])  # data set size

        mit = int(res['B'][7]) # Maximum iteration count
        mev = int(res['B'][8]) # Maximum evaluation count
        can = str(res['B'][9]) # Candidates

        sup = float(res['B'][10]) # minimum support
        pat = float(res['B'][12]) # patterns
        inv = int(res['B'][13]) # invalid patterns count
            
    # Fetch best-cost iteration values.
    # `mit` is assigned only by the GA / PSO / PLS / PRS branches. For any
    # other algorithm it would be undefined, or left over from the previous
    # file, so the best-cost trace is read only for those four algorithms and
    # is an empty list otherwise.
    if alg in (" GA-GRAANK ", " PSO-GRAANK ", " PLS-GRAANK ", " PRS-GRAANK "):
        # The cost values start on the row right after the ' Best Cost' row.
        it_st = res.loc[res['B'] == ' Best Cost'].index[0] + 1 # iterations
        its = [float(res['B'][it_st + i]) for i in range(mit)] # cost
    else:
        its = []
    
    # Translate the attribute count into the short data-set label; an
    # unrecognised count gets a visible placeholder so it can be checked.
    col = {
        14: "HCV",
        98: "C2K",
        20: "CPX",
        13: "AQY",
        10: "BCR",
        21: "DIR",
        170: "APS",
        9: "HPC",
        11: "OMD",
    }.get(att, "** Check **")