In [1]:
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd 
from LazyFCA import cross_validation, preprocessing

# Loading and preprocessing datasets

## First data set: Tic-Tac-Toe Endgame Dataset (UCI)
### link: https://archive.ics.uci.edu/ml/datasets/Tic-Tac-Toe+Endgame


In [2]:
# Options that the preprocessing call for this data set reads:
# the label-to-integer mapping and the shuffle flag.
shuffle = True
target_dict = dict(negative=0, positive=1)
target_column_TTT = 'V10'

# Raw Tic-Tac-Toe endgame table, read from the local CSV copy.
df_TTT = pd.read_csv('Data/tic-tac-toe-endgame.csv')

## Second data set: Mushroom Classification

In [4]:
# NOTE: `target_dict` and `shuffle` are shared names; this cell rebinds them,
# replacing the values set by the Tic-Tac-Toe loading cell.
shuffle = True
target_dict = dict(p=0, e=1)
target_column_MC = 'class'

# Raw mushroom table, read from the local CSV copy.
df_MC = pd.read_csv('Data/Mushrooms/mushrooms.csv')

## Preprocessing: converting categorical features to binary features

In [3]:
# Preprocess the Tic-Tac-Toe table (categorical -> binary features).
#
# The label mapping is spelled out here instead of reading the shared
# `target_dict` variable: the mushroom loading cell, which sits above this
# cell in the notebook, rebinds `target_dict` to {'p': 0, 'e': 1}. On a
# top-to-bottom run (Restart & Run All) the 'negative'/'positive' labels
# would otherwise be encoded with the wrong mapping.
data_TTT = preprocessing(
    df=df_TTT,
    target_column=target_column_TTT,  # 'V10'
    target_dict={'negative': 0, 'positive': 1},
    shuffle=shuffle,
)

In [5]:
# Preprocess the mushroom table with the label mapping and shuffle flag
# set in the mushroom loading cell.
data_MC = preprocessing(
    df=df_MC,
    target_column=target_column_MC,  # 'class'
    target_dict=target_dict,
    shuffle=shuffle,
)

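The `data_MC` dataset is too big, so we keep only its first 1000 examples. Because `preprocessing` was called with `shuffle=True`, these are presumably a random subset of the data.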

In [7]:
# Number of leading entries of the preprocessed mushroom data kept for the experiments.
MC_SUBSET_SIZE = 1000
data_MC_short = data_MC[:MC_SUBSET_SIZE]

# Base algorithm

## Tic-Tac-Toe End game Dataset UCI

In [24]:
# Base configuration on Tic-Tac-Toe: 3-fold cross-validation of the 'FCA' model
# with min_elems=0, balance=False and prop=1.
Alg1_TTT = cross_validation(
    data_TTT,
    target_column='V10',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0, balance=False, prop=1),
)

100%|██████████| 319/319 [04:08<00:00,  1.10it/s]
100%|██████████| 319/319 [04:16<00:00,  1.21it/s]
100%|██████████| 320/320 [04:17<00:00,  1.23it/s]


In [27]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg1_TTT_df = (
    pd.DataFrame(Alg1_TTT)
    .assign(Time=['04:08', '04:16', '04:17'])
    .reset_index()
)

In [28]:
# Display the results table for the base configuration on Tic-Tac-Toe.
Alg1_TTT_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.69906,0.676768,1.0,0.59322,04:08
1,1,0.777429,0.749104,0.995238,0.676518,04:16
2,2,0.7875,0.763441,0.990698,0.681063,04:17


## Mushroom Classification

In [19]:
# Base configuration on the 1000-example mushroom subset: 3-fold cross-validation
# of the 'FCA' model with min_elems=0, balance=False and prop=1.
Alg1_MC = cross_validation(
    data_MC_short,
    target_column='class',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0, balance=False, prop=1),
)

100%|██████████| 333/333 [22:46<00:00,  3.64s/it]
100%|██████████| 333/333 [21:56<00:00,  4.07s/it]
100%|██████████| 334/334 [23:16<00:00,  3.69s/it]


In [20]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg1_MC_df = (
    pd.DataFrame(Alg1_MC)
    .assign(Time=['22:46', '21:56', '23:16'])
    .reset_index()
)

In [21]:
# Display the results table for the base configuration on the mushroom subset.
Alg1_MC_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.990991,0.982249,1.0,0.991018,22:46
1,1,1.0,1.0,1.0,1.0,21:56
2,2,1.0,1.0,1.0,1.0,23:16


# Second algorithm

## Tic-Tac-Toe End game Dataset UCI

In [30]:
# Second configuration on Tic-Tac-Toe: identical to the base run except min_elems=0.7.
Alg2_TTT = cross_validation(
    data_TTT,
    target_column='V10',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0.7, balance=False, prop=1),
)

100%|██████████| 319/319 [01:21<00:00,  4.07it/s]
100%|██████████| 319/319 [01:26<00:00,  3.54it/s]
100%|██████████| 320/320 [01:19<00:00,  4.13it/s]


In [34]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg2_TTT_df = (
    pd.DataFrame(Alg2_TTT)
    .assign(Time=['01:21', '01:26', '01:19'])
    .reset_index()
)

In [35]:
# Display the results table for the second configuration on Tic-Tac-Toe.
Alg2_TTT_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.739812,0.705674,1.0,0.654167,01:21
1,1,0.824451,0.796364,1.0,0.72,01:26
2,2,0.765625,0.734982,1.0,0.665179,01:19


## Mushroom Classification

In [38]:
# Second configuration on the mushroom subset: identical to the base run except min_elems=0.9.
Alg2_MC = cross_validation(
    data_MC_short,
    target_column='class',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0.9, balance=False, prop=1),
)

100%|██████████| 333/333 [03:11<00:00,  1.57it/s]
100%|██████████| 333/333 [03:02<00:00,  2.72it/s]
100%|██████████| 334/334 [03:04<00:00,  1.68it/s]


In [39]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg2_MC_df = (
    pd.DataFrame(Alg2_MC)
    .assign(Time=['03:11', '03:02', '03:04'])
    .reset_index()
)

In [40]:
# Display the results table for the second configuration on the mushroom subset.
Alg2_MC_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.981982,0.965318,1.0,0.981928,03:11
1,1,0.981982,0.967213,1.0,0.980769,03:02
2,2,0.982036,0.966851,1.0,0.981132,03:04


# Third algorithm

## Tic-Tac-Toe End game Dataset UCI

In [43]:
# Third configuration on Tic-Tac-Toe: min_elems=0.7 as in the second run, now with balance=True.
Alg3_TTT = cross_validation(
    data_TTT,
    target_column='V10',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0.7, balance=True, prop=1),
)

100%|██████████| 319/319 [01:24<00:00,  3.41it/s]
100%|██████████| 319/319 [01:35<00:00,  3.61it/s]
100%|██████████| 320/320 [01:22<00:00,  3.62it/s]


In [44]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg3_TTT_df = (
    pd.DataFrame(Alg3_TTT)
    .assign(Time=['01:24', '01:35', '01:22'])
    .reset_index()
)

In [45]:
# Display the results table for the third configuration on Tic-Tac-Toe.
Alg3_TTT_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.84953,0.890547,0.873171,0.840094,01:24
1,1,0.833856,0.870968,0.883178,0.808255,01:35
2,2,0.896875,0.92233,0.917874,0.888141,01:22


## Mushroom Classification

In [46]:
# Third configuration on the mushroom subset: min_elems=0.9 as in the second run, now with balance=True.
Alg3_MC = cross_validation(
    data_MC_short,
    target_column='class',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0.9, balance=True, prop=1),
)

100%|██████████| 333/333 [03:18<00:00,  1.58it/s]
100%|██████████| 333/333 [03:15<00:00,  1.42it/s]
100%|██████████| 334/334 [03:12<00:00,  1.23it/s]


In [47]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg3_MC_df = (
    pd.DataFrame(Alg3_MC)
    .assign(Time=['03:18', '03:15', '03:12'])
    .reset_index()
)

In [48]:
# Display the results table for the third configuration on the mushroom subset.
Alg3_MC_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.993994,0.988764,1.0,0.993631,03:18
1,1,0.987988,0.984043,0.994624,0.987108,03:15
2,2,0.964072,0.934132,0.993631,0.965742,03:12


# Fourth algorithm

## Tic-Tac-Toe End game Dataset UCI

In [49]:
# Fourth configuration on Tic-Tac-Toe: same as the third run, but with prop lowered from 1 to 0.5.
Alg4_TTT = cross_validation(
    data_TTT,
    target_column='V10',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0.7, balance=True, prop=0.5),
)

100%|██████████| 319/319 [00:23<00:00, 15.41it/s]
100%|██████████| 319/319 [00:20<00:00, 15.54it/s]
100%|██████████| 320/320 [00:21<00:00, 14.09it/s]


In [55]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg4_TTT_df = (
    pd.DataFrame(Alg4_TTT)
    .assign(Time=['00:23', '00:20', '00:21'])
    .reset_index()
)

In [56]:
# Display the results table for the fourth configuration on Tic-Tac-Toe.
Alg4_TTT_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.768025,0.855721,0.792627,0.754157,00:23
1,1,0.833856,0.911111,0.81592,0.840164,00:20
2,2,0.7625,0.823529,0.807692,0.743132,00:21


## Mushroom Classification

In [58]:
# Fourth configuration on the mushroom subset: same as the third run, but with prop lowered from 1 to 0.5.
Alg4_MC = cross_validation(
    data_MC_short,
    target_column='class',
    Kfolds=3,
    shuffle=True,
    model='FCA',
    model_params=dict(max_int=1, min_elems=0.9, balance=True, prop=0.5),
)

100%|██████████| 333/333 [00:52<00:00,  4.80it/s]
100%|██████████| 333/333 [00:49<00:00,  6.90it/s]
100%|██████████| 334/334 [00:49<00:00,  6.75it/s]


In [59]:
# Results table: the cross-validation results as a frame, plus a 'Time' column.
# The durations are copied by hand from the progress bars of the run above,
# so they must be updated whenever the run is repeated.
Alg4_MC_df = (
    pd.DataFrame(Alg4_MC)
    .assign(Time=['00:52', '00:49', '00:49'])
    .reset_index()
)

In [60]:
# Display the results table for the fourth configuration on the mushroom subset.
Alg4_MC_df

Unnamed: 0,index,accuracy,precision,recall,ROC_AUC,Time
0,0,0.975976,0.961538,0.994318,0.974866,00:52
1,1,0.972973,0.965714,0.982558,0.972646,00:49
2,2,0.98503,0.977011,0.994152,0.984806,00:49
