In [1]:
# %load_ext autoreload
# %autoreload 2

In [2]:
import os
import pickle
import random

import utils
# The wildcard import stays ahead of the explicit third-party imports so that
# any name it re-exports is overridden by the explicit import, as before.
from models import *

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC, SVC
from skorch import NeuralNetClassifier, callbacks
from skorch.dataset import Dataset
from skorch.helper import predefined_split



Gudhi not found--GraphInducedComplex not available


In [3]:
# Experiment configuration: everything a reader might want to tune.
TEST_FRACTION = 0.20  # passed to utils.split_train_test when splitting the data
EPOCHS        = 20    # max_epochs of every skorch NeuralNetClassifier in this notebook
SEED          = 0     # single seed shared by all global random number generators

# Seed every global RNG so that Restart Kernel -> Run All reproduces the same
# numbers: scikit-learn estimators built without `random_state` draw from
# NumPy's global RNG, and PyTorch weight initialisation / dropout draw from
# torch's global generator.
# np.random.seed is called last on purpose: it returns None, so the cell does
# not display the Generator object that torch.manual_seed returns.
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

In [4]:
# Load the feature records. Only the first 50 are kept here so the notebook
# runs quickly while testing; swap in the commented line for the full data set.
# data = utils.read_and_build_features()
data = utils.read_and_build_features()[:50]

# Train/test split; TEST_FRACTION is handed straight to utils.split_train_test.
dataset = utils.split_train_test(data, TEST_FRACTION)

# Leading dimension of each feature array on the first training sample; these
# values are used as the first layer size of the networks defined further down.
corr_feature_size, pi_feature_size, pl_feature_size = (
    getattr(dataset.X_train[0], field).shape[0]
    for field in ('corr_vector', 'persistence_image', 'persistence_landscape')
)

# First entry of each branch spec given to NNPersDiag / NNHybridPers.
# NOTE(review): hard-coded rather than read from the data -- confirm it matches
# what the persistence-diagram feature extractors produce.
pers_input_size = 50

In [5]:
# Central registry used by every cell below: models are added with add_model,
# then trained, evaluated and tabulated through this one object.
# It receives the processed-data directory, the train/test split and
# overwrite=True (presumably: replace previously stored results -- confirm in
# ModelManager).
modelManager = ModelManager('../data_processed/', dataset, overwrite=True)

In [6]:
# Lookup table from a short feature-set tag to the `utils` function that is
# handed to modelManager.add_model together with a model.
# Tags: corr = correlation, pi = persistence image, pl = persistence landscape,
# pd = persistence diagram; a `_corr` suffix selects the combined extractor,
# `pd_kern` the one used for the kernel SVMs.
featureExtractors = dict(
    corr=utils.get_corr_features,
    pi_corr=utils.get_pers_img_corr_features,
    pl_corr=utils.get_pers_landscape_corr_features,
    pd_corr=utils.get_pers_diag_corr_features,
    pi=utils.get_pers_img_features,
    pl=utils.get_pers_landscape_features,
    pd=utils.get_pers_diag_features,
    pd_kern=utils.get_pers_diag_kern_features,
)

### Correlation models

In [7]:
# scheduler = callbacks.LRScheduler(policy='StepLR', step_size=25, gamma=0.1)
# checkpoint = callbacks.Checkpoint(f_params='best_model.pt', monitor='valid_acc_best')
# progbar = callbacks.ProgressBar()

In [8]:
# Each correlation-feature model is built and then immediately registered
# under its own name with the 'corr' feature extractor.

# Linear-kernel SVM
svm_corr = SVC(kernel='linear')
modelManager.add_model(svm_corr, 'svm_corr', featureExtractors['corr'])

# Random forest: 500 trees, depth capped at 5
rf_corr = RandomForestClassifier(n_estimators=500, max_depth=5)
modelManager.add_model(rf_corr, 'rf_corr', featureExtractors['corr'])

# Network with layer sizes corr_feature_size -> 100 -> 2 and dropout 0.5
nn_corr = NeuralNetClassifier(
    NNVec([corr_feature_size, 100, 2], dropout_prob=0.5),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)
# Disabled alternative: same network with lr=1 plus the scheduler/checkpoint
# callbacks, optionally restored from a saved checkpoint.
# nn_corr  = NeuralNetClassifier(NNVec([corr_feature_size, 100, 2], dropout_prob=0.5),
#                                max_epochs=EPOCHS, verbose=False, warm_start=True, lr=1,
#                                callbacks=[scheduler, checkpoint])
# nn_corr.initialize()
# nn_corr.load_params(f_params='best_model.pt')
modelManager.add_model(nn_corr, 'nn_corr', featureExtractors['corr'])

In [9]:
# nn_corr.initialize()
# nn_corr.load_params(f_params='best_model.pt')

### Persistence image models

In [10]:
# Each persistence-image model is built and then immediately registered
# under its own name with the 'pi' feature extractor.

# Linear-kernel SVM
svm_pi = SVC(kernel='linear')
modelManager.add_model(svm_pi, 'svm_pi', featureExtractors['pi'])

# Random forest: 500 trees, depth capped at 5
rf_pi = RandomForestClassifier(n_estimators=500, max_depth=5)
modelManager.add_model(rf_pi, 'rf_pi', featureExtractors['pi'])

# Network with layer sizes pi_feature_size -> 10 -> 2.
# Unlike the other NNVec models here, no dropout_prob is passed.
nn_pi = NeuralNetClassifier(
    NNVec([pi_feature_size, 10, 2]),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)
modelManager.add_model(nn_pi, 'nn_pi', featureExtractors['pi'])

### Persistence landscape models

In [11]:
# Each persistence-landscape model is built and then immediately registered
# under its own name with the 'pl' feature extractor.

# Linear-kernel SVM
svm_pl = SVC(kernel='linear')
modelManager.add_model(svm_pl, 'svm_pl', featureExtractors['pl'])

# Random forest: 500 trees, depth capped at 5
rf_pl = RandomForestClassifier(n_estimators=500, max_depth=5)
modelManager.add_model(rf_pl, 'rf_pl', featureExtractors['pl'])

# Network with layer sizes pl_feature_size -> 10 -> 2 and dropout 0.5
nn_pl = NeuralNetClassifier(
    NNVec([pl_feature_size, 10, 2], dropout_prob=0.5),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)
modelManager.add_model(nn_pl, 'nn_pl', featureExtractors['pl'])

### Persistence diagram models

In [12]:
# Persistence-diagram network: two identical [pers_input_size, 25] branch
# specs followed by a [50, 2] spec (50 = 25 + 25), with dropout 0.5.
# NOTE(review): how NNPersDiag combines the branches is defined in `models`.
nn_pd = NeuralNetClassifier(
    NNPersDiag(
        [[pers_input_size, 25], [pers_input_size, 25]],
        [50, 2],
        dropout_prob=0.5,
    ),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)

# Registered under 'nn_pd' with the persistence-diagram feature extractor.
modelManager.add_model(nn_pd, 'nn_pd', featureExtractors['pd'])

### Hybrid models

In [13]:
# Hybrid networks: each one is given specs for topological inputs and for the
# correlation vector, and is registered with the matching combined extractor.

# Persistence diagrams + correlation: two [pers_input_size, 25] specs, a
# [corr_feature_size, 500, 25] spec, then [75, 2] (75 = 25 + 25 + 25).
pd_corr = NeuralNetClassifier(
    NNHybridPers(
        [[pers_input_size, 25], [pers_input_size, 25]],
        [corr_feature_size, 500, 25],
        [75, 2],
        dropout_prob=0.5,
    ),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)
modelManager.add_model(pd_corr, 'pd_corr', featureExtractors['pd_corr'])

# Persistence image + correlation: two 10-wide branch specs, then [20, 10, 2].
pi_corr = NeuralNetClassifier(
    NNHybridVec(
        [[pi_feature_size, 10], [corr_feature_size, 10]],
        [20, 10, 2],
        dropout_prob=0.5,
    ),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)
modelManager.add_model(pi_corr, 'pi_corr', featureExtractors['pi_corr'])

# Persistence landscape + correlation: same layout as pi_corr.
pl_corr = NeuralNetClassifier(
    NNHybridVec(
        [[pl_feature_size, 10], [corr_feature_size, 10]],
        [20, 10, 2],
        dropout_prob=0.5,
    ),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)
modelManager.add_model(pl_corr, 'pl_corr', featureExtractors['pl_corr'])

### Topological kernels

In [14]:
# One PersistenceKernelSVM per kernel type, created in the listed order.
(svm_scalespace,
 svm_weightedgaussian,
 svm_slicedwasserstein,
 svm_fisher) = (
    PersistenceKernelSVM(kernel_type=kernel_name)
    for kernel_name in ('scale_space', 'weighted_gaussian',
                        'sliced_wasserstein', 'fisher')
)

# The kernel SVMs are currently NOT registered: the add_model calls below are
# disabled, so the manager never trains or scores these four models.
# modelManager.add_model(svm_scalespace, 'svm_scalespace', featureExtractors['pd_kern'])
# modelManager.add_model(svm_weightedgaussian, 'svm_weightedgaussian', featureExtractors['pd_kern'])
# modelManager.add_model(svm_slicedwasserstein, 'svm_slicedwasserstein', featureExtractors['pd_kern'])
# modelManager.add_model(svm_fisher, 'svm_fisher', featureExtractors['pd_kern'])

In [15]:
# Train the models registered with add_model above (13 add_model calls are
# active in this notebook, matching the max=13 of the progress bar shown below).
modelManager.train_all()

HBox(children=(IntProgress(value=0, max=13), HTML(value='')))




In [16]:
# Score the registered models, passing scikit-learn's accuracy_score as the metric.
# NOTE(review): presumably computed on the held-out test split -- confirm in ModelManager.
modelManager.evaluate_all(accuracy_score)

In [17]:
# Display the results table (columns: Model, Train time, Score).
modelManager.tabulate()

Unnamed: 0,Model,Train time,Score
0,svm_corr,0.061681,0.6
1,rf_corr,0.555226,0.6
2,nn_corr,0.305447,0.4
3,svm_pi,0.006061,0.6
4,rf_pi,0.313481,0.8
5,nn_pi,0.069431,0.6
6,svm_pl,0.003193,0.7
7,rf_pl,0.306805,0.6
8,nn_pl,0.07707,0.6
9,nn_pd,1.46854,0.5


In [18]:
# Persist the manager (with the models registered on it) for later reuse.
# `os` and `pickle` are imported in the import cell at the top of the notebook.
model_path = '../models/modelManager.pkl'

# Create the target directory first, so a missing ../models/ folder does not
# raise FileNotFoundError and throw away the training done above.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# NOTE: pickle files can execute arbitrary code when loaded; only load this
# file back from a trusted location.
with open(model_path, 'wb') as f:
    pickle.dump(modelManager, f)