## Load your data

In [1]:
# import packages
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import cnmfereview as cr
import config as cfg
import os
from joblib import dump, load

# Directory that later cells use to load and save serialized models.
MODELDIR = Path('../best_models')

# Build the labeled dataset; every setting is read from the project's config module.
data = cr.Dataset(
    data_paths=cfg.data_paths,
    exp_id=cfg.exp_id,
    img_shape=cfg.img_shape,
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len,
)

# Hold out 20% of the samples as the test set.
# NOTE(review): seed=10 presumably makes the split repeatable — confirm in cnmfereview.
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20,
    seed=10
)
# Last expression of the cell, so the training matrix shape is displayed.
x_train.shape

No preprocessing on spatial data
File ../data/cr_tutorialA_cropped.npy already exists and has been loaded instead.
No preprocessing on trace data.                   ../data/cr_tutorialCraw_normalized.npy already                   exists and has been loaded instead.
Successfully loaded data.
Training and test data loaded


(11603, 6900)

_________________
**NOTE: Remove the next cell when training your own models.** This step uses fewer ROIs (only ~3,000 instead of ~11,000) from the tutorial dataset to speed up computation in the tutorial. Do not subsample like this when training on your own data: in practice you want to use as many data samples as possible to get the best results.

In [2]:
# remove or comment out this cell when using on your own data
# NOTE: the two subsampling lines below are currently commented out, so the
# training split is reported (and used) at its full size.
# from sklearn.model_selection import train_test_split
# x_train, _, y_train, _ = train_test_split(x_train, y_train, test_size=0.75)

# Report how many samples ended up in each split.
print(f"Number of samples in training set: {x_train.shape[0]}") 
print(f"Number of samples in test set: {x_test.shape[0]}")

Number of samples in training set: 11603
Number of samples in test set: 2901


# Train the saved models on your data
### Deep Classifier

In [3]:
import ignite
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import ModelCheckpoint, EarlyStopping
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tensorboardX import SummaryWriter
import optuna

from nn.model import Model
from nn.train import train, NNDataset

AttributeError: module 'cupy' has no attribute 'util'

In [None]:
# Sanity-check expression; it is not the last statement of the cell, so
# nothing is displayed for it.
data.spatial.shape, data.trace.shape, data.targets.shape
# Re-split with for_deep=True, using the same test fraction and seed as the
# first split.
# NOTE(review): this rebinds x_train/x_test/y_train/y_test, which the sklearn
# cells further down also read — confirm they are meant to see this split.
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20, seed=10, for_deep=True)

# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
trainsets = NNDataset(x_train, y_train, device)
testsets = NNDataset(x_test, y_test, device)
train_loader = torch.utils.data.DataLoader(trainsets, batch_size=32)
test_loader = torch.utils.data.DataLoader(testsets, batch_size=32)
# Configuration of the deep classifier that the next cells train and save.
model = Model(
    s_stage='resnet18',
    res_block_num=5,
    t_hidden_dim=500,
    t_output_dim=500
)

In [None]:
# train(...) hands back a (score, model) pair; `model` is rebound to the returned model.
score, model = train(model, train_loader, test_loader, device)

In [None]:
# Move the model to the CPU before serializing its weights.
model = model.to('cpu')
# Save next to the other stored models; MODELDIR is the same '../best_models'
# directory the rest of the notebook loads from and dumps to.
torch.save(model.state_dict(), MODELDIR / 'deep_model.pth')

In [None]:
# Sweep the number of residual blocks (1..5) and record the score of each run.
# 5 is best
scores = []
for i in range(1, 6):
    model = Model(
        s_stage='ResNet',
        res_block_num=i,
    )
    # Train with the same call as the main training cell, which unpacks a
    # (score, model) pair; only the score is kept for the comparison.
    score, _ = train(model, train_loader, test_loader, device)
    scores.append(score)
# One line per configuration: "<number of blocks> <score>".
for block_num, block_score in enumerate(scores, start=1):
    print(block_num, block_score)

In [None]:
def optimaze_san(trial):
    """Optuna objective for the 'SAN' spatial stage.

    Args:
        trial: Optuna trial used to sample `block_num`, `layer_size_hop`
            and `kernel_size`.

    Returns:
        The negated score, so that the study's default minimization
        maximizes the score.
    """
    block_num = trial.suggest_int('block_num', 1, 5)
    layer_size_hop = trial.suggest_int('layer_size_hop', 2, 5)
    # Odd kernel sizes only (3, 5, 7); `step` is passed by keyword so the call
    # does not depend on the positional layout of suggest_int.
    kernel_size = trial.suggest_int('kernel_size', 3, 7, step=2)

    # The first block is fixed at 3 layers with kernel 3; each further block
    # i gets 2 + i * layer_size_hop layers and the sampled kernel size.
    layers = [3] + [2 + i * layer_size_hop for i in range(1, block_num)]
    kernels = [3] + [kernel_size] * (block_num - 1)

    model = Model(
        s_stage='SAN',
        san_layers=layers,
        san_kernels=kernels,
    )
    # Same call shape as the main training cell, which unpacks (score, model);
    # the loaders come from the deep-classifier setup cell.
    # NOTE(review): confirm against nn.train.train's actual signature.
    score, _ = train(model.to(device), train_loader, test_loader, device)
    return -score


study = optuna.create_study()
study.optimize(optimaze_san, n_trials=30)

In [None]:
def optimaze_lstm(trial):
    """Optuna objective that tunes `t_hidden_dim` and `t_output_dim`.

    The spatial stage is held fixed ('ResNet' with 4 residual blocks) while
    both dimensions are sampled from 50 to 500 in steps of 50.

    Args:
        trial: Optuna trial used to sample the two dimensions.

    Returns:
        The negated score, so that the study's default minimization
        maximizes the score.
    """
    model = Model(
        s_stage='ResNet',
        res_block_num=4,
        # `step` is passed by keyword so the call does not depend on the
        # positional layout of suggest_int.
        t_hidden_dim=trial.suggest_int('t_hidden_dim', 50, 500, step=50),
        t_output_dim=trial.suggest_int('t_output_dim', 50, 500, step=50),
    )
    # Same call shape as the main training cell, which unpacks (score, model);
    # the loaders come from the deep-classifier setup cell.
    # NOTE(review): confirm against nn.train.train's actual signature.
    score, _ = train(model.to(device), train_loader, test_loader, device)
    return -score


study = optuna.create_study()
study.optimize(optimaze_lstm, n_trials=30)

### Sklearn Classifier

In [None]:
import sklearn
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

def print_score(results, y_true=None):
    """Print accuracy, F1, precision and recall on one ' & '-separated line.

    Args:
        results: Predicted labels to score.
        y_true: Ground-truth labels to score against. When omitted, the
            notebook-level `y_test` is used (looked up at call time), which
            keeps existing `print_score(results)` calls working unchanged.
    """
    if y_true is None:
        y_true = y_test
    print('Acc, F1, P, R: ', end='')
    # The first three metrics are each followed by the ' & ' separator.
    for metric in (accuracy_score, f1_score, precision_score):
        print(f'{metric(y_true, results):.3f} & ', end='')
    # The last metric ends the line.
    print(f'{recall_score(y_true, results):.3f}')

In [35]:
from sklearn.tree import DecisionTreeClassifier
# Baseline: decision tree with default hyper-parameters.
tree_model = DecisionTreeClassifier()
tree_model.fit(x_train, y_train)
results_tree = tree_model.predict(x_test)
# Score the predictions computed on the line above; the previously used name
# `recall_tree` is not defined in this notebook and raised a NameError.
print_score(results_tree)

0.831 & 0.866 & 0.871 & 0.861


In [36]:
from sklearn.neighbors import KNeighborsClassifier
# Baseline: k-nearest-neighbours with default settings. fit() returns the
# estimator itself, so construction and fitting are chained.
K_model = KNeighborsClassifier().fit(x_train, y_train)
results_K = K_model.predict(x_test)
print_score(results_K)

0.844 & 0.882 & 0.843 & 0.926


### TPOT Classifier

In [17]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC

# this was the final TPOT exported pipeline that achieved the highest F1 score
# tpot_model = LinearSVC(C=0.1, dual=False, loss="squared_hinge", penalty="l1", tol=0.1)
# tpot_model.fit(x_train, y_train)
# dump(tpot_model, MODELDIR / f'{cfg.exp_id}_tpot.joblib') 



In [22]:
# Load the stored TPOT model for this experiment id from MODELDIR and score
# its predictions on the held-out test set.
tpot_model = load(MODELDIR / f'{cfg.exp_id}_tpot.joblib')
results_tpot = tpot_model.predict(x_test)
print_score(results_tpot)

Accuracy: 0.8783178214408824
f1: 0.9102009666751462
P, R: 0.853530534351145 0.9749318801089918


Save a copy of the model fine-tuned on your data so that you can reuse it for prediction in the future without having to retrain.
### AutoSklearn Classifier

In [13]:
import autosklearn

# load the AutoSklearn ensemble object
# askl.refit(x_train, y_train)
# dump(askl, MODELDIR / f'{cfg.exp_id}_askl.joblib')

In [21]:
# Load the stored AutoSklearn model for this experiment id from MODELDIR and
# score its predictions on the held-out test set.
askl = load(MODELDIR / f'{cfg.exp_id}_askl.joblib')
results_automl = askl.predict(x_test)
print_score(results_automl)



Accuracy: 0.8772836952774905
f1: 0.9074844074844074
P, R: 0.8673621460506706 0.9514986376021798


# Apply classifiers to unlabeled data

In [None]:
# Reload both stored classifiers so this section can be run without
# re-executing the evaluation cells above.
askl = load(MODELDIR / f'{cfg.exp_id}_askl.joblib');
tpot_model = load(MODELDIR / f'{cfg.exp_id}_tpot.joblib')
# Last expression of the cell: displays the image shape from the config.
cfg.img_shape

In [None]:
# Load the unlabeled ROIs from the .mat file, reusing the crop size and trace
# length from the config.
# NOTE(review): img_shape is hard-coded here rather than taken from
# cfg.img_shape — confirm this is intentional for this recording.
unseen_data = cr.UnlabeledDataset(
    mat_file='../data/unlabeled_rois_DM298.mat',
    img_shape={'x': 284, 'y': 231},
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len)

In [None]:
# Display the config values for comparison with the arguments passed to UnlabeledDataset.
cfg.img_shape, cfg.img_crop_size, cfg.max_trace_len

In [None]:
# Run both classifiers on the `combined` attribute of the unlabeled dataset.
# NOTE(review): presumably `combined` has the same feature layout as x_train — confirm.
pred_askl = askl.predict(unseen_data.combined)
pred_tpot = tpot_model.predict(unseen_data.combined)

In [None]:
# Indices of the ROIs that askl labeled as "positives" (prediction == 1).
positive_askl = np.nonzero(pred_askl == 1)[0]
# Only the first 10 are shown at once; change the slice to preview more.
cr.plot_rois(unseen_data, positive_askl[:10])

In [None]:
# Indices of the ROIs that askl labeled as "negatives" (prediction == 0).
negative_askl = np.nonzero(pred_askl == 0)[0]
# Only the first 10 are shown at once; change the slice to preview more.
cr.plot_rois(unseen_data, negative_askl[:10])

In [None]:
# Hard-coded reference labels compared against the askl predictions below.
# NOTE(review): presumably hand-assigned labels for the ROIs in unseen_data, in
# the same order as pred_askl; the list length must match len(pred_askl) — confirm.
gt_label = [1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Displayed as an (accuracy, F1) tuple.
accuracy_score(gt_label, pred_askl), f1_score(gt_label, pred_askl)

In [None]:
# Preview the first 10 ROIs that TPOT labeled as "negatives" (prediction == 0).
cr.plot_rois(unseen_data, np.nonzero(pred_tpot == 0)[0][:10])

# Apply reviews

In [None]:
# Hand the askl predictions to the dataset object.
# NOTE(review): presumably this writes the labels to a file — confirm what apply_labels persists and where.
unseen_data.apply_labels(pred_askl)

In [None]:
# load the file to check the results
from scipy.io import loadmat, savemat

# NOTE(review): presumably this is the file produced by
# unseen_data.apply_labels(...) in the previous cell — confirm the path.
labeled_data = loadmat('../data/unlabeled_rois_automl.mat')