## Load your data

In [1]:
# import packages
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import cnmfereview as cr
import config as cfg
import os
from joblib import dump, load

# Directory that holds the saved model files used throughout this notebook.
MODELDIR = Path('../best_models')

# Build the labelled dataset; every setting is read from config.py.
data = cr.Dataset(
    exp_id=cfg.exp_id,
    data_paths=cfg.data_paths,
    img_shape=cfg.img_shape,
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len,
)

# Split into training and test sets (test fraction 0.2, fixed seed of 10).
(x_train, x_test,
 y_train, y_test) = data.split_training_test_data(seed=10, test_split=0.2)

# Last expression of the cell: displays the shape of the training matrix.
x_train.shape

No preprocessing on spatial data
File ../data/cr_tutorialA_cropped.npy already exists and has been loaded instead.
No preprocessing on trace data.                   ../data/cr_tutorialCraw_normalized.npy already                   exists and has been loaded instead.
Successfully loaded data.
Training and test data loaded


(11603, 6900)

_________________
**NOTE: Remove the next cell when training your own models.** The two commented-out lines in it subsample the tutorial dataset (about 3,000 ROIs instead of about 11,000) to speed up computation; uncomment them only when you are running the tutorial. Do not subsample when training on your own data: you want to use as many data samples as possible to get the best results in practice.

In [2]:
# remove or comment out this cell when using on your own data
# from sklearn.model_selection import train_test_split
# x_train, _, y_train, _ = train_test_split(x_train, y_train, test_size=0.75)

# Report how many samples each split contains.
for split_name, split_x in (("training", x_train), ("test", x_test)):
    print(f"Number of samples in {split_name} set: {split_x.shape[0]}")

Number of samples in training set: 11603
Number of samples in test set: 2901


# Train the saved models on your data
### Deep Classifier

In [3]:
import ignite
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import ModelCheckpoint, EarlyStopping
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tensorboardX import SummaryWriter
from nn.model import Model
import optuna

from nn.train import train

In [4]:
# Evaluated as a quick sanity check only; the tuple is not displayed because
# it is not the last expression of the cell.
data.spatial.shape, data.trace.shape, data.targets.shape

# NOTE(review): this rebinds x_train / x_test / y_train / y_test, replacing the
# split created in the first cell with the `for_deep=True` variant. Later cells
# that call scikit-learn style `fit` / `predict` read the same names.
x_train, x_test, y_train, y_test = data.split_training_test_data(
    seed=10,
    test_split=0.2,
    for_deep=True,
)

class datasets(torch.utils.data.Dataset):
    """Map-style dataset pairing two aligned inputs with their labels.

    ``x`` is indexed as ``x[0]`` and ``x[1]``; sample ``i`` is the pair
    ``(x[0][i], x[1][i])`` together with the label ``y[i]``. Every returned
    element is moved to ``device`` at fetch time.
    """

    def __init__(self, x, y, device):
        self.x = x
        self.y = y
        self.device = device

    def __len__(self):
        # The number of samples is the size of the first axis of the labels.
        return self.y.shape[0]

    def __getitem__(self, i):
        target = self.device
        first_input, second_input = self.x[0][i], self.x[1][i]
        label = self.y[i]
        return (first_input.to(target), second_input.to(target)), label.to(target)

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook still runs on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Wrap both splits so that each fetched sample is moved to `device`.
trainsets = datasets(x_train, y_train, device)
testsets = datasets(x_test, y_test, device)

# Batches of 32 samples, iterated in dataset order.
train_loader = torch.utils.data.DataLoader(trainsets, batch_size=32)
test_loader = torch.utils.data.DataLoader(testsets, batch_size=32)

Training and test data loaded


In [5]:
# ResNet spatial stage with 5 residual blocks; the `t_*` sizes are both 500.
model = Model(s_stage='ResNet', res_block_num=5,
              t_hidden_dim=500, t_output_dim=500)

# Last expression of the cell, so the value returned by train() is displayed.
train(model, train_loader, test_loader, device)

spatial feature len: 2048, temporal feature len: 500
training Results - Epoch: 1  Avg accuracy: 0.875 Avg loss: 0.328 Avg F1: 0.899
validation Results - Epoch: 1  Avg accuracy: 0.874 Avg loss: 0.319 Avg F1: 0.898
training Results - Epoch: 2  Avg accuracy: 0.890 Avg loss: 0.257 Avg F1: 0.913
validation Results - Epoch: 2  Avg accuracy: 0.891 Avg loss: 0.259 Avg F1: 0.915
training Results - Epoch: 3  Avg accuracy: 0.869 Avg loss: 0.356 Avg F1: 0.905
validation Results - Epoch: 3  Avg accuracy: 0.872 Avg loss: 0.348 Avg F1: 0.907
training Results - Epoch: 4  Avg accuracy: 0.891 Avg loss: 0.252 Avg F1: 0.914
validation Results - Epoch: 4  Avg accuracy: 0.887 Avg loss: 0.256 Avg F1: 0.910
training Results - Epoch: 5  Avg accuracy: 0.897 Avg loss: 0.244 Avg F1: 0.920
validation Results - Epoch: 5  Avg accuracy: 0.896 Avg loss: 0.251 Avg F1: 0.919
training Results - Epoch: 6  Avg accuracy: 0.895 Avg loss: 0.243 Avg F1: 0.918
validation Results - Epoch: 6  Avg accuracy: 0.891 Avg loss: 0.253 A

0.9397879575915183

In [None]:
# Sweep the number of ResNet blocks (1-5) and record the score returned by
# train() for each setting. In a previous run, 5 blocks scored best.
# NOTE: this trains five models, so the cell is slow to run.
scores = []
for block_count in range(1, 6):
    model = Model(
        s_stage='ResNet',
        res_block_num=block_count,
    )
    scores.append(train(model, train_loader, test_loader, device))

# One line per setting: number of blocks, then its score.
for block_count, score in enumerate(scores, start=1):
    print(block_count, score)

In [None]:
def optimaze_san(trial):
    """Optuna objective for the 'SAN' spatial stage.

    Samples the number of blocks, the per-block layer-size increment and the
    kernel size, builds the matching ``Model`` and trains it.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The negated value returned by ``train``. The study below uses Optuna's
        default direction (minimize), so a higher score is treated as better.
    """
    block_num = trial.suggest_int('block_num', 1, 5)
    layer_size_hop = trial.suggest_int('layer_size_hop', 2, 5)
    # Odd kernel sizes only (3, 5, 7). `step` is passed by keyword because
    # recent Optuna releases deprecate passing it positionally.
    kernel_size = trial.suggest_int('kernel_size', 3, 7, step=2)

    # The first block is fixed at 3 layers / kernel 3; each further block i
    # gets 2 + i * layer_size_hop layers and the sampled kernel size.
    layers = [3] + [2 + i * layer_size_hop for i in range(1, block_num)]
    kernels = [3] + [kernel_size] * (block_num - 1)

    model = Model(
        s_stage='SAN',
        san_layers=layers,
        san_kernels=kernels,
    )
    # Pass the loaders and device explicitly, the same way train() is called
    # for the ResNet model earlier in the notebook.
    score = train(model.to(device), train_loader, test_loader, device)
    return -score


study = optuna.create_study()
study.optimize(optimaze_san, n_trials=30)

In [None]:
def optimaze_lstm(trial):
    """Optuna objective for the ``t_hidden_dim`` / ``t_output_dim`` sizes.

    Keeps the spatial stage fixed (ResNet, 4 blocks) and samples both sizes
    from 50 to 500 in steps of 50.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The negated value returned by ``train``. The study below uses Optuna's
        default direction (minimize), so a higher score is treated as better.
    """
    # `step` is passed by keyword because recent Optuna releases deprecate
    # passing it positionally.
    t_hidden_dim = trial.suggest_int('t_hidden_dim', 50, 500, step=50)
    t_output_dim = trial.suggest_int('t_output_dim', 50, 500, step=50)

    model = Model(
        s_stage='ResNet',
        res_block_num=4,
        t_hidden_dim=t_hidden_dim,
        t_output_dim=t_output_dim,
    )
    # Pass the loaders and device explicitly, the same way train() is called
    # for the ResNet model earlier in the notebook.
    score = train(model.to(device), train_loader, test_loader, device)
    return -score


study = optuna.create_study()
study.optimize(optimaze_lstm, n_trials=30)

### TPOT Classifier

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score
from sklearn.svm import LinearSVC

# The deep-classifier section rebinds x_train / x_test / y_train / y_test to
# the `for_deep=True` split, which is consumed as a pair of inputs
# (x[0], x[1]) rather than the 2-D feature matrix that LinearSVC.fit expects.
# Recreate the flat split here, with the same test fraction and seed as in
# the first cell, so this section works when the notebook is run top to bottom.
# NOTE: if you subsampled the training set for the tutorial, repeat that step
# after this split.
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20,
    seed=10
)

# this was the final TPOT exported pipeline that achieved the highest F1 score
tpot_model = LinearSVC(C=0.1, dual=False, loss="squared_hinge", penalty="l1", tol=0.1)
tpot_model.fit(x_train, y_train)

In [None]:
# Score on the held-out test split rather than on the data the model was
# fitted on, so the numbers are comparable with the AutoSklearn evaluation.
tpot_y_pred = tpot_model.predict(x_test)
print("Accuracy:", accuracy_score(y_test, tpot_y_pred))
print("f1:", f1_score(y_test, tpot_y_pred))

# Persist the fitted model under an experiment-specific file name.
dump(tpot_model, MODELDIR / f'{cfg.exp_id}_tpot.joblib')

The cell above saves a copy of the model fine-tuned on your data, so you can reuse it for prediction in the future without having to retrain.
### AutoSklearn Classifier

In [None]:
import autosklearn
import sklearn
# Load the saved AutoSklearn ensemble object.
# NOTE: joblib files are pickle-based; only load model files you trust.
askl = load(MODELDIR.joinpath('cr_tutorial_askl.joblib'))

# Refit the loaded ensemble on the current training split.
askl.refit(x_train, y_train)

In [None]:
# Predict on the test split and print both metrics.
results_automl = askl.predict(x_test)
for metric_label, metric_fn in (("Accuracy:", accuracy_score), ("f1:", f1_score)):
    print(metric_label, metric_fn(y_test, results_automl))

# Persist the refitted ensemble under an experiment-specific file name.
dump(askl, MODELDIR.joinpath(f'{cfg.exp_id}_askl.joblib'))

# Apply classifiers to unlabeled data

In [None]:
# Reload both saved classifiers for this experiment from disk.
askl = load(MODELDIR.joinpath(f'{cfg.exp_id}_askl.joblib'))
tpot_model = load(MODELDIR.joinpath(f'{cfg.exp_id}_tpot.joblib'))

# Last expression of the cell: displays the configured image shape.
cfg.img_shape

In [None]:
# Load the unlabeled ROIs from the .mat file.
# NOTE(review): img_shape is hard-coded here while the crop size and trace
# length come from config.py -- confirm it matches this recording.
unseen_data = cr.UnlabeledDataset(
    img_shape={'x': 284, 'y': 231},
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len,
    mat_file='../data/unlabeled_rois_DM298.mat',
)

In [None]:
# Display the three config values for comparison with the arguments passed to
# UnlabeledDataset.
cfg.img_shape, cfg.img_crop_size, cfg.max_trace_len

In [None]:
# Run both classifiers on the same features of the unlabeled ROIs.
unseen_features = unseen_data.combined
pred_askl = askl.predict(unseen_features)
pred_tpot = tpot_model.predict(unseen_features)

In [None]:
# Indices of the ROIs that askl labeled as "positives" (prediction == 1).
positive_askl = np.flatnonzero(pred_askl == 1)

# Preview only the first 10 of them; change the slice to see more.
cr.plot_rois(unseen_data, positive_askl[:10])

In [None]:
# Indices of the ROIs that askl labeled as "negatives" (prediction == 0).
negative_askl = np.flatnonzero(pred_askl == 0)

# Preview only the first 10 of them; change the slice to see more.
cr.plot_rois(unseen_data, negative_askl[:10])

In [None]:
# Reference labels compared against the askl predictions below.
# NOTE(review): presumably hand-assigned, one label per ROI in unseen_data --
# confirm the length and ordering match pred_askl.
gt_label = [1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# Displays the (accuracy, F1) pair as the cell output.
accuracy_score(gt_label, pred_askl), f1_score(gt_label, pred_askl)

In [None]:
# Preview the first 10 ROIs that TPOT labeled as "negatives" (prediction == 0).
negative_tpot_preview = np.flatnonzero(pred_tpot == 0)[:10]
cr.plot_rois(unseen_data, negative_tpot_preview)

# Apply reviews

In [None]:
# Hand the askl predictions to the dataset's apply_labels method.
# NOTE(review): presumably this writes the labels back to a .mat file --
# confirm the output location in cnmfereview.
unseen_data.apply_labels(pred_askl)

In [None]:
# load the file to check the results
from scipy.io import loadmat, savemat

# NOTE(review): this file name differs from the mat_file given to
# UnlabeledDataset ('../data/unlabeled_rois_DM298.mat'); confirm it is the
# file that apply_labels produces.
labeled_data = loadmat('../data/unlabeled_rois_automl.mat')