In [1]:
# import packages
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import cnmfereview as cr
import config as cfg
import os
from joblib import dump, load

In [2]:
MODELDIR = Path('../best_models')

## Load your data

In [3]:
data = cr.Dataset(
    data_paths=cfg.data_paths,
    exp_id=cfg.exp_id,
    img_shape=cfg.img_shape,
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len,
    )

No preprocessing on spatial data
File ../data/cr_tutorialA_cropped.npy already exists and has been loaded instead.
No preprocessing on trace data.                   ../data/cr_tutorialCraw_normalized.npy already                   exists and has been loaded instead.
Successfully loaded data.


In [4]:
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20,
    seed=10
)

Training and test data loaded


In [5]:
x_train.shape

(11603, 6900)

_________________
**NOTE: Remove the next cell when training your own models.** This step uses fewer ROIs (only ~3000 instead of 11 000) in the tutorial dataset to speed up computation in the tutorial. Do not do this when you are training your own data. You want to use as many data samples as possible to get the best results in practice. 

In [6]:
# remove or comment out this cell when using on your own data
# from sklearn.model_selection import train_test_split
# x_train, _, y_train, _ = train_test_split(x_train, y_train, test_size=0.75)

____________________________

In [7]:
print(f"Number of samples in training set: {x_train.shape[0]}") 
print(f"Number of samples in test set: {x_test.shape[0]}")

Number of samples in training set: 11603
Number of samples in test set: 2901


# Train the saved models on your data

# TPOT Classifier

In [8]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score
from sklearn.svm import LinearSVC

# this was the final TPOT exported pipeline that acheived the highest F1 score
tpot_model = LinearSVC(C=0.1, dual=False, loss="squared_hinge", penalty="l1", tol=0.1)
tpot_model.fit(x_train, y_train)

LinearSVC(C=0.1, dual=False, penalty='l1', tol=0.1)

In [9]:
tpot_y_pred = tpot_model.predict(x_train)
print("Accuracy:", accuracy_score(y_train, tpot_y_pred))
print("f1:", f1_score(y_train, tpot_y_pred))
dump(tpot_model, MODELDIR / f'{cfg.exp_id}_tpot.joblib') 

Accuracy: 0.879600103421529
f1: 0.9111153528026977


['../best_models/cr_tutorial_tpot.joblib']

Save a copy of the model finetuned on your data to use again in the future to predict without having to retrain.

# AutoSklearn Classifier

In [None]:
import autosklearn
import sklearn
# load the AutoSklearn ensemble object
askl = load(MODELDIR / 'cr_tutorial_askl.joblib')
askl.refit(x_train, y_train)

In [None]:
results_automl = askl.predict(x_test)
print("Accuracy:", accuracy_score(y_test, results_automl))
print("f1:", f1_score(y_test, results_automl))
dump(askl, MODELDIR / f'{cfg.exp_id}_askl.joblib')

# Deep Classifier

In [10]:
import ignite
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.handlers import ModelCheckpoint, EarlyStopping
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tensorboardX import SummaryWriter
from nn.model import Model

In [11]:
data.spatial.shape, data.trace.shape, data.targets.shape
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20, seed=10, for_deep=True)

class datasets(torch.utils.data.Dataset):
    def __init__(self, x, y, device):
        self.x, self.y = x, y
        self.device = device
    def __len__(self):
        return self.y.shape[0]
    def __getitem__(self, i):
        data = (self.x[0][i].to(self.device), self.x[1][i].to(self.device))
        return data, self.y[i].to(self.device)

device = 'cuda:0'
trainsets = datasets(x_train, y_train, device)
testsets = datasets(x_test, y_test, device)
train_loader = torch.utils.data.DataLoader(trainsets, batch_size=32)
test_loader = torch.utils.data.DataLoader(testsets, batch_size=32)

Training and test data loaded


In [12]:
def train(model):
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    criterion = F.binary_cross_entropy
    epochs = 30

    def preprocess(y):
        return torch.round(y[0]), y[1]

    precision = ignite.metrics.Precision(preprocess, average=False)
    recall = ignite.metrics.Recall(preprocess, average=False)
    F1 = (precision * recall * 2 / (precision + recall)).mean()

    trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
    evaluator = create_supervised_evaluator(
        model,
        metrics={'accuracy': ignite.metrics.Accuracy(preprocess),
                 'f1': F1,
                 'cross_entropy': ignite.metrics.Loss(criterion)},
        device=device)
    writer = SummaryWriter()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        i = (engine.state.iteration - 1) % len(train_loader) + 1
        if i % 500 == 0:
            print(f"Epoch[{engine.state.epoch}] Iteration[{i}/{len(train_loader)}] "
                  f"Loss: {engine.state.output:.2f}")
            writer.add_scalar("training/loss", engine.state.output, engine.state.iteration)

    def write_metrics(metrics, writer, mode: str, epoch: int):
        """print metrics & write metrics to log"""
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['cross_entropy']
        avg_f1 = metrics['f1']
        print(f"{mode} Results - Epoch: {epoch}  "
              f"Avg accuracy: {avg_accuracy:.2f} Avg loss: {avg_nll:.2f} "
              f"Avg F1: {avg_f1:.2f}")
        writer.add_scalar(f"{mode}/avg_loss", avg_nll, epoch)
        writer.add_scalar(f"{mode}/avg_accuracy", avg_accuracy, epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        write_metrics(metrics, writer, 'training', engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(test_loader)
        metrics = evaluator.state.metrics
        write_metrics(metrics, writer, 'validation', engine.state.epoch)

    handler = ModelCheckpoint(dirname='./checkpoints', filename_prefix='sample',
                              n_saved=5, create_dir=True, require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {'mymodel': model})

    handler = EarlyStopping(
        patience=5,
        score_function=lambda x: x.state.metrics['accuracy'],
        trainer=trainer)
    evaluator.add_event_handler(Events.COMPLETED, handler)

    trainer.run(train_loader, max_epochs=epochs)

In [13]:
model = Model(s_stage='SAN').to(device)
train(model)

training Results - Epoch: 1  Avg accuracy: 0.83 Avg loss: 0.43 Avg F1: 0.85
validation Results - Epoch: 1  Avg accuracy: 0.82 Avg loss: 0.43 Avg F1: 0.85
training Results - Epoch: 2  Avg accuracy: 0.89 Avg loss: 0.28 Avg F1: 0.91
validation Results - Epoch: 2  Avg accuracy: 0.89 Avg loss: 0.28 Avg F1: 0.91


Engine run is terminating due to exception: .


KeyboardInterrupt: 

In [None]:
model = Model(s_stage='ResNet').to(device)
train(model)

# Apply classifiers to unlabeled data

In [None]:
askl = load(MODELDIR / f'{cfg.exp_id}_askl.joblib');
tpot_model = load(MODELDIR / f'{cfg.exp_id}_tpot.joblib')
cfg.img_shape

In [None]:
unseen_data = cr.UnlabeledDataset(
    mat_file='../data/unlabeled_rois_DM298.mat',
    img_shape={'x': 284, 'y': 231},
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len)

In [None]:
cfg.img_shape, cfg.img_crop_size, cfg.max_trace_len

In [None]:
pred_askl = askl.predict(unseen_data.combined)
pred_tpot = tpot_model.predict(unseen_data.combined)

In [None]:
# preview the ROIs labeled by askl as "positives"
positive_askl = np.where(pred_askl==1)[0]
# limit to only show 10 at once, you can play around with this of course
cr.plot_rois(unseen_data, positive_askl[:10])

In [None]:
# preview the ROIs labeled by askl as "negatives"
negative_askl = np.where(pred_askl==0)[0]
# limit to only show 10 at once, you can play around with this of course
cr.plot_rois(unseen_data, negative_askl[:10])

In [None]:
gt_label = [1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
accuracy_score(gt_label, pred_askl), f1_score(gt_label, pred_askl)

In [None]:
# preview the ROIs labeled by TPOT as "negatives"
cr.plot_rois(unseen_data, np.where(pred_tpot==0)[0][:10])

# Apply reviews

In [None]:
unseen_data.apply_labels(pred_askl)

In [None]:
# load the file to check the results
from scipy.io import loadmat, savemat

labeled_data = loadmat('../data/unlabeled_rois_automl.mat')