In [1]:
# import packages
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import cnmfereview as cr
import modules.model.config as cfg
import os
from joblib import dump, load

In [2]:
MODELDIR = Path('../models')

## Load your data

In [3]:
data = cr.Dataset(
    data_paths=cfg.data_paths,
    exp_id=cfg.exp_id,
    img_shape=cfg.img_shape,
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len,
    )

No preprocessing on spatial data
File ../data/cnmfe-reviewer/cr_tutorialA_cropped.npy already exists and has been loaded instead.
No preprocessing on trace data.                   ../data/cnmfe-reviewer/cr_tutorialCraw_normalized.npy already                   exists and has been loaded instead.
Successfully loaded data.


In [4]:
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20,
    seed=10
)

Training and test data loaded


In [5]:
x_train.shape

(11603, 6900)

_________________
**NOTE: Remove the next cell when training your own models.** This step uses fewer ROIs (only ~3000 instead of 11 000) in the tutorial dataset to speed up computation in the tutorial. Do not do this when you are training your own data. You want to use as many data samples as possible to get the best results in practice. 

In [6]:
# remove or comment out this cell when using on your own data
# from sklearn.model_selection import train_test_split
# x_train, _, y_train, _ = train_test_split(x_train, y_train, test_size=0.75)

____________________________

In [7]:
print(f"Number of samples in training set: {x_train.shape[0]}") 
print(f"Number of samples in test set: {x_test.shape[0]}")

Number of samples in training set: 11603
Number of samples in test set: 2901


# Train the saved models on your data

# TPOT Classifier

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, accuracy_score
from sklearn.svm import LinearSVC

# this was the final TPOT exported pipeline that acheived the highest F1 score
tpot_model = LinearSVC(C=0.1, dual=False, loss="squared_hinge", penalty="l1", tol=0.1)
tpot_model.fit(x_train, y_train)

LinearSVC(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l1', random_state=None, tol=0.1,
          verbose=0)

In [10]:
tpot_y_pred = tpot_model.predict(x_train)
print("Accuracy:", accuracy_score(y_train, tpot_y_pred))
print("f1:", f1_score(y_train, tpot_y_pred))
dump(tpot_model, MODELDIR / f'{cfg.exp_id}_tpot.joblib') 

Accuracy: 0.8808066879255365
f1: 0.9119052168927957


['../models/cr_tutorial_tpot.joblib']

Save a copy of the model finetuned on your data to use again in the future to predict without having to retrain.

# AutoSklearn Classifier

In [None]:
import autosklearn
from autosklearn.classification import AutoSklearnClassifier
import sklearn
# load the AutoSklearn ensemble object
askl = AutoSklearnClassifier(
    memory_limit=2048,
    time_left_for_this_task=1800,
)
askl.fit(x_train, y_train)

In [None]:
results_automl = askl.predict(x_test)
print("Accuracy:", accuracy_score(y_test, results_automl))
print("f1:", f1_score(y_test, results_automl))
dump(askl, MODELDIR / f'{cfg.exp_id}_askl.joblib')

# Deep Classifier

In [None]:
data.spatial.shape, data.trace.shape, data.targets.shape
x_train, x_test, y_train, y_test = data.split_training_test_data(
    test_split=.20, seed=10, for_deep=True)

In [None]:
x_train[0].shape, x_train[1].shape

In [None]:
callbacks = []
# callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='auto'))

model = cr.get_model(with_spatial=True, use_cnn_for_trace=True, frame=500, crop_size=80)
fit = model.fit(x_train, y_train,
          batch_size=16,
          epochs=5,
          verbose=1,
          validation_data=(x_test, y_test),
          callbacks=callbacks)

In [None]:
len(x_train[0])

In [None]:
callbacks = []
# callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, mode='auto'))

model = cr.get_model(with_spatial=True, use_cnn_for_trace=True, frame=500, crop_size=80)
fit = model.fit(x_train, y_train,
          batch_size=16,
          epochs=5,
          verbose=1,
          validation_data=(x_test, y_test),
          callbacks=callbacks)

In [None]:
results_dl = model.predict(x_test)
results_dl = results_dl > 0.5
print("Accuracy:", accuracy_score(y_test, results_dl))
print("f1:", f1_score(y_test, results_dl))

# Apply classifiers to unlabeled data

In [None]:
askl = load(MODELDIR / f'{cfg.exp_id}_askl.joblib');
tpot_model = load(MODELDIR / f'{cfg.exp_id}_tpot.joblib')
cfg.img_shape

In [None]:
unseen_data = cr.UnlabeledDataset(
    mat_file='../data/unlabeled_rois_DM298.mat',
    img_shape={'x': 284, 'y': 231},
    img_crop_size=cfg.img_crop_size,
    max_trace=cfg.max_trace_len)

In [None]:
cfg.img_shape, cfg.img_crop_size, cfg.max_trace_len

In [None]:
pred_askl = askl.predict(unseen_data.combined)
pred_tpot = tpot_model.predict(unseen_data.combined)

In [None]:
# preview the ROIs labeled by askl as "positives"
positive_askl = np.where(pred_askl==1)[0]
# limit to only show 10 at once, you can play around with this of course
cr.plot_rois(unseen_data, positive_askl[:10])

In [None]:
# preview the ROIs labeled by askl as "negatives"
negative_askl = np.where(pred_askl==0)[0]
# limit to only show 10 at once, you can play around with this of course
cr.plot_rois(unseen_data, negative_askl[:10])

In [None]:
gt_label = [1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
accuracy_score(gt_label, pred_askl), f1_score(gt_label, pred_askl)

In [None]:
# preview the ROIs labeled by TPOT as "negatives"
cr.plot_rois(unseen_data, np.where(pred_tpot==0)[0][:10])

# Apply reviews

In [None]:
unseen_data.apply_labels(pred_askl)

In [None]:
# load the file to check the results
from scipy.io import loadmat, savemat

labeled_data = loadmat('../data/unlabeled_rois_automl.mat')