## Example script: training the cell classifier for occipital regions

**Import the required modules** (the local `Cell_classification` directory is added to `sys.path` first)

In [1]:
import sys
sys.path.insert(0,
                '/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/')

from base import *
from constants import *
from cell_classification import * 
import joblib

### Data preparation

**Cell classifier for cortical regions**

In [2]:
# Directory and file name of the tab-separated occipital training table.
path = "/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/clean_training_data/"
filename = "occipital_training_data.txt"

# `path` already ends with a slash, so joining the two pieces directly
# gives the full location of the file.
data = pd.read_csv(f"{path}{filename}", delimiter="\t")


In [3]:
# Count how many rows of the training table carry each label in the 'Class' column.
data['Class'].value_counts()

Others    532
Neuron    476
Oligo     445
Astro     220
Name: Class, dtype: int64

In [4]:
# Split the table into the feature matrix (columns listed in
# `training_features`) and the label vector (the 'Class' column).
X_train, y_train = data[training_features], data['Class']

# Sanity check: (number of rows, number of feature columns).
print(X_train.shape)

(1673, 44)


In [5]:
# List the names of the feature columns that were selected into X_train.
X_train.columns

Index(['Detection probability', 'Nucleus: Area µm^2', 'Nucleus: Length µm',
       'Nucleus: Circularity', 'Nucleus: Solidity', 'Nucleus: Max diameter µm',
       'Nucleus: Min diameter µm', 'Cell: Area µm^2', 'Cell: Length µm',
       'Cell: Circularity', 'Cell: Solidity', 'Cell: Max diameter µm',
       'Cell: Min diameter µm', 'Nucleus/Cell area ratio',
       'Hematoxylin: Nucleus: Mean', 'Hematoxylin: Nucleus: Median',
       'Hematoxylin: Nucleus: Min', 'Hematoxylin: Nucleus: Max',
       'Hematoxylin: Nucleus: Std.Dev.', 'Hematoxylin: Cytoplasm: Mean',
       'Hematoxylin: Cytoplasm: Median', 'Hematoxylin: Cytoplasm: Min',
       'Hematoxylin: Cytoplasm: Max', 'Hematoxylin: Cytoplasm: Std.Dev.',
       'Hematoxylin: Membrane: Mean', 'Hematoxylin: Membrane: Median',
       'Hematoxylin: Membrane: Min', 'Hematoxylin: Membrane: Max',
       'Hematoxylin: Membrane: Std.Dev.', 'Hematoxylin: Cell: Mean',
       'Hematoxylin: Cell: Median', 'Hematoxylin: Cell: Min',
       'Hematoxylin

### Initialising & training the classifiers

**Cell classifier for cortical regions**

In [6]:
# Instantiate the classifier with the occipital hyperparameter set.
occipital_model = CellClassifier(
    hyperparameters=occipital_classifier_hyperparams,
)

# Show the pipeline object held by the classifier.
occipital_model.pipeline

Pipeline(steps=[('normalizer', MinMaxScaler()),
                ('selector',
                 RFE(estimator=RandomForestClassifier(random_state=42),
                     n_features_to_select=28)),
                ('clf',
                 BalancedRandomForestClassifier(class_weight='balanced',
                                                max_depth=15, max_features=0.4,
                                                max_samples=0.25,
                                                min_samples_leaf=4,
                                                min_samples_split=10,
                                                n_estimators=200,
                                                random_state=42))])

In [7]:
# Fit the classifier on the prepared feature matrix and labels.
occipital_model.train(X=X_train, Y=y_train)

In [8]:
# Display the model's `best_parameters` attribute.
# NOTE(review): presumably populated by train(); the meaning of the keys and of
# the values in each tuple is defined in cell_classification — confirm there.
occipital_model.best_parameters

{0: (0.46755140108518506,
  0.64622061572346,
  0.5524263118410125,
  0.809090909090909),
 1: (0.35800301310120014,
  0.8404223852097499,
  0.8257465256800274,
  0.8658687943262411),
 2: (0.38978088189514104,
  0.8427582788906303,
  0.8476453077212108,
  0.848989898989899),
 3: (0.3327053512581415,
  0.833597139312819,
  0.8379277250873924,
  0.8324248777078965)}

In [9]:
# Preview the first rows of the model's feature-importance table (`f_importance`).
occipital_model.f_importance.head()

Unnamed: 0,features,importance
1,Nucleus: Area µm^2,0.132467
4,Nucleus: Max diameter µm,0.124573
0,Detection probability,0.117742
2,Nucleus: Length µm,0.117511
5,Nucleus: Min diameter µm,0.079836


In [10]:
# Persist the classifier object to disk in the current working directory.
joblib.dump(value=occipital_model, filename='occipital_cell_classifier.sav')

['occipital_cell_classifier.sav']