## Example script: training the cell classifier for cortical regions

**Import the required modules**

In [1]:
# Put the project directory at the front of the module search path,
# presumably so the project modules imported below resolve from there.
# NOTE(review): this is a hard-coded, machine-specific absolute path; the
# notebook will not run on another machine without editing it.
import sys
sys.path.insert(0,
                '/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/')

# NOTE(review): later cells use `pd`, `training_features`, `CellClassifier` and
# `cortical_classifier_hyperparams` without defining them, so they must come
# from these star imports -- which module provides which is not visible here.
from base import *
from constants import *
from cell_classification import * 
# joblib is used in the final cell to write the trained model to disk.
import joblib

### Data preparation

**Cell classifier for cortical regions**

In [2]:
# Directory and file name of the cortical training table.
path = "/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/clean_training_data/"
filename = "cortical_training_data.txt"

# Read the tab-delimited text file into a DataFrame.
data = pd.read_csv(
    f"{path}{filename}",
    sep="\t",
)


In [3]:
# Count how many rows carry each label in the 'Class' column.
data["Class"].value_counts()

Neuron    885
Oligo     792
Others    617
Astro     179
Name: Class, dtype: int64

In [4]:
# Separate the table into the feature matrix (columns named in
# `training_features`) and the label vector (the 'Class' column).
X_train, y_train = data[training_features], data['Class']

In [5]:
# Display the (rows, columns) dimensions of the feature matrix.
X_train.shape

(2473, 44)

In [6]:
# List the names of the feature columns selected for training.
X_train.columns

Index(['Detection probability', 'Nucleus: Area µm^2', 'Nucleus: Length µm',
       'Nucleus: Circularity', 'Nucleus: Solidity', 'Nucleus: Max diameter µm',
       'Nucleus: Min diameter µm', 'Cell: Area µm^2', 'Cell: Length µm',
       'Cell: Circularity', 'Cell: Solidity', 'Cell: Max diameter µm',
       'Cell: Min diameter µm', 'Nucleus/Cell area ratio',
       'Hematoxylin: Nucleus: Mean', 'Hematoxylin: Nucleus: Median',
       'Hematoxylin: Nucleus: Min', 'Hematoxylin: Nucleus: Max',
       'Hematoxylin: Nucleus: Std.Dev.', 'Hematoxylin: Cytoplasm: Mean',
       'Hematoxylin: Cytoplasm: Median', 'Hematoxylin: Cytoplasm: Min',
       'Hematoxylin: Cytoplasm: Max', 'Hematoxylin: Cytoplasm: Std.Dev.',
       'Hematoxylin: Membrane: Mean', 'Hematoxylin: Membrane: Median',
       'Hematoxylin: Membrane: Min', 'Hematoxylin: Membrane: Max',
       'Hematoxylin: Membrane: Std.Dev.', 'Hematoxylin: Cell: Mean',
       'Hematoxylin: Cell: Median', 'Hematoxylin: Cell: Min',
       'Hematoxylin

### Initialising & training the classifiers

**Cell classifier for cortical regions**

In [5]:
# Instantiate the classifier with the cortical hyperparameter set.
cortical_model = CellClassifier(
    hyperparameters=cortical_classifier_hyperparams,
)

# Display the pipeline held by the classifier.
cortical_model.pipeline

Pipeline(steps=[('normalizer', MinMaxScaler()),
                ('selector',
                 RFE(estimator=RandomForestClassifier(random_state=42),
                     n_features_to_select=38)),
                ('clf',
                 BalancedRandomForestClassifier(class_weight='balanced',
                                                max_features=0.2,
                                                max_samples=0.5,
                                                min_samples_leaf=4,
                                                min_samples_split=5,
                                                n_estimators=900,
                                                random_state=42,
                                                sampling_strategy='not '
                                                                  'majority'))])

In [6]:
# Train the classifier on the prepared features and labels.
cortical_model.train(X=X_train, Y=y_train)

In [7]:
# Display the `best_parameters` attribute of the trained model.
# NOTE(review): what each entry represents is defined inside CellClassifier,
# which is not visible here -- confirm before interpreting the values.
cortical_model.best_parameters

{0: (0.5052076758986506,
  0.5843123101910119,
  0.5443412365069623,
  0.7156862745098039),
 1: (0.5122703624147519,
  0.9112137156584529,
  0.9126933052503394,
  0.9105209397344227),
 2: (0.2828375714157505,
  0.8576644007790974,
  0.832396168021168,
  0.892610759493671),
 3: (0.28468038825335934,
  0.7667554546504622,
  0.7432463865928447,
  0.8087519830777368)}

In [8]:
# Preview the first five rows of the feature-importance table.
cortical_model.f_importance.head(5)

Unnamed: 0,features,importance
5,Nucleus: Max diameter µm,0.121259
1,Nucleus: Area µm^2,0.113244
2,Nucleus: Length µm,0.109141
11,Cell: Max diameter µm,0.108733
6,Nucleus: Min diameter µm,0.085914


In [9]:
# save the model
# Write the trained classifier to a file in the current working directory
# (relative path, so the location depends on where the kernel was started).
joblib.dump(cortical_model, filename='cortical_cell_classifier.sav')

['cortical_cell_classifier.sav']