## Example script: training cell classifier for BG regions

**Import relevant modules**

In [1]:
# Put the project's Cell_classification directory at the front of the module
# search path so the local modules below can be imported.
# NOTE(review): this is an absolute, machine-specific path — adjust it for
# your own environment before running.
import sys
sys.path.insert(0,
                '/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/')

# NOTE(review): star imports. Names used later in this notebook but never
# defined here (`pd`, `training_features`, `bg_classifier_hyperparams`,
# `CellClassifier`) presumably come from these modules — confirm.
from base import *
from constants import *
from cell_classification import * 
import joblib

### Data preparation

**Cell classifier for BG regions**

In [2]:
# Directory and file name of the BG training table (tab-separated text).
path = "/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/clean_training_data/"
filename = "bg_training_data.txt"

# `path` already ends with a separator, so the two parts are joined directly.
data = pd.read_csv(f"{path}{filename}", sep="\t")


In [3]:
# Number of training rows per value of the 'Class' column (class balance check).
data['Class'].value_counts()

Oligo     783
Others    458
Neuron    200
Astro     187
Name: Class, dtype: int64

In [4]:
# Feature matrix: the columns of `data` listed in `training_features`.
# NOTE(review): `training_features` is not defined in this notebook; it is
# presumably supplied by one of the star imports in the first cell — confirm.
# (A stray closing parenthesis on this line previously made the cell a
# SyntaxError.)
X_train = data[training_features]

# Target vector: the 'Class' label of every row.
y_train = data['Class']

In [5]:
# Dimensions of the feature matrix as (rows, columns).
X_train.shape

(1628, 44)

### Initialising & training the classifiers

**Cell classifier for BG regions**

In [6]:
# Build the classifier from the BG hyperparameter set.
# NOTE(review): `CellClassifier` and `bg_classifier_hyperparams` are not
# defined in this notebook; presumably they come from the star imports — confirm.
bg_model = CellClassifier(hyperparameters=bg_classifier_hyperparams)
# Display the assembled pipeline so its steps and settings can be inspected.
bg_model.pipeline

Pipeline(steps=[('normalizer', MinMaxScaler()),
                ('selector',
                 RFE(estimator=RandomForestClassifier(random_state=42),
                     n_features_to_select=38)),
                ('clf',
                 BalancedRandomForestClassifier(class_weight='balanced',
                                                max_depth=10, max_features=0.2,
                                                max_samples=0.75,
                                                min_samples_leaf=2,
                                                min_samples_split=5,
                                                n_estimators=600,
                                                random_state=42,
                                                sampling_strategy='not '
                                                                  'majority'))])

In [7]:
# Train the classifier on the prepared feature matrix and class labels.
bg_model.train(X=X_train, Y=y_train)

In [8]:
# Inspect the classifier's `best_parameters` attribute after training.
# NOTE(review): the displayed value is a dict keyed 0-3 with four floats per
# key; what each number denotes is defined in CellClassifier — confirm there.
bg_model.best_parameters

{0: (0.4592835374462488,
  0.6815173626938333,
  0.6891846826044234,
  0.7271929824561404),
 1: (0.48042408021969285, 0.8543324993612236, 0.8301907662241488, 0.89),
 2: (0.5122155925759821,
  0.917856339157616,
  0.9174491900312752,
  0.9206913339824732),
 3: (0.2164000689682492,
  0.8331716197778333,
  0.8025224539600778,
  0.87512077294686)}

In [9]:
# Preview the first rows of the feature-importance table held by the model.
bg_model.f_importance.head()

Unnamed: 0,features,importance
1,Nucleus: Area µm^2,0.123232
2,Nucleus: Length µm,0.110194
5,Nucleus: Max diameter µm,0.100658
6,Nucleus: Min diameter µm,0.077623
11,Cell: Max diameter µm,0.072434


In [10]:
# Persist the trained classifier object to disk in the current working directory.
joblib.dump(value=bg_model, filename='bg_cell_classifier.sav')

['bg_cell_classifier.sav']