## Example script: training tau classifier for cortical regions

**Import the tau classification modules**

In [1]:
import sys
# Put the local Tau_classification code directory first on the module search
# path, presumably so that `base`, `constants` and `tau_classification` below
# can be imported from it.
# NOTE(review): this is an absolute, user-specific path; change it to the
# location of the Tau_classification folder on your machine before running.
sys.path.insert(0,
                '/Users/tanrada/OneDrive - University of Cambridge/Attachments/Jan2023/Tau_pipeline/Tau_classification/')

# NOTE(review): star imports. Names used later in this notebook but never
# defined in it (e.g. `pd`, `TauDataBase`, `ScreeningClassifier`,
# `TauClassifier`, `extracted_features`, the `*_hyperparams` objects)
# presumably come from these modules; confirm which module provides which.
from base import *
from constants import *
from tau_classification import * 

### Data preparation

**Screening classifier**

In [2]:
# Screening-classifier training set: folder and file name
path = "/Users/tanrada/OneDrive - University of Cambridge/Attachments/Jan2023/Tau_pipeline/Tau_classification/Training_data/screening_classifier/"
filename = "training.txt"

# Create the tau database object for this training set, then run the
# preparation step for the screening classifier (classifier 1)
s_data = TauDataBase(path=path, filename=filename)
s_data.classifier1_prep()

# Sanity check: class counts first, then the shape of the training features
print(s_data.c1_data["Class"].value_counts(),
      s_data.c1_X_train.shape,
      sep="\n")

Tau        10185
Non_tau     9963
Name: Class, dtype: int64
(20148, 54)


**Tau classifier for cortical regions**

In [3]:
# Cortical-region training set: folder and file name
# (note: this re-binds `path` / `filename` from the screening-classifier cell)
path = "/Users/tanrada/OneDrive - University of Cambridge/Attachments/Jan2023/Tau_pipeline/Tau_classification/Training_data/cortical_regions/"
filename = "training.txt"

# Create the tau database object for this training set, then run the
# preparation step for the cortical tau classifier (classifier 2)
cortical_data = TauDataBase(path=path, filename=filename)
cortical_data.classifier2_prep()

# Sanity check: class counts first, then the shape of the training features
print(cortical_data.c2_data["Class"].value_counts(),
      cortical_data.c2_X_train.shape,
      sep="\n")

Others    2837
CB         497
TA         303
NFT        147
Name: Class, dtype: int64
(3784, 54)


### Initialising & training the classifiers

**Screening classifier**

In [4]:
# Build the screening classifier from `screening_classifier_hyperparams`
# (not defined in this notebook; presumably supplied by one of the star
# imports in the first cell; TODO confirm).
screening_model = ScreeningClassifier(screening_classifier_hyperparams)
# Display the model's pipeline; the recorded output shows three steps:
# 'normalizer' -> 'selector' -> 'clf'.
screening_model.pipeline

Pipeline(steps=[('normalizer', MinMaxScaler()),
                ('selector',
                 RFE(estimator=RandomForestClassifier(random_state=42),
                     n_features_to_select=44)),
                ('clf',
                 BalancedRandomForestClassifier(class_weight='balanced',
                                                max_features=1,
                                                max_samples=0.75,
                                                min_samples_leaf=2,
                                                n_estimators=300,
                                                random_state=42,
                                                sampling_strategy='not '
                                                                  'majority'))])

In [6]:
# Train the screening classifier on the classifier-1 training features and
# labels held by `s_data` (set up earlier by `s_data.classifier1_prep()`).
screening_model.train(s_data.c1_X_train,
                     s_data.c1_Y_train)

In [8]:
# Show the first rows of the screening model's feature-importance table
# (columns `features` and `importance` in the recorded output).
# NOTE(review): presumably populated by `train()` and sorted by importance;
# confirm in tau_classification.
screening_model.f_importance.head()

Unnamed: 0,features,importance
20,ROI: 0.25 µm per pixel: DAB: Max,0.053625
16,ROI: 0.25 µm per pixel: DAB: Haralick Sum aver...,0.04833
26,ROI: 0.25 µm per pixel: Green: Mean,0.041939
21,ROI: 0.25 µm per pixel: DAB: Mean,0.041595
35,ROI: 0.25 µm per pixel: Red: Mean,0.040228


**Tau classifier for cortical regions**

In [9]:
# Build the cortical tau classifier from `cortical_classifier_hyperparams`
# (not defined in this notebook; presumably supplied by one of the star
# imports in the first cell; TODO confirm).
cortical_model = TauClassifier(cortical_classifier_hyperparams)
# Display the model's pipeline; the recorded output shows three steps:
# 'normalizer' -> 'selector' -> 'clf'.
cortical_model.pipeline

Pipeline(steps=[('normalizer', MinMaxScaler()),
                ('selector',
                 RFE(estimator=RandomForestClassifier(random_state=42),
                     n_features_to_select=34)),
                ('clf',
                 BalancedRandomForestClassifier(class_weight='balanced',
                                                max_depth=15, max_features=0.6,
                                                max_samples=0.75,
                                                min_samples_leaf=2,
                                                n_estimators=500,
                                                random_state=42,
                                                sampling_strategy='not '
                                                                  'majority'))])

In [10]:
# Train the cortical tau classifier on the classifier-2 training features and
# labels held by `cortical_data` (set up earlier by
# `cortical_data.classifier2_prep()`).
cortical_model.train(cortical_data.c2_X_train,
                     cortical_data.c2_Y_train)

In [11]:
# Inspect `best_parameters`. In the recorded output this is a dict keyed 0-3,
# each value a 4-tuple of floats.
# NOTE(review): the meaning of the keys and of the four values is not visible
# in this notebook (presumably one entry per tau class); confirm against
# TauClassifier in tau_classification.
cortical_model.best_parameters

{0: (0.2631443106950739,
  0.9341809886119208,
  0.9296717536071032,
  0.9397142857142857),
 1: (0.5470399932259464,
  0.9563666419478434,
  0.9506372549019607,
  0.9661904761904762),
 2: (0.6309520256218469,
  0.9922822725611444,
  0.9895815269468227,
  0.9950654456775991),
 3: (0.44842729381540636,
  0.9388149165443382,
  0.9216810613134141,
  0.9607526881720432)}

In [12]:
# Show the first rows of the cortical model's feature-importance table
# (columns `features` and `importance` in the recorded output).
# NOTE(review): presumably populated by `train()` and sorted by importance;
# confirm in tau_classification.
cortical_model.f_importance.head()

Unnamed: 0,features,importance
0,Area µm^2,0.26115
4,Min diameter µm,0.193786
2,Length µm,0.054853
16,ROI: 0.25 µm per pixel: DAB: Median,0.052382
5,ROI: 0.25 µm per pixel: Blue: Mean,0.051438


### Putting them together: tau classification pipeline for novel *cortical* slides

In [34]:
# Input/output locations for classifying novel cortical slides.
# NOTE(review): absolute, user-specific paths; adjust before running elsewhere.
# All three are joined by plain string concatenation further down, so the
# directory paths must keep their trailing "/".

# Folder holding the per-slide detection files and the file list below.
novel_path = '/Users/tanrada/OneDrive - University of Cambridge/Attachments/Jan2023/Detections/cortical/'
# Text file inside `novel_path` listing one detection file name per line.
novel_filename = 'noartefact_files.txt'
# Folder the per-slide "<Image>_predictions.txt" files are written to.
prediction_path = "/Users/tanrada/OneDrive - University of Cambridge/Attachments/Jan2023/Predictions_new/Cortical/"

**Reading in files**

In [35]:
# Read the list of detection files to classify (one file name per line).
# Blank / whitespace-only lines are skipped: an empty entry would otherwise
# make the classification loop try to read `novel_path` itself as a data file.
with open(novel_path + novel_filename) as f:
    mylist = [line for line in f.read().splitlines() if line.strip()]

print("Read in: ", len(mylist), "files")

Read in:  160 files


#### Tau classification

In [36]:
# Run the two-stage tau classification on every novel slide listed in `mylist`:
#   STEP1  read the slide's detection file and keep the "Unlabelled" detections
#   STEP2  screening classifier: label each detection (e.g. Tau vs Non_tau)
#   STEP3  cortical tau classifier: relabel the "Tau" detections by hallmark
#   STEP4  export Image / Class / centroid / area to "<Image>_predictions.txt"
# Slides that cannot be fully processed are recorded in `faulty_file` and
# skipped; nothing is exported for them.
n_total = len(mylist)
faulty_file = []

# Identifier / bookkeeping columns that are dropped before calling `predict`.
non_feature_columns = ["Image",
                       "Name",
                       "Class",
                       "Parent",
                       "ROI",
                       "Centroid_X",
                       "Centroid_Y"]

for i, dat_file in enumerate(mylist):

    # Read in novel slide
    print("FILE", dat_file, "Number: ", i + 1, "/", n_total)
    print("---------------STEP1: Read in data file -------------------")

    dat_ = pd.read_csv(novel_path + dat_file, sep="\t")
    # Give the columns at positions 5 and 6 fixed names, since their header
    # names tend to be inconsistent between files.
    # A new list is assigned to `.columns` instead of writing into
    # `.columns.values` in place: pandas Index objects are meant to be
    # immutable, and mutating the underlying array is not a supported way to
    # rename columns.
    new_columns = dat_.columns.tolist()
    new_columns[5] = "Centroid_X"
    new_columns[6] = "Centroid_Y"
    dat_.columns = new_columns

    # Fixing order of the columns
    ordered = dat_[extracted_features]

    dat = ordered[ordered["Class"] == "Unlabelled"]  # only unlabelled cells
    print("Read in data file:", dat_file)
    print("Data shape is:", dat.shape)

    # Classifier 1: separating Non-tau from Tau
    print(
        "---------------STEP2: Separating Non-tau from Tau -------------------"
    )
    # 1) Remove NA cells
    dat = dat.dropna()

    # Nothing left to classify: record the slide and move on instead of
    # calling the classifier on an empty table.
    if dat.empty:
        print("There are no unlabelled detections on this slide!")
        faulty_file.append(dat_file + " No unlabelled detections on the slide")
        continue

    predicted1_slide = dat.copy()

    # 2) Dropping extra info features
    X_unlabelled = dat.drop(columns=non_feature_columns)

    # 3) Predictions (the model stores its result in `.prediction`)
    screening_model.predict(X_unlabelled)
    predicted1_slide["Class"] = screening_model.prediction
    print(predicted1_slide["Class"].value_counts())

    # Classifier 2: tau hallmark classification
    print(
        "---------------STEP3: Tau hallmark classification -------------------"
    )
    # Select out 'tau' portion only (ignoring non-tau & ambiguous cells )
    tau_portion = predicted1_slide[predicted1_slide["Class"] == "Tau"]
    if tau_portion.shape[0] == 0:
        print("There is no tau on this slide!")
        # `.iloc[0]` = first remaining row by position. A label lookup with
        # `[0]` raises KeyError whenever original row 0 was filtered out above.
        faulty_file.append(str(dat["Image"].iloc[0]) + " No tau on the slide")
        continue
    tau_portion_X = tau_portion.drop(columns=non_feature_columns)
    predicted2_slide = tau_portion.copy()
    # Separates out detections that are not tau
    non_tau_portion = predicted1_slide[predicted1_slide["Class"] != "Tau"]

    # Make predictions on Tau objects
    cortical_model.predict(tau_portion_X)
    predicted2_slide["Class"] = cortical_model.prediction
    print(predicted2_slide["Class"].value_counts())

    # Extracting data out
    print("---------------STEP4: Data extraction & export -------------------")

    # 1) Combining predicted cells & excluded cells (prior to prediction)
    total_pred = pd.concat([non_tau_portion,
                            predicted2_slide])
    print("No loss of cells? ",
          predicted1_slide.shape[0] == total_pred.shape[0])

    output_visualise = total_pred[["Image",
                                   "Class",
                                   "Centroid_X",
                                   "Centroid_Y",
                                   "Area µm^2"]]
    # Output file is named after the "Image" value of the first exported row
    path_ = (
            prediction_path +
            output_visualise.iloc[0, 0] +
            "_predictions.txt"
            )
    output_visualise.to_csv(path_, sep="\t", index=False)

    print("Exported prediction of : ", dat_file)
    print("---------------------------------------------------")

# Only claim a clean run when no slide was skipped.
if faulty_file:
    print("Finished, but", len(faulty_file), "file(s) were skipped:")
    for skipped in faulty_file:
        print("  ", skipped)
else:
    print("Well done, no error!")

FILE 703472.svs Detections.txt Number:  1 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 703472.svs Detections.txt
Data shape is: (80333, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    78610
Tau         1723
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       1164
CB            456
NFT            56
Ambiguous      33
TA             14
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  703472.svs Detections.txt
----------------------------------------------------------------------------
FILE 703473.svs Detections.txt Number:  2 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 703473.svs Detections.txt
Data shape is: (82112, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------

Read in data file: 721700.svs Detections.txt
Data shape is: (25934, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    17139
Tau         8795
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       6609
CB           1454
TA            303
NFT           249
Ambiguous     180
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  721700.svs Detections.txt
----------------------------------------------------------------------------
FILE 721701.svs Detections.txt Number:  13 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 721701.svs Detections.txt
Data shape is: (41634, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    36424
Tau         5210
Name: Class, dtype: int64
---------------STEP4: Tau hallmark cl

Non_tau    41850
Tau         1361
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       1035
CB            204
Ambiguous      46
NFT            38
TA             38
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  721771.svs Detections.txt
----------------------------------------------------------------------------
FILE 721772.svs Detections.txt Number:  24 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 721772.svs Detections.txt
Data shape is: (45323, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    36085
Tau         9238
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       7676
CB           1097
Ambiguous     203
TA            150
NFT           112
Name: Class, dtype:

Others       7521
CB            638
TA            498
Ambiguous     445
NFT           198
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  747313.svs Detections.txt
----------------------------------------------------------------------------
FILE 747316.svs Detections.txt Number:  35 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 747316.svs Detections.txt
Data shape is: (36440, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    18737
Tau        17703
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       13821
CB            2248
TA             860
NFT            444
Ambiguous      330
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  747316.svs Detection

No loss of cells?  True
Exported prediction of :  747364.svs Detections.txt
----------------------------------------------------------------------------
FILE 747366.svs Detections.txt Number:  46 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 747366.svs Detections.txt
Data shape is: (7575, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    6553
Tau        1022
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       917
CB            84
Ambiguous     18
TA             2
NFT            1
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  747366.svs Detections.txt
----------------------------------------------------------------------------
FILE 747367.svs Detections.txt Number:  47 / 160
---------------STEP1: DATA FILE-------------------
Read in data

Read in data file: 747390.svs Detections.txt
Data shape is: (678, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    453
Tau        225
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       209
CB            10
Ambiguous      5
TA             1
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  747390.svs Detections.txt
----------------------------------------------------------------------------
FILE 747813.svs Detections.txt Number:  58 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 747813.svs Detections.txt
Data shape is: (73479, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    72206
Tau         1273
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifie

Non_tau    42730
Tau         1684
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       1573
CB             63
Ambiguous      26
TA             14
NFT             8
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  747853.svs Detections.txt
----------------------------------------------------------------------------
FILE 747854.svs Detections.txt Number:  69 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 747854.svs Detections.txt
Data shape is: (38958, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    36913
Tau         2045
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       1812
CB            140
TA             45
Ambiguous      34
NFT            14
Name: Class, dtype:

Others       11050
CB             977
TA             505
Ambiguous      289
NFT            189
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  755546.svs Detections.txt
----------------------------------------------------------------------------
FILE 755547.svs Detections.txt Number:  80 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 755547.svs Detections.txt
Data shape is: (31188, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    27812
Tau         3376
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       2922
CB            225
TA            107
Ambiguous      86
NFT            36
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  755547.svs Detection

Exported prediction of :  760036.svs Detections.txt
----------------------------------------------------------------------------
FILE 760037.svs Detections.txt Number:  91 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 760037.svs Detections.txt
Data shape is: (42698, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    37181
Tau         5517
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       3877
CB           1197
Ambiguous     211
NFT           176
TA             56
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  760037.svs Detections.txt
----------------------------------------------------------------------------
FILE 760038.svs Detections.txt Number:  92 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 760038.sv

Read in data file: 760065.svs Detections.txt
Data shape is: (54926, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    52889
Tau         2037
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       1731
CB            138
TA             79
NFT            48
Ambiguous      41
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  760065.svs Detections.txt
----------------------------------------------------------------------------
FILE 760066.svs Detections.txt Number:  103 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 760066.svs Detections.txt
Data shape is: (37693, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    35963
Tau         1730
Name: Class, dtype: int64
---------------STEP4: Tau hallmark c

Non_tau    127379
Tau           772
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       686
CB            59
Ambiguous     11
TA            10
NFT            6
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  760084.svs Detections.txt
----------------------------------------------------------------------------
FILE 760085.svs Detections.txt Number:  114 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 760085.svs Detections.txt
Data shape is: (16693, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    16507
Tau          186
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       169
CB             8
Ambiguous      5
NFT            3
TA             1
Name: Class, dtype: int64


Others       774
CB           117
Ambiguous     30
NFT           19
TA             8
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  771762.svs Detections.txt
----------------------------------------------------------------------------
FILE 771790.svs Detections.txt Number:  125 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 771790.svs Detections.txt
Data shape is: (48009, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    25496
Tau        22513
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       17481
CB            2533
TA            1261
NFT            625
Ambiguous      613
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  771790.svs Detections.tx

Others       1451
CB            102
Ambiguous      30
TA             21
NFT            10
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  771838.svs Detections.txt
----------------------------------------------------------------------------
FILE 771849.svs Detections.txt Number:  136 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 771849.svs Detections.txt
Data shape is: (89137, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    45131
Tau        44006
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       33888
CB            6747
TA            1471
Ambiguous      984
NFT            916
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  771849.svs Detectio

Exported prediction of :  771893.svs Detections.txt
----------------------------------------------------------------------------
FILE 771894.svs Detections.txt Number:  147 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 771894.svs Detections.txt
Data shape is: (26179, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Tau        18056
Non_tau     8123
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       13749
CB            2252
TA            1042
Ambiguous      678
NFT            335
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  771894.svs Detections.txt
----------------------------------------------------------------------------
FILE 771895.svs Detections.txt Number:  148 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 77

Read in data file: 772019.svs Detections.txt
Data shape is: (4861, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    4213
Tau         648
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classification using Classifier2 -------------------
Others       597
CB            30
Ambiguous     11
TA             6
NFT            4
Name: Class, dtype: int64
---------------STEP5: DATA EXTRACTION & EXPORT-------------------
No loss of cells?  True
Exported prediction of :  772019.svs Detections.txt
----------------------------------------------------------------------------
FILE 772020.svs Detections.txt Number:  159 / 160
---------------STEP1: DATA FILE-------------------
Read in data file: 772020.svs Detections.txt
Data shape is: (2729, 61)
---------------STEP2: SEPARATING NON-TAU FROM TAU using Classifier1 -------------------
Non_tau    2399
Tau         330
Name: Class, dtype: int64
---------------STEP4: Tau hallmark classificati