### Let's look at PSP hematoxylin (hema) staining

In [1]:
# load important libraries
import sys
sys.path.insert(0,
                '/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_classification/')
from base import *
from constants import *
import joblib 

In [2]:
import pandas as pd
import numpy as np

In [1]:
# functions

# to extract the predicted cells at or above a given hematoxylin percentile (e.g. percentile=90 -> top 10%) & check their DAB values
def check_hema_dab(file_list,
                   file_path,
                   percentile):
    """Summarise DAB staining of the most hematoxylin-intense predicted cells per slide.

    For every prediction file, cells whose ``Class`` is 'Ambiguous', 'Excluded'
    or 'Unlabelled' are discarded. Of the remaining cells, those whose
    'Hematoxylin: Nucleus: Mean' is at or above the requested percentile are
    kept (e.g. ``percentile=90`` keeps the top 10%), and their
    'DAB: Nucleus: Mean' values are summarised.

    Parameters
    ----------
    file_list : pandas.DataFrame
        Table whose column ``0`` holds the prediction file names
        (e.g. a list file read with ``header=None``).
    file_path : str
        Directory containing the tab-separated prediction files. A trailing
        path separator is optional.
    percentile : float
        Percentile (0-100) of hematoxylin intensity used as the cut-off.

    Returns
    -------
    describe_list : list of pandas.Series
        ``Series.describe()`` of 'DAB: Nucleus: Mean' for the selected cells,
        one entry per file, in the order of ``file_list[0]``.
    mean_hema : list of float
        Mean 'Hematoxylin: Nucleus: Mean' of the selected cells, one entry
        per file, in the same order.
    """
    # Local import so the function works regardless of which notebook cells ran.
    import os

    hema_col = 'Hematoxylin: Nucleus: Mean'
    dab_col = 'DAB: Nucleus: Mean'
    to_exclude = ['Ambiguous', 'Excluded', 'Unlabelled']

    describe_list = []
    mean_hema = []
    for file_name in file_list[0]:
        # os.path.join only inserts a separator when file_path lacks one, so
        # both 'dir/' and 'dir' resolve to the same file.
        cells = pd.read_csv(os.path.join(file_path, file_name), sep="\t")
        cells = cells[[hema_col, dab_col, 'Class']]

        # keep only cells that received a usable predicted class
        predicted = cells[~cells['Class'].isin(to_exclude)]

        # A single NaN intensity would turn the percentile into NaN and empty
        # the selection; such rows can never pass the threshold anyway.
        predicted = predicted.dropna(subset=[hema_col])

        # keep the cells at or above the hematoxylin cut-off
        threshold = np.percentile(predicted[hema_col], percentile)
        top_cells = predicted[predicted[hema_col] >= threshold]

        mean_hema.append(np.mean(top_cells[hema_col]))
        describe_list.append(top_cells[dab_col].describe())
    return describe_list, mean_hema

# To summarise info from check_hema_dab
def dab_hema_summary(describe_list, mean_hema, file_list):
    """Print cross-slide DAB statistics and the slides with extreme hematoxylin means.

    Parameters
    ----------
    describe_list : list
        Per-slide summaries indexable by 'mean', '75%' and 'max'
        (e.g. the ``Series.describe()`` results from ``check_hema_dab``).
    mean_hema : list of float
        Per-slide mean hematoxylin value, in the same order as ``describe_list``.
    file_list : pandas.DataFrame
        Table whose column ``0`` holds the slide file names, in the same order.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    # Original author's notes on each statistic:
    #   mean -> a bit too conservative - will miss out quite a lot of tau negative cells
    #   75%  -> probably a good compromise
    #   max  -> these are likely artefacts, or some cells have tau?
    sections = (
        ('mean', '---------Mean DAB across slides-----'),
        ('75%', '---------75% DAB across slides---------'),
        ('max', '---------Max DAB across slides---------'),
    )
    for stat, banner in sections:
        per_slide = [description[stat] for description in describe_list]
        print(banner)
        print('max of ' + stat + ': ', np.max(per_slide))
        print('min of ' + stat + ': ', np.min(per_slide))
        print('mean of ' + stat + ': ', np.mean(per_slide))

    # Materialise the names positionally: i_max / i_min are positions in
    # mean_hema, so a label-based lookup would fail (or pick the wrong slide)
    # whenever file_list does not carry the default 0..n-1 index.
    slide_names = list(file_list[0])
    hema = np.asarray(mean_hema)

    # slide with the highest mean hematoxylin among its selected cells
    i_max = int(np.argmax(hema))
    #  & slide with the lowest
    i_min = int(np.argmin(hema))

    print(slide_names[i_max], hema[i_max])
    print('DAB of highly pigmented slide:', describe_list[i_max])
    print(slide_names[i_min], hema[i_min])
    print('DAB of least pigmented slide:', describe_list[i_min])


## PSP stage 2

**Cortical slides** These slides are unlikely to have tau

In [34]:
# PSP stage 2, cortex: header-less table of prediction file names, plus the folder holding those files
cortical_list = pd.read_csv(
    'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/Cortical/psp_2_cortical.txt',
    sep='\t',
    header=None,
)
file_path = 'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/Cortical/'

In [44]:
# Keep the top 10% most hematoxylin-intense predicted cells of each slide (percentile=90)
# and summarise their DAB values: mean, max, 75th percentile
describe_cortical, cortical_mean_hema = check_hema_dab(
    file_list=cortical_list,
    file_path=file_path,
    percentile=90,
)

In [2]:
# Inspect the DAB values of the top 10% most hematoxylin-intense cells:
#   mean = mean value from each slide
#   75%  = 75th percentile value from each slide
#   max  = max value from each slide
dab_hema_summary(
    describe_list=describe_cortical,
    mean_hema=cortical_mean_hema,
    file_list=cortical_list,
)

NameError: name 'describe_cortical' is not defined

## PSP stage 5

**Cortical slides** These slides are likely to have quite heavy tau

In [51]:
# PSP stage 5, cortex: header-less table of prediction file names, plus the folder holding those files
cortical_list = pd.read_csv(
    'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/Cortical/psp_5_cortical.txt',
    sep='\t',
    header=None,
)
file_path = 'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/Cortical/'

In [52]:
# Keep the top 10% most hematoxylin-intense predicted cells of each slide (percentile=90)
# and summarise their DAB values: mean, max, 75th percentile
describe_cortical, cortical_mean_hema = check_hema_dab(
    file_list=cortical_list,
    file_path=file_path,
    percentile=90,
)

In [54]:
# Print the cross-slide DAB summary for the stage 5 cortical slides
dab_hema_summary(
    describe_list=describe_cortical,
    mean_hema=cortical_mean_hema,
    file_list=cortical_list,
)

---------Mean DAB across slides-----
max of mean:  0.3961681922196796
min of mean:  0.10415124595610736
mean of mean:  0.15987785389863027
---------75% DAB across slides---------
max of 75%:  0.643575
min of 75%:  0.1147
mean of 75%:  0.16992534722222222
---------Max DAB across slides---------
max of max:  1.1371
min of max:  0.6714
mean of max:  0.9523222222222222
755557.svs_predictions.txt 0.7619519522681407
771861.svs_predictions.txt 0.3874764013283687


**Let's look at BG slides**

In [56]:
# PSP stage 5, BG: header-less table of prediction file names, plus the folder holding those files
bg_list = pd.read_csv(
    'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/BG/psp_5_bg.txt',
    sep='\t',
    header=None,
)
bg_path = 'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/BG/'

In [57]:
# Keep the top 10% most hematoxylin-intense predicted cells of each slide (percentile=90)
# and summarise their DAB values: mean, max, 75th percentile
describe_bg, bg_mean_hema = check_hema_dab(
    file_list=bg_list,
    file_path=bg_path,
    percentile=90,
)

In [58]:
# Print the cross-slide DAB summary for the stage 5 BG slides
dab_hema_summary(
    describe_list=describe_bg,
    mean_hema=bg_mean_hema,
    file_list=bg_list,
)

---------Mean DAB across slides-----
max of mean:  0.20130028059701494
min of mean:  0.10667785664749319
mean of mean:  0.15912049518622481
---------75% DAB across slides---------
max of 75%:  0.2095
min of 75%:  0.105
mean of 75%:  0.17057499999999998
---------Max DAB across slides---------
max of max:  1.1578
min of max:  0.709
mean of max:  1.0299846153846155
747870.svs_predictions.txt 0.7961706178160919
771914.svs_predictions.txt 0.5462703431176518
