### Quantify cell density 

In [33]:
# Import libraries
# The folder holding the project-local helper module is put first on the
# module search path so the import below can find it.
import sys
sys.path.insert(0,
                '/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_quantification/')
# NOTE(review): `pd` and the calculate_* functions used by the later cells are
# not imported by name in the cells shown here; presumably they are provided
# by this star import -- confirm against helper_functions.
from helper_functions import *

In [34]:
# Load the list of prediction files: the first column of the
# space-separated, header-less predictions.txt in the prediction folder.
file_path_pred = 'C:/Users/mokur/OneDrive/Desktop/Digital_path/Cell_pipeline/Predictions/BG/'
prediction_table = pd.read_csv(
    file_path_pred + 'predictions.txt',
    sep=' ',
    header=None,
)
prediction_list = prediction_table[0]

In [35]:
# Load the tab-separated project metadata table.
file_path_meta = '/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Metadata/'
metadata_file = file_path_meta + 'cell_project_metadata_withComments.txt'
metadata_ = pd.read_csv(metadata_file, sep='\t')


In [36]:
# File name of the exported, tab-separated results table.
# NOTE(review): the '027' suffix presumably mirrors tau_positive_threshold=0.27
# used in the tau-negative cells -- keep the two in sync if either changes.
output_name = 'cell_counts_BG_negative_027.txt'

**Create dataframe with cell counts in grey matter: FOR occipital & cortical only** 

In [21]:
# Cell counts for every listed prediction file, restricted to the
# 'Grey_matter' area.
cell_counts_df = calculate_cell_count_df(
    file_path_pred, prediction_list, area='Grey_matter'
)

In [22]:
# Preview the first rows of the grey-matter cell-count table.
cell_counts_df.head()

Unnamed: 0,Image_name,Astro,Neuron,Oligo,Others,Ambiguous,Total,p_Astro,p_Neuron,p_Oligo,p_Others
0,703521.svs,22683,56649,33077,108547,19912,220956,0.102658,0.256381,0.149699,0.491261
1,721703.svs,58291,61435,52204,80152,21111,252082,0.231238,0.24371,0.207091,0.31796
2,721770.svs,20799,42721,26438,49926,12351,139884,0.148687,0.305403,0.188999,0.35691
3,721855.svs,55118,57377,57160,76533,23437,246188,0.223886,0.233062,0.23218,0.310872
4,747361.svs,25999,55825,26029,67186,15174,175039,0.148533,0.318929,0.148704,0.383834


Create dataframe with tau negative counts in grey matter

In [23]:
# Tau-negative counts per image in the 'Grey_matter' area. The helper returns
# the count table together with a second value, kept here as `f`.
tau_negative_counts_df, f = calculate_tau_negative_cell_count_df_missing(
    file_path=file_path_pred,
    prediction_list=prediction_list,
    tau_positive_threshold=0.27,
    # This DAB feature is prone to issues with channel bleeding.
    dab_feature='DAB: Nucleus: Mean',
    area='Grey_matter',
)

In [24]:
# Second return value of the tau-negative helper; it is an empty list here.
# Presumably it lists images lacking the requested area (as `faulty` does
# for the basal-ganglia cells) -- confirm in helper_functions.
f

[]

In [25]:
# Preview the first rows of the tau-negative count table.
tau_negative_counts_df.head()

Unnamed: 0,Image_name,Astro-,Neuron-,Oligo-,Others-,Ambiguous-
0,703521.svs,22668,56635,33010,108126,19792
1,721703.svs,58278,61406,52186,80022,21079
2,721770.svs,20766,42665,26401,49519,12279
3,721855.svs,55085,57324,57072,76224,23320
4,747361.svs,25992,55798,25896,66609,14996


Combine two dataframes

In [26]:
# Join the total counts and the tau-negative counts on the image name.
combined = pd.merge(
    cell_counts_df,
    tau_negative_counts_df,
    on=['Image_name'],
)
combined.head()

Unnamed: 0,Image_name,Astro,Neuron,Oligo,Others,Ambiguous,Total,p_Astro,p_Neuron,p_Oligo,p_Others,Astro-,Neuron-,Oligo-,Others-,Ambiguous-
0,703521.svs,22683,56649,33077,108547,19912,220956,0.102658,0.256381,0.149699,0.491261,22668,56635,33010,108126,19792
1,721703.svs,58291,61435,52204,80152,21111,252082,0.231238,0.24371,0.207091,0.31796,58278,61406,52186,80022,21079
2,721770.svs,20799,42721,26438,49926,12351,139884,0.148687,0.305403,0.188999,0.35691,20766,42665,26401,49519,12279
3,721855.svs,55118,57377,57160,76533,23437,246188,0.223886,0.233062,0.23218,0.310872,55085,57324,57072,76224,23320
4,747361.svs,25999,55825,26029,67186,15174,175039,0.148533,0.318929,0.148704,0.383834,25992,55798,25896,66609,14996


Merge data with metadata

In [27]:
# Work on a copy so the loaded metadata table stays untouched.
metadata = metadata_.copy()
# Build the image file name from characters 1..6 of each Slice_ID plus '.svs'.
image_names = [slice_id[1:7] + '.svs' for slice_id in metadata_['Slice_ID']]
metadata.loc[:, 'Image_name'] = image_names

In [28]:
# Keep only the metadata columns needed alongside the counts.
metadata_columns = ['Patient_ID', 'Image_name', 'region_name', 'Comments', 'Diagnosis']
metadata_subset = metadata[metadata_columns]

In [29]:
# Attach the selected metadata columns to the count table by image name.
data1 = pd.merge(combined, metadata_subset, on=['Image_name'])
data1.head()

Unnamed: 0,Image_name,Astro,Neuron,Oligo,Others,Ambiguous,Total,p_Astro,p_Neuron,p_Oligo,p_Others,Astro-,Neuron-,Oligo-,Others-,Ambiguous-,Patient_ID,region_name,Comments,Diagnosis
0,703521.svs,22683,56649,33077,108547,19912,220956,0.102658,0.256381,0.149699,0.491261,22668,56635,33010,108126,19792,NP18-00257,occipital,novel,PSP
1,721703.svs,58291,61435,52204,80152,21111,252082,0.231238,0.24371,0.207091,0.31796,58278,61406,52186,80022,21079,NP17-00136,occipital,training,PSP
2,721770.svs,20799,42721,26438,49926,12351,139884,0.148687,0.305403,0.188999,0.35691,20766,42665,26401,49519,12279,NP17-00082,occipital,training,PSP
3,721855.svs,55118,57377,57160,76533,23437,246188,0.223886,0.233062,0.23218,0.310872,55085,57324,57072,76224,23320,NP17-00242,occipital,novel,PSP
4,747361.svs,25999,55825,26029,67186,15174,175039,0.148533,0.318929,0.148704,0.383834,25992,55798,25896,66609,14996,NP18-00004,occipital,novel,PSP


In [30]:
# Relabel any '+'-suffixed count columns with a '-' suffix in a single pass.
# Labels that are not present in the frame are left alone by rename.
data1 = data1.rename(columns={
    'Astro+': 'Astro-',
    'Neuron+': 'Neuron-',
    'Oligo+': 'Oligo-',
    'Others+': 'Others-',
    'Ambiguous+': 'Ambiguous-',
})

In [31]:
# List the column labels of the merged table.
data1.columns

Index(['Image_name', 'Astro', 'Neuron', 'Oligo', 'Others', 'Ambiguous',
       'Total', 'p_Astro', 'p_Neuron', 'p_Oligo', 'p_Others', 'Astro-',
       'Neuron-', 'Oligo-', 'Others-', 'Ambiguous-', 'Patient_ID',
       'region_name', 'Comments', 'Diagnosis'],
      dtype='object')

In [37]:
# Export the grey-matter table as a tab-separated file without the index.
# `output_name` is also the target of the basal-ganglia export at the end of
# the notebook, so writing this table to that same path means it gets
# overwritten on a full run. The grey-matter table therefore gets its own,
# prefixed file name in the same folder.
path_final = ('C:/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_quantification/Data/'
              + 'grey_matter_' + output_name)
data1.to_csv(path_final, sep='\t', index=False)

**For Basal ganglia**

Cell counts

In [38]:
# Cell counts for each of the three basal-ganglia areas.
cell_counts_STR = calculate_cell_count_df(
    file_path_pred, prediction_list, area='Striatum'
)
cell_counts_GP = calculate_cell_count_df(
    file_path_pred, prediction_list, area='Globus Pallidus'
)
# The *_missing variant also hands back a second value, kept here as `faulty`.
cell_counts_STN, faulty = calculate_cell_count_df_missing(
    file_path_pred, prediction_list, area='Subthalamic Nucleus'
)

In [39]:
# Images reported back by the Subthalamic Nucleus cell-count call:
faulty # these files do not have subthalamic nucleus

['747308.svs',
 '747828.svs',
 '747871.svs',
 '755497.svs',
 '771775.svs',
 '771885.svs']

In [40]:
# Add region name: tag every row of each per-area table with its area label.
for counts_frame, region_label in (
    (cell_counts_STN, 'Subthalamic Nucleus'),
    (cell_counts_GP, 'Globus Pallidus'),
    (cell_counts_STR, 'Striatum'),
):
    counts_frame.loc[:, 'region_name'] = [region_label] * counts_frame.shape[0]

In [41]:
# Stack the per-area cell-count tables row-wise (STN, then STR, then GP).
cell_count_frames = [cell_counts_STN, cell_counts_STR, cell_counts_GP]
combined1 = pd.concat(cell_count_frames)

Tau negative cell counts

In [42]:
# Tau-negative counts in the 'Subthalamic Nucleus' area; the second return
# value replaces the earlier `faulty`.
tau_negative_counts_STN, faulty = calculate_tau_negative_cell_count_df_missing(
    file_path=file_path_pred,
    prediction_list=prediction_list,
    tau_positive_threshold=0.27,
    # This DAB feature is prone to issues with channel bleeding.
    dab_feature='DAB: Nucleus: Mean',
    area='Subthalamic Nucleus',
)

In [43]:
# Images reported back by the Subthalamic Nucleus tau-negative call.
faulty

['747308.svs',
 '747828.svs',
 '747871.svs',
 '755497.svs',
 '771775.svs',
 '771885.svs']

In [44]:
# Arguments shared by both tau-negative calls below.
shared_tau_args = dict(
    file_path=file_path_pred,
    prediction_list=prediction_list,
    tau_positive_threshold=0.27,
    # This DAB feature is prone to issues with channel bleeding.
    dab_feature='DAB: Nucleus: Mean',
)
# Tau-negative counts for the Striatum and the Globus Pallidus; the second
# return value of each call lands in `f` (the later call overwrites the earlier).
tau_negative_counts_STR, f = calculate_tau_negative_cell_count_df_missing(
    area='Striatum', **shared_tau_args
)
tau_negative_counts_GP, f = calculate_tau_negative_cell_count_df_missing(
    area='Globus Pallidus', **shared_tau_args
)


In [45]:
# Add region name: tag every row of each tau-negative table with its area label.
for tau_frame, region_label in (
    (tau_negative_counts_STN, 'Subthalamic Nucleus'),
    (tau_negative_counts_GP, 'Globus Pallidus'),
    (tau_negative_counts_STR, 'Striatum'),
):
    tau_frame.loc[:, 'region_name'] = [region_label] * tau_frame.shape[0]

In [46]:
# Stack the per-area tau-negative tables row-wise (STR, then GP, then STN).
tau_negative_frames = [
    tau_negative_counts_STR,
    tau_negative_counts_GP,
    tau_negative_counts_STN,
]
combined2 = pd.concat(tau_negative_frames)

In [47]:
# Pair each image/area row of the cell counts with its tau-negative counts.
combined = pd.merge(
    combined1,
    combined2,
    on=['Image_name', 'region_name'],
)

In [48]:
# Preview the first rows of the merged per-area table.
combined.head()

Unnamed: 0,Image_name,Astro,Neuron,Oligo,Others,Ambiguous,Total,p_Astro,p_Neuron,p_Oligo,p_Others,region_name,Astro-,Neuron-,Oligo-,Others-,Ambiguous-
0,721708.svs,829,256,3304,2247,614,6636,0.124925,0.038577,0.49789,0.338608,Subthalamic Nucleus,800,224,3281,2083,589
1,721866.svs,1241,224,2025,1454,421,4944,0.251011,0.045307,0.409587,0.294094,Subthalamic Nucleus,1226,209,2017,1385,403
2,722594.svs,1845,822,4132,13077,1968,19876,0.092826,0.041356,0.207889,0.657929,Subthalamic Nucleus,1549,657,3775,7417,1622
3,747131.svs,1899,518,3259,5886,888,11562,0.164245,0.044802,0.281872,0.509081,Subthalamic Nucleus,1836,378,3214,4678,808
4,747293.svs,1813,1097,6089,7203,1247,16202,0.1119,0.067708,0.375818,0.444575,Subthalamic Nucleus,1674,933,5986,6395,1142


Merge results with metadata

In [49]:
# Start again from a fresh copy of the loaded metadata table.
metadata = metadata_.copy()
# Build the image file name from characters 1..6 of each Slice_ID plus '.svs'.
image_names = [slice_id[1:7] + '.svs' for slice_id in metadata_['Slice_ID']]
metadata.loc[:, 'Image_name'] = image_names

In [50]:
# Keep only the metadata columns needed here; region_name is left out because
# the count table already carries its own region_name column.
metadata_columns = ['Patient_ID', 'Image_name', 'Comments', 'Diagnosis']
metadata_subset = metadata[metadata_columns]

In [51]:
# Attach the selected metadata columns to the per-area table by image name.
data1 = pd.merge(combined, metadata_subset, on=['Image_name'])
data1.head()

Unnamed: 0,Image_name,Astro,Neuron,Oligo,Others,Ambiguous,Total,p_Astro,p_Neuron,p_Oligo,p_Others,region_name,Astro-,Neuron-,Oligo-,Others-,Ambiguous-,Patient_ID,Comments,Diagnosis
0,721708.svs,829,256,3304,2247,614,6636,0.124925,0.038577,0.49789,0.338608,Subthalamic Nucleus,800,224,3281,2083,589,NP17-00136,training,PSP
1,721708.svs,12660,16721,50050,26568,7158,105999,0.119435,0.157747,0.472174,0.250644,Striatum,12604,16682,49991,26307,7107,NP17-00136,training,PSP
2,721708.svs,7005,1278,30273,19675,4350,58231,0.120297,0.021947,0.519878,0.337878,Globus Pallidus,6848,1183,30148,18471,4212,NP17-00136,training,PSP
3,721866.svs,1241,224,2025,1454,421,4944,0.251011,0.045307,0.409587,0.294094,Subthalamic Nucleus,1226,209,2017,1385,403,NP17-00242,novel,PSP
4,721866.svs,9898,14478,25972,22273,5663,72621,0.136297,0.199364,0.357638,0.306702,Striatum,9805,14397,25908,21631,5585,NP17-00242,novel,PSP


In [52]:
# Write the merged table to the export folder as a tab-separated file,
# leaving out the index.
export_dir = 'C:/Users/mokur/OneDrive - University of Cambridge/Attachments/Jan2023/Cell_pipeline/Cell_quantification/Data/'
path_final = export_dir + output_name
data1.to_csv(path_final, sep='\t', index=False)