# Purpose: To compare the automatically segmented cells to the manual segmentations to score them based on (1) count and (2) total area of segmentation

Date Created: January 7, 2022

Dates Edited: 

__OGD Severity Study__

Step 1: Get the cell count and total segmented area of each automatically segmented image

*Step 1a: User inputs*

In [1]:
import os
import numpy as np
from skimage import measure
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage import filters
from skimage import morphology
from scipy import ndimage

In [2]:
# Folder that is searched for the thresholded .npy arrays to score
folder_location = '/Users/hhelmbre/Desktop/ogd_severity_undergrad/10_4_21_redownload/all_thresh'

In [3]:
# Collect the full path of every .npy file found directly inside folder_location
threshold_paths = [
    os.path.join(folder_location, entry)
    for entry in os.listdir(folder_location)
    if entry.endswith(".npy")
]

In [4]:
# Properties requested from scikit-image regionprops: one row per labelled
# region ('label') together with its pixel 'area'. The number of rows per
# image is later used as the cell count.
properties_list = ('label', 'area')

# Build one small frame per file and concatenate them once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies the growing table on every iteration.
per_file_frames = []
for names in threshold_paths:
    array = np.load(names)
    label_image = measure.label(array, connectivity=array.ndim)
    # Every labelled pixel is non-zero, so this is the total segmented area
    total_area = np.count_nonzero(label_image)
    props = measure.regionprops_table(label_image, properties=properties_list)

    # Assumes file names of the form <image name>_<threshold method>_<suffix>.npy;
    # splitting from the right keeps underscores inside the image name intact.
    stem = os.path.basename(names).rsplit('.', 1)[0]
    name_parts = stem.rsplit('_', 2)

    file_df = pd.DataFrame(props)
    file_df['filepath'] = names
    file_df['threshold_method'] = name_parts[1]
    file_df['filename'] = name_parts[0]
    file_df['total_area'] = total_area
    per_file_frames.append(file_df)

if per_file_frames:
    # Like the former append chain, this keeps each file's own 0..k-1 index
    df = pd.concat(per_file_frames)
else:
    # No .npy files were found: keep df defined with the expected columns
    df = pd.DataFrame(columns=['label', 'area', 'filepath',
                               'threshold_method', 'filename', 'total_area'])

Looking at the dataframe:

In [27]:
# Display the per-region table ordered by image name (returns a sorted copy; df itself is unchanged)
df.sort_values(by=['filename'])

Unnamed: 0,label,area,filepath,threshold_method,filename,total_area
44,45,100,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,yen,4-50-10_40x_cortex_5,27726
20,21,145,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-50-10_40x_cortex_5,28423
19,20,444,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-50-10_40x_cortex_5,28423
18,19,188,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-50-10_40x_cortex_5,28423
17,18,681,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-50-10_40x_cortex_5,28423
...,...,...,...,...,...,...
4,5,1177,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-56-9_40x_thalamus_4,5242
3,4,366,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-56-9_40x_thalamus_4,5242
2,3,567,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,otsu,4-56-9_40x_thalamus_4,5242
2,3,567,/Users/hhelmbre/Desktop/ogd_severity_undergrad...,isodata,4-56-9_40x_thalamus_4,5243


In [6]:
# Cell count = number of labelled regions per (image, threshold method) pair
count_series = df.groupby(['filename', 'threshold_method']).size()
count_series_df = count_series.reset_index(name='automated_cell_count')
count_series_df

Unnamed: 0,filename,threshold_method,automated_cell_count
0,4-50-10_40x_cortex_5,isodata,44
1,4-50-10_40x_cortex_5,li,71
2,4-50-10_40x_cortex_5,mean,73
3,4-50-10_40x_cortex_5,otsu,44
4,4-50-10_40x_cortex_5,triangle,88
...,...,...,...
136,4-56-9_40x_thalamus_4,li,5
137,4-56-9_40x_thalamus_4,mean,8
138,4-56-9_40x_thalamus_4,otsu,5
139,4-56-9_40x_thalamus_4,triangle,6


In [7]:
# total_area (non-zero pixel count of the labelled image) and the sum of the
# individual region areas should agree; keeping total_area as a grouping key
# puts both values side by side so they can be compared.
# Only the numeric region columns are summed: since pandas 2.0 a bare .sum()
# no longer drops the string 'filepath' column and would concatenate the paths.
area_series = df.groupby(['filename', 'threshold_method', 'total_area'])[['label', 'area']].sum()
area_series_df = area_series.reset_index()
area_series_df = area_series_df.rename(columns={'filename': 'filename1', 'threshold_method': 'threshold_method1', 'total_area': 'total_area1', 'area': 'auto_total_area'})
area_series_df

Unnamed: 0,filename1,threshold_method1,total_area1,label,auto_total_area
0,4-50-10_40x_cortex_5,isodata,28448,990,28448
1,4-50-10_40x_cortex_5,li,51821,2556,51821
2,4-50-10_40x_cortex_5,mean,61936,2701,61936
3,4-50-10_40x_cortex_5,otsu,28423,990,28423
4,4-50-10_40x_cortex_5,triangle,80662,3916,80662
...,...,...,...,...,...
136,4-56-9_40x_thalamus_4,li,6623,15,6623
137,4-56-9_40x_thalamus_4,mean,9185,36,9185
138,4-56-9_40x_thalamus_4,otsu,5242,15,5242
139,4-56-9_40x_thalamus_4,triangle,7832,21,7832


In [8]:
# Put the count table and the area table side by side. With axis=1 the rows are
# matched on index label, so this relies on both tables carrying the same
# 0..n-1 index in the same group order; the next two cells check that the key
# columns really line up.
automated_thresh_df = pd.concat([count_series_df, area_series_df], axis=1)
automated_thresh_df

Unnamed: 0,filename,threshold_method,automated_cell_count,filename1,threshold_method1,total_area1,label,auto_total_area
0,4-50-10_40x_cortex_5,isodata,44,4-50-10_40x_cortex_5,isodata,28448,990,28448
1,4-50-10_40x_cortex_5,li,71,4-50-10_40x_cortex_5,li,51821,2556,51821
2,4-50-10_40x_cortex_5,mean,73,4-50-10_40x_cortex_5,mean,61936,2701,61936
3,4-50-10_40x_cortex_5,otsu,44,4-50-10_40x_cortex_5,otsu,28423,990,28423
4,4-50-10_40x_cortex_5,triangle,88,4-50-10_40x_cortex_5,triangle,80662,3916,80662
...,...,...,...,...,...,...,...,...
136,4-56-9_40x_thalamus_4,li,5,4-56-9_40x_thalamus_4,li,6623,15,6623
137,4-56-9_40x_thalamus_4,mean,8,4-56-9_40x_thalamus_4,mean,9185,36,9185
138,4-56-9_40x_thalamus_4,otsu,5,4-56-9_40x_thalamus_4,otsu,5242,15,5242
139,4-56-9_40x_thalamus_4,triangle,6,4-56-9_40x_thalamus_4,triangle,7832,21,7832


In [9]:
# Sanity check: the image names from the count table and the area table match row for row
automated_thresh_df['filename'].equals(automated_thresh_df['filename1'])

True

In [10]:
# Sanity check: the threshold methods from the count table and the area table match row for row
automated_thresh_df['threshold_method'].equals(automated_thresh_df['threshold_method1'])

True

In [11]:
# Remove the duplicated key columns brought in by the area table, plus 'label',
# which after the groupby sum is only a sum of region ids
automated_thresh_df = automated_thresh_df.drop(['filename1', 'threshold_method1', 'total_area1', 'label'], axis=1)

In [12]:
# Display the combined count / area table after the cleanup
automated_thresh_df

Unnamed: 0,filename,threshold_method,automated_cell_count,auto_total_area
0,4-50-10_40x_cortex_5,isodata,44,28448
1,4-50-10_40x_cortex_5,li,71,51821
2,4-50-10_40x_cortex_5,mean,73,61936
3,4-50-10_40x_cortex_5,otsu,44,28423
4,4-50-10_40x_cortex_5,triangle,88,80662
...,...,...,...,...
136,4-56-9_40x_thalamus_4,li,5,6623
137,4-56-9_40x_thalamus_4,mean,8,9185
138,4-56-9_40x_thalamus_4,otsu,5,5242
139,4-56-9_40x_thalamus_4,triangle,6,7832


In [13]:
# Save the automated counts and areas into the same folder as the thresholded arrays.
# The DataFrame index is written as the first, unnamed CSV column (pandas default).
automated_thresh_df.to_csv('/Users/hhelmbre/Desktop/ogd_severity_undergrad/10_4_21_redownload/all_thresh/ogd_severity_auto_thresh_data.csv')

Step 2: Import average count and average total area

In [14]:
# CSV with the compiled manual segmentation results (loaded into manual_df below)
compiled_man_counts_path = '/Users/hhelmbre/Desktop/microfiber/ogd_severity_study_man_compile.csv'

In [15]:
# Load the manual reference values; the scoring step reads its 'Filename',
# 'Average Manual Count' and 'Average Area' columns
manual_df = pd.read_csv(compiled_man_counts_path)

In [16]:
# Display the manual reference table
manual_df

Unnamed: 0,Filename,Average Manual Count,Average Area
0,4-50-10_40x_cortex_5,57.0,64127.0
1,4-50-15_40x_cortex_2,172.0,83631.0
2,4-50-7_40x_cortex_5,115.0,50713.0
3,4-50-10_40x_hippocampus_1,102.0,48673.0
4,4-50-15_40x_hippocampus_4,125.0,84403.0
5,4-50-15_40x_hippocampus_1,102.0,79664.0
6,4-50-10_40x_thalamus_3,135.0,79079.0
7,4-50-15_40x_thalamus_2,75.0,95088.0
8,4-56-6_40x_cortex_2,2.0,1212.0
9,4-56-8_40x_cortex_3,24.0,22234.0


Step 3: Compare the scores

In [17]:
# Number of (image, threshold method) rows that will be scored
automated_thresh_df_len = len(automated_thresh_df)

In [18]:
# Score every (image, threshold method) row as the absolute percent error of
# the automated value against the manual average, for both count and area.
# Produces count_scores_list and area_scores_list in the row order of
# automated_thresh_df.

# One manual row per image; if an image is listed more than once, the first
# entry is used.
manual_lookup = manual_df.drop_duplicates(subset='Filename').set_index('Filename')

# Align the manual values with the automated rows. Images without a manual
# entry get NaN scores instead of raising a KeyError.
matched_manual = manual_lookup.reindex(automated_thresh_df['filename'])
manual_counts = matched_manual['Average Manual Count'].to_numpy(dtype=float)
manual_areas = matched_manual['Average Area'].to_numpy(dtype=float)
auto_counts = automated_thresh_df['automated_cell_count'].to_numpy(dtype=float)
auto_areas = automated_thresh_df['auto_total_area'].to_numpy(dtype=float)

# A manual value of zero yields inf (or NaN for 0/0); silence the resulting
# divide warnings, the values themselves are kept as they are.
with np.errstate(divide='ignore', invalid='ignore'):
    count_scores_list = (np.abs((auto_counts - manual_counts) / manual_counts) * 100).tolist()
    area_scores_list = (np.abs((auto_areas - manual_areas) / manual_areas) * 100).tolist()

  import sys


In [23]:
# Attach the absolute percent errors (count and area) to their rows; the lists
# are in the same row order as automated_thresh_df
automated_thresh_df['count_scores'] = count_scores_list
automated_thresh_df['area_scores'] = area_scores_list
# Combined error: plain sum of the two percent errors (lower is better)
automated_thresh_df['total_score_error'] = automated_thresh_df['count_scores'] + automated_thresh_df['area_scores']
automated_thresh_df.sort_values(by=['filename'])

Unnamed: 0,filename,threshold_method,automated_cell_count,auto_total_area,count_scores,area_scores,total_score_error
0,4-50-10_40x_cortex_5,isodata,44,28448,22.807018,55.638031,78.445048
1,4-50-10_40x_cortex_5,li,71,51821,24.561404,19.190045,43.751448
2,4-50-10_40x_cortex_5,mean,73,61936,28.070175,3.416658,31.486833
3,4-50-10_40x_cortex_5,otsu,44,28423,22.807018,55.677016,78.484033
4,4-50-10_40x_cortex_5,triangle,88,80662,54.385965,25.784771,80.170736
...,...,...,...,...,...,...,...
135,4-56-9_40x_thalamus_4,isodata,5,5243,0.000000,26.722572,26.722572
136,4-56-9_40x_thalamus_4,li,5,6623,0.000000,7.435360,7.435360
137,4-56-9_40x_thalamus_4,mean,8,9185,60.000000,28.371768,88.371768
138,4-56-9_40x_thalamus_4,otsu,5,5242,0.000000,26.736548,26.736548


In [20]:
# Make pandas treat +/-inf as missing so that the dropna() in the next cell also
# removes infinite scores.
# NOTE(review): 'mode.use_inf_as_na' is deprecated in recent pandas releases
# (2.1+) - confirm against the installed version; replacing inf with NaN
# explicitly is the forward-compatible alternative.
pd.set_option('mode.use_inf_as_na', True)

In [22]:
# Rank the threshold methods by their summed error over all scored images.

# Turn infinite scores into NaN explicitly so they are dropped regardless of
# the (deprecated) global 'mode.use_inf_as_na' option.
automated_thresh_df.replace([np.inf, -np.inf], np.nan, inplace=True)
automated_thresh_df.dropna(inplace=True)

# numeric_only=True keeps the string 'filename' column out of the sum; since
# pandas 2.0 a bare .sum() would concatenate the file names instead of
# dropping the column.
final_series = automated_thresh_df.groupby(['threshold_method']).sum(numeric_only=True)
final_series_df = final_series.reset_index()
final_series_df.sort_values(by=['total_score_error'])

Unnamed: 0,threshold_method,automated_cell_count,auto_total_area,count_scores,area_scores,total_score_error
3,minimum,27,5818,168.954248,191.92479,360.879039
6,yen,529,341956,885.301617,1132.231421,2017.533038
5,triangle,942,903686,1196.952275,1135.361117,2332.313392
4,otsu,1222,471637,1536.115642,1164.42553,2700.541172
0,isodata,1244,478664,1699.060086,1278.210003,2977.270089
1,li,1651,1043027,3170.985947,3277.633936,6448.619883
2,mean,1759,1272361,6447.209822,5868.791403,12316.001226


In the ranking above, triangle and minimum were removed from consideration because they did not run on all images. Their missing images also lower their total_score_error, since the score is simply a sum over the images that were scored.

### Yen is the winner for the OGD severity data set