In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np 
import glob
import nibabel as nb
import itertools
import json
import sys
import statsmodels.api as sm
from statsmodels.formula.api import ols

from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder


%matplotlib inline

In [2]:
flatten = lambda l: [item for sublist in l for item in sublist]

In [29]:
def vol_counts(model_props):   
    sample_count_dict = {}
    base_dir, proj, model, dataset, report_id = model_props
    sample_dir = f'{base_dir}/projects/{proj}/outs/{proj}_{dataset}_{model}/{proj}_{dataset}_{model}_predictions_{dataset}/*_samples/**'
    mean_segs_dir = f'{base_dir}/projects/{proj}/outs/{proj}_{dataset}_{model}/{proj}_{dataset}_{model}_predictions_{dataset}/*_seg.nii.gz'
    seg_paths = glob.glob(sample_dir)
    mean_seg_paths = glob.glob(mean_segs_dir)
    seg_paths.extend(mean_seg_paths)
    for r_id, sample in enumerate(seg_paths):
        print("\rProcessing {} {}/{}.".format(proj, r_id, len(seg_paths)), end="")

        split_str = sample.split('/')[-1].split('.')[0]
        v_id = split_str.split('_')[0]
        sample_id = split_str.split('_')[-1]
        nifti_file = nb.load(sample)
        header = nifti_file.header
        pix_volume = np.prod([2,2,3])
        s_data = nifti_file.get_fdata()
        unique_vals, counts = np.unique(s_data, return_counts=True)
        if v_id not in sample_count_dict.keys():
            sample_count_dict[v_id] = {str(i)+'_spleen':None for i in range(10)}
            sample_count_dict[v_id].update({str(i)+'_liver':None for i in range(10)})
        if sample_id == 'seg':
            sample_count_dict[v_id]['seg_liver'] = np.round(counts[2]*pix_volume)
            sample_count_dict[v_id]['seg_spleen'] = np.round(counts[1]*pix_volume)
        else:
            sample_count_dict[v_id][str(sample_id)+'_liver'] = counts[2]    
            sample_count_dict[v_id][str(sample_id)+'_spleen'] = counts[1] 
    
    print('')
    df = pd.DataFrame.from_dict(sample_count_dict, orient="index")
    path_to_save = f'{base_dir}/projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/{report_id}_sample_count_report.csv'
    df.to_csv(path_to_save, index_label='volume_id')
    return {f'{proj}': path_to_save}

def ground_truth_vol_counts(dataset_props, vols_to_look=None, vols_to_look_type='all'):
    sample_count_dict = {}
    base_dir, glob_dir, dataset_group, dataset, process_status = dataset_props
    if process_status:
        seg_paths = glob.glob(f'{base_dir}/{dataset_group}/{dataset}/processed_data/labels/**')
    else:
        seg_paths = glob.glob(glob_dir)
    for r_id, sample in enumerate(seg_paths):
        print("\rProcessing {} {} {}/{}.".format(vols_to_look_type, dataset, r_id, len(seg_paths)), end="")
        nifti_file = nb.load(sample)
        header = nifti_file.header
        if process_status:
            v_id = sample.split('/')[-1].split('.')[0]
            pix_volume = np.prod([2,2,3])
        else:
            v_id = sample.split('/')[-2]
            pix_volume = np.prod(header['pixdim'][1:4])
        
        if vols_to_look is not None and v_id in vols_to_look:
            continue
        
        s_data = nifti_file.get_fdata()
        
        unique_vals, counts = np.unique(s_data, return_counts=True)
        if len(counts) is not 3:
            print('not all class present, skipping!!')
            continue
        if v_id not in sample_count_dict.keys():
            sample_count_dict[v_id] = {}

        sample_count_dict[v_id]['seg_liver'] = np.round(counts[2]*pix_volume)
        sample_count_dict[v_id]['seg_spleen'] = np.round(counts[1]*pix_volume)
    
    print('')
    df = pd.DataFrame.from_dict(sample_count_dict, orient="index")
    path_to_save = f'{base_dir}/{dataset_group}/{dataset}/{vols_to_look_type}_processed_{process_status}_sample_count_report.csv'
    df.to_csv(path_to_save, index_label='volume_id')
    return {f'{vols_to_look_type}_{dataset}_processed_{process_status}': path_to_save}

In [35]:
def merge_all_model_features(props):
    base_dir, proj, model, dataset, report_id = props
    df = pd.read_csv(f'./projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/{report_id}_sample_count_report.csv')
    df_k_feats = pd.read_csv('./dataset_groups/whole_body_datasets/KORA/processsed_csv_.csv')
    df_reports = pd.read_csv(f'./projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/{report_id}_report.csv')
    df_merged = pd.merge(df, df_k_feats, how='inner', on=['volume_id'])
    df_merged_final = pd.merge(df_merged, df_reports, how='inner', on=['volume_id'])
    path_to_save = f'./projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/{report_id}_concat_report_final.csv'
    df_merged_final.to_csv(path_to_save)
    return {f'{proj}':path_to_save}

def merge_all_dataset_features(props, vols_to_look_type='all'):
    base_dir, glob_dir, dataset_group, dataset, process_status = props
    df = pd.read_csv(f'{base_dir}/{dataset_group}/{dataset}/{vols_to_look_type}_processed_{process_status}_sample_count_report.csv')
    df_k_feats = pd.read_csv('./dataset_groups/whole_body_datasets/KORA/processsed_csv_.csv')
    df_merged = pd.merge(df, df_k_feats, how='inner', on=['volume_id'])
    path_to_save = f'{base_dir}/{dataset_group}/{dataset}/{vols_to_look_type}_processed_{process_status}_concat_report_final.csv'
    df_merged.to_csv(path_to_save)
    return {f'{vols_to_look_type}_{dataset}_processed_{process_status}':path_to_save}

# Ground Truth Segmentation data aggregation

In [5]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis/dataset_groups'
glob_dir = '/home/abhijit/nas_drive/Data_WholeBody/KORA/KORA_segs/ROI_liver_spleen_Daniel/**/comp_mask.nii'
dataset_group = 'whole_body_datasets'
dataset = 'KORA'
process_status = False

unprocessed_dataset_props = base_dir, glob_dir, dataset_group, dataset, process_status

In [6]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis/dataset_groups'
glob_dir = '/home/abhijit/Jyotirmay/my_thesis/dataset_groups'
dataset_group = 'whole_body_datasets'
dataset = 'KORA'
process_status = True

processed_dataset_props = base_dir, glob_dir, dataset_group, dataset, process_status

# Full_bayesian Segmentation data aggregation

In [43]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis'
proj = 'full_bayesian'
model = 'v2'
dataset = 'KORA'
report_id = '10_1571866968.4002764'

fb_props = base_dir, proj, model, dataset, report_id

In [None]:
# base_dir = '/home/abhijit/Jyotirmay/my_thesis'
# proj = 'full_bayesian'
# model = 'v3'
# dataset = 'KORA'
# report_id = '10_1572536287.589728'

# fb_props_0dot05 = base_dir, proj, model, dataset, report_id

In [45]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis'
proj = 'full_bayesian_0dot01'
model = 'v4'
dataset = 'KORA'
report_id = '10_1572514598.527084'

fb_props_0dot01 = base_dir, proj, model, dataset, report_id

# MC_Dropout Segmentation data aggregation

In [8]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis'
proj = 'MC_dropout_quicknat'
model = 'v2'
dataset = 'KORA'
report_id = '10_1572006141.7793334'

mcdropout_props = base_dir, proj, model, dataset, report_id

# Probablisitic_quicknat Segmentation data aggregation

In [9]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis'
proj = 'probabilistic_quicknat'
model = 'v2'
dataset = 'KORA'
report_id = '10_1571996796.7963011'

probabilistic_props = base_dir, proj, model, dataset, report_id

# Hierarchical_quicknat Segmentation data aggregation

In [10]:
base_dir = '/home/abhijit/Jyotirmay/my_thesis'
proj = 'hierarchical_quicknat'
model = 'v2'
dataset = 'KORA'
report_id = '10_1571905560.9377904'

hierarchical_props = base_dir, proj, model, dataset, report_id

In [50]:
all_models_prop = [fb_props, mcdropout_props, probabilistic_props, hierarchical_props, fb_props_0dot01]
all_models_prop = [fb_props_0dot01]
# all_datasets_prop = [unprocessed_dataset_props, processed_dataset_props]

# Reading volumes

In [47]:
test_vols = ['KORA2459774','KORA2453328','KORA2452913','KORA2452353','KORA2456928','KORA2462380','KORA2459873',
             'KORA2453082','KORA2455268','KORA2452967','KORA2453048','KORA2453677','KORA2452812','KORA2460903',
             'KORA2452364','KORA2460348','KORA2461338','KORA2461868','KORA2460326','KORA2453172','KORA2453136',
             'KORA2452206','KORA2460878','KORA2452129','KORA2456278','KORA2456199','KORA2460565','KORA2454788',
             'KORA2460174','KORA2453290','KORA2459605','KORA2460768','KORA2460504','KORA2453194','KORA2452834',
             'KORA2459123','KORA2453150','KORA2462345','KORA2460830','KORA2459310','KORA2459455','KORA2459763',
             'KORA2453620','KORA2461392','KORA2453578','KORA2453524','KORA2452409','KORA2460785','KORA2460867',
             'KORA2453642','KORA2459972','KORA2456202','KORA2455946','KORA2456917','KORA2452426','KORA2461184',
             'KORA2456379','KORA2456241','KORA2456793','KORA2452263','KORA2460633','KORA2452316','KORA2453844',
             'KORA2462093','KORA2459752','KORA2453470','KORA2459477','KORA2453306','KORA2461409','KORA2460779',
             'KORA2460309','KORA2456340','KORA2461206','KORA2460824','KORA2461956','KORA2459908','KORA2452924',
             'KORA2459947','KORA2453464','KORA2460216','KORA2461493','KORA2453374','KORA2452687','KORA2460315',
             'KORA2461146','KORA2460249','KORA2452941','KORA2453732','KORA2452338','KORA2453363','KORA2453833',
             'KORA2459526','KORA2459983','KORA2461632','KORA2459548','KORA2462150','KORA2461520','KORA2459807',
             'KORA2462374','KORA2458040','KORA2456661','KORA2452659','KORA2458366','KORA2452801','KORA2453811',
             'KORA2456672','KORA2457266','KORA2457044','KORA2453765','KORA2458402','KORA2461349','KORA2455525',
             'KORA2455296','KORA2458158','KORA2460447','KORA2455753','KORA2455935','KORA2456562','KORA2458197',
             'KORA2458707','KORA2455951','KORA2460889','KORA2460472','KORA2456385','KORA2455042','KORA2452665',
             'KORA2452190','KORA2462161','KORA2461151','KORA2459681','KORA2458068','KORA2457401','KORA2461555',
             'KORA2459745','KORA2453037','KORA2458265','KORA2457517','KORA2452868','KORA2462119','KORA2452381',
             'KORA2452094','KORA2459067','KORA2462352','KORA2456357','KORA2459244','KORA2461577','KORA2456021',
             'KORA2459499','KORA2461885','KORA2461252','KORA2460145','KORA2453589','KORA2460543'
            ]

In [49]:
model_vols = [vol_counts(prop) for prop in [fb_props_0dot01]]
# dataset_vols = [ground_truth_vol_counts(prop, vols_to_look=None, vols_to_look_type='all') for prop in all_datasets_prop]
# dataset_vols = [ground_truth_vol_counts(prop, vols_to_look=test_vols, vols_to_look_type='test') for prop in all_datasets_prop]

Processing full_bayesian 1682/1683.


In [51]:
model_merged_feats_path = [merge_all_model_features(prop) for prop in all_models_prop]
# all_dataset_merged_feats_path = [merge_all_dataset_features(prop) for prop in all_datasets_prop]
# test_dataset_merged_feats_path = [merge_all_dataset_features(prop, vols_to_look_type='test') for prop in all_datasets_prop]

In [52]:
print(model_merged_feats_path)

[{'full_bayesian': './projects/full_bayesian/reports/full_bayesian_KORA_v4/KORA/10_1572514598.527084_concat_report_final.csv'}]


In [41]:
print(all_dataset_merged_feats_path)

[{'all_KORA_processed_False': '/home/abhijit/Jyotirmay/my_thesis/dataset_groups/whole_body_datasets/KORA/all_processed_False_concat_report_final.csv'}, {'all_KORA_processed_True': '/home/abhijit/Jyotirmay/my_thesis/dataset_groups/whole_body_datasets/KORA/all_processed_True_concat_report_final.csv'}]


In [40]:
print(test_dataset_merged_feats_path)

[{'test_KORA_processed_False': '/home/abhijit/Jyotirmay/my_thesis/dataset_groups/whole_body_datasets/KORA/test_processed_False_concat_report_final.csv'}, {'test_KORA_processed_True': '/home/abhijit/Jyotirmay/my_thesis/dataset_groups/whole_body_datasets/KORA/test_processed_True_concat_report_final.csv'}]
