In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np 
import glob
import nibabel as nb
import itertools
import json
import sys
import statsmodels.api as sm
from statsmodels.formula.api import ols

from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

%matplotlib inline

In [2]:
def flatten(l):
    """Return one flat list holding the items of every sub-iterable of ``l`` (one level deep)."""
    return list(itertools.chain.from_iterable(l))

In [3]:
#df = pd.read_csv('/home/abhijit/Jyotirmay/my_thesis/projects/MC_dropout_quicknat/reports/MC_dropout_quicknat_UKB_v2/UKB/10_0.0_report.csv', index_col=0)
#df['real_volume_id'] = df['volume_id']
#df['volume_id'] = df['volume_id']+df['target_scan_file'].apply(lambda x:x.split('.')[0][-1])
#df

In [4]:
#df.to_csv('/home/abhijit/Jyotirmay/my_thesis/projects/MC_dropout_quicknat/reports/MC_dropout_quicknat_UKB_v2/UKB/0_0.0_report.csv')

In [5]:
def vol_counts(model_props):
    """Tabulate liver/spleen volumes for every predicted segmentation of one model.

    Globs the per-sample segmentations (``*_samples/**``) and the mean
    segmentations (``*_seg.nii.gz``) of the model's prediction folder, counts
    the voxels of each label and writes one CSV row per volume.

    Parameters
    ----------
    model_props : tuple
        ``(base_dir, proj, model, dataset, report_id)``. For ``dataset == 'KORA'``
        predictions are read from ``{base_dir}/projects/{proj}/outs``; for any
        other dataset they are read from the hard-coded UKB output directory.

    Returns
    -------
    dict
        ``{proj: path_to_csv}`` where the CSV is
        ``{base_dir}/projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/{report_id}_sample_count_report.csv``.

    Notes
    -----
    * A file is skipped unless it contains exactly three distinct voxel values.
      The values are taken positionally from the sorted result of ``np.unique``:
      the second is stored as spleen and the third as liver.
    * The voxel volume is the fixed product ``2*2*3``; the NIfTI header is not
      consulted (presumably predictions share one resampled spacing - confirm).
    * ``glob`` is called without ``recursive=True``, so ``**`` matches a single
      path component only.
    """
    base_dir, proj, model, dataset, report_id = model_props
    run_name = f'{proj}_{dataset}_{model}'
    if dataset == 'KORA':
        outs_root = f'{base_dir}/projects/{proj}'
    else:
        # Non-KORA (UKB) predictions live on the NAS drive, e.g.
        # .../ukb_outs/outs/probabilistic_quicknat_UKB_v2/probabilistic_quicknat_UKB_v2_predictions_UKB
        outs_root = '/home/abhijit/nas_drive/Abhijit/Jyotirmay/ukb_outs'
    predictions_dir = f'{outs_root}/outs/{run_name}/{run_name}_predictions_{dataset}'

    seg_paths = glob.glob(f'{predictions_dir}/*_samples/**')
    seg_paths.extend(glob.glob(f'{predictions_dir}/*_seg.nii.gz'))

    # Loop-invariant voxel volume, hoisted out of the per-file loop.
    pix_volume = np.prod([2, 2, 3])
    sample_count_dict = {}
    for r_id, sample in enumerate(seg_paths):
        print("\rProcessing {} {}/{}.".format(proj, r_id, len(seg_paths)), end="")

        # File stem looks like '<volume>_<sample index>' or '<volume>_seg'.
        split_str = sample.split('/')[-1].split('.')[0]
        v_id = split_str.split('_')[0]
        if dataset == 'UKB':
            v_id = v_id + '_20201_2_0'
        sample_id = split_str.split('_')[-1]

        s_data = nb.load(sample).get_fdata()
        _, counts = np.unique(s_data, return_counts=True)
        # Value comparison: `is not 3` tested object identity, which only
        # happened to work through CPython's small-int cache.
        if len(counts) != 3:
            print('not all class present, skipping!!')
            continue

        if v_id not in sample_count_dict:
            # Pre-create the ten sample slots so every row has the same columns.
            sample_count_dict[v_id] = {str(i)+'_spleen': None for i in range(10)}
            sample_count_dict[v_id].update({str(i)+'_liver': None for i in range(10)})

        if sample_id == 'seg':
            sample_count_dict[v_id]['seg_liver'] = np.round(counts[2]*pix_volume)
            sample_count_dict[v_id]['seg_spleen'] = np.round(counts[1]*pix_volume)
        else:
            sample_count_dict[v_id][str(sample_id)+'_liver'] = counts[2]*pix_volume
            sample_count_dict[v_id][str(sample_id)+'_spleen'] = counts[1]*pix_volume

    print('')
    df = pd.DataFrame.from_dict(sample_count_dict, orient="index")
    path_to_save = f'{base_dir}/projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/{report_id}_sample_count_report.csv'
    df.to_csv(path_to_save, index_label='volume_id')
    return {f'{proj}': path_to_save}

def ground_truth_vol_counts(dataset_props, vols_to_look=None, vols_to_look_type='all'):
    """Tabulate liver/spleen volumes of the ground-truth label maps of a dataset.

    Parameters
    ----------
    dataset_props : tuple
        ``(base_dir, glob_dir, dataset_group, dataset, process_status)``.
        With a truthy ``process_status`` the label files are globbed from
        ``{base_dir}/{dataset_group}/{dataset}/processed_data/labels/**``, the
        volume id is the file stem and the voxel volume is the fixed product
        ``2*2*3``. Otherwise ``glob_dir`` is used as the glob pattern, the
        volume id is the name of the file's parent directory and the voxel
        volume is the product of ``header['pixdim'][1:4]``.
    vols_to_look : collection of str, optional
        Volume ids to include. ``None`` (default) includes every volume; an
        empty collection therefore selects nothing.
    vols_to_look_type : str
        Tag used in the progress message, the CSV file name and the returned key.

    Returns
    -------
    dict
        ``{'{vols_to_look_type}_{dataset}_processed_{process_status}': path_to_csv}``.

    Notes
    -----
    A file is skipped unless it contains exactly three distinct voxel values;
    positionally (sorted, as returned by ``np.unique``) the second is stored as
    spleen and the third as liver.
    """
    base_dir, glob_dir, dataset_group, dataset, process_status = dataset_props
    if process_status:
        seg_paths = glob.glob(f'{base_dir}/{dataset_group}/{dataset}/processed_data/labels/**')
    else:
        seg_paths = glob.glob(glob_dir)

    selected = None if vols_to_look is None else set(vols_to_look)
    sample_count_dict = {}
    for r_id, sample in enumerate(seg_paths):
        print("\rProcessing {} {} {}/{}.".format(vols_to_look_type, dataset, r_id, len(seg_paths)), end="")

        if process_status:
            v_id = sample.split('/')[-1].split('.')[0]
        else:
            v_id = sample.split('/')[-2]

        # Keep only the requested volumes. The previous condition was inverted
        # (`v_id in vols_to_look` -> skip), which dropped exactly the volumes
        # that were asked for. Filtering before loading also avoids reading
        # files that are discarded anyway.
        if selected is not None and v_id not in selected:
            continue

        nifti_file = nb.load(sample)
        if process_status:
            pix_volume = np.prod([2, 2, 3])
        else:
            pix_volume = np.prod(nifti_file.header['pixdim'][1:4])

        _, counts = np.unique(nifti_file.get_fdata(), return_counts=True)
        # Value comparison instead of the identity test `is not 3`.
        if len(counts) != 3:
            print('not all class present, skipping!!')
            continue

        sample_count_dict[v_id] = {
            'seg_liver': np.round(counts[2]*pix_volume),
            'seg_spleen': np.round(counts[1]*pix_volume),
        }

    print('')
    df = pd.DataFrame.from_dict(sample_count_dict, orient="index")
    path_to_save = f'{base_dir}/{dataset_group}/{dataset}/{vols_to_look_type}_processed_{process_status}_sample_count_report.csv'
    df.to_csv(path_to_save, index_label='volume_id')
    return {f'{vols_to_look_type}_{dataset}_processed_{process_status}': path_to_save}

In [6]:
def organise_target_and_volumeid(df_path='/home/abhijit/Jyotirmay/my_thesis/projects/MC_dropout_quicknat/reports/MC_dropout_quicknat_UKB_v2/UKB/10_1573078374.453554_report.csv'):
    """Split the trailing character off ``volume_id`` in a UKB report CSV.

    The report is read with its first column as index. Unless the ``dataset``
    value of the second row equals ``2`` a message is printed and ``False`` is
    returned. Otherwise the last character of each ``volume_id`` is used to
    build ``target_scan_file`` and is then removed from ``volume_id``.

    Returns the modified DataFrame, or ``False`` for a non-UKB report.
    """
    report = pd.read_csv(df_path, index_col=0)

    # The check looks at row 1 rather than row 0.
    is_ukb = report['dataset'].values[1] == 2
    if not is_ukb:
        print('Not a UKB dataset report, not processing!')
        return False

    def scan_file_for(volume_id):
        # The last character of the id selects the scan-file name.
        tail = volume_id[-1]
        if tail == 's':
            return f'Dixon_BH_17s_opp_Dixon_BH_17{tail}.nii.gz'
        return f'Dixon_BH_17s_opp_Dixon_BH_17s{tail}.nii.gz'

    original_ids = report['volume_id']
    report['target_scan_file'] = original_ids.map(scan_file_for)
    report['volume_id'] = original_ids.map(lambda vid: vid[:-1])
    return report

def concat_partial_model_reports(paths):
    """Stack several partial UKB report CSVs into a single DataFrame.

    Each path is loaded through ``organise_target_and_volumeid`` and its first
    row is dropped before stacking. Paths for which that helper returns
    ``False`` (it has already printed why) are skipped instead of crashing on
    ``False.drop``.

    Parameters
    ----------
    paths : iterable of str
        Report CSV paths, e.g.
        ``[f'.../MC_dropout_quicknat_UKB_v2/UKB/{i}_report.csv' for i in ids]``.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when no report was usable; the single frame unchanged (index
        kept) when there is exactly one; otherwise the row-wise concatenation
        with a fresh ``0..n-1`` index.
    """
    frames = []
    for p in paths:
        df_ = organise_target_and_volumeid(p)
        if df_ is False:
            continue
        frames.append(df_.drop(df_.index[0]))

    if not frames:
        return None
    if len(frames) == 1:
        return frames[0]
    # DataFrame.append was removed in pandas 2.0; a single concat is also
    # linear instead of re-copying the accumulated frame on every step.
    return pd.concat(frames, ignore_index=True)

In [7]:
def merge_all_model_features(props):
    """Join a model's volume counts with subject features and its evaluation report.

    Reads ``{report_id}_sample_count_report.csv`` and ``{report_id}_report.csv``
    from ``./projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}/`` plus
    the dataset's feature CSV, inner-joins all three on ``volume_id`` (compared
    as strings) and writes ``{report_id}_concat_report_final.csv`` next to them.

    Parameters
    ----------
    props : tuple
        ``(base_dir, proj, model, dataset, report_id)``.

    Returns
    -------
    dict
        ``{proj: path_to_merged_csv}``.

    Notes
    -----
    NOTE(review): every path here is relative to the current working directory;
    ``base_dir`` is unpacked but not used, whereas ``vol_counts`` writes its CSV
    under ``base_dir``. The two only agree when the notebook runs from
    ``base_dir`` - confirm before changing.
    """
    _base_dir, proj, model, dataset, report_id = props
    print(props)
    report_dir = f'./projects/{proj}/reports/{proj}_{dataset}_{model}/{dataset}'
    df = pd.read_csv(f'{report_dir}/{report_id}_sample_count_report.csv')

    # `==` compares the text; the former `is 'KORA'` compared object identity
    # and silently fell through to the UKB branch for any non-interned string.
    if dataset == 'KORA':
        feats_path = './dataset_groups/whole_body_datasets/KORA/processsed_csv_.csv'
    else:
        feats_path = './dataset_groups/whole_body_datasets/UKB/ukb_diabetes_basic_feats.csv'
    df_k_feats = pd.read_csv(feats_path)
    # Both datasets read the same report file.
    df_reports = pd.read_csv(f'{report_dir}/{report_id}_report.csv')

    # Normalise the join key so int-like and string ids still match.
    df['volume_id'] = df['volume_id'].astype(str)
    df_k_feats['volume_id'] = df_k_feats['volume_id'].astype(str)
    df_reports['volume_id'] = df_reports['volume_id'].astype(str)

    df_merged = pd.merge(df, df_k_feats, how='inner', on=['volume_id'])
    df_merged_final = pd.merge(df_merged, df_reports, how='inner', on=['volume_id'])
    path_to_save = f'{report_dir}/{report_id}_concat_report_final.csv'
    df_merged_final.to_csv(path_to_save)
    return {f'{proj}': path_to_save}

def merge_all_dataset_features(props, vols_to_look_type='all'):
    """Join a dataset's ground-truth volume counts with its subject features.

    Reads ``{vols_to_look_type}_processed_{process_status}_sample_count_report.csv``
    from ``{base_dir}/{dataset_group}/{dataset}/``, inner-joins it with the
    dataset's feature CSV on ``volume_id`` (compared as strings) and writes
    ``..._concat_report_final.csv`` to the same folder.

    Parameters
    ----------
    props : tuple
        ``(base_dir, glob_dir, dataset_group, dataset, process_status)``;
        ``glob_dir`` is not used here.
    vols_to_look_type : str
        Tag selecting which count report to read (default ``'all'``).

    Returns
    -------
    dict
        ``{'{vols_to_look_type}_{dataset}_processed_{process_status}': path_to_merged_csv}``.

    Notes
    -----
    The feature CSV paths are relative to the current working directory.
    """
    base_dir, _glob_dir, dataset_group, dataset, process_status = props
    report_stem = f'{base_dir}/{dataset_group}/{dataset}/{vols_to_look_type}_processed_{process_status}'
    df = pd.read_csv(f'{report_stem}_sample_count_report.csv')

    # `==` compares the text; the former `is 'KORA'` compared object identity.
    if dataset == 'KORA':
        feats_path = './dataset_groups/whole_body_datasets/KORA/processsed_csv_.csv'
    else:
        feats_path = './dataset_groups/whole_body_datasets/UKB/ukb_diabetes_basic_feats.csv'
    df_k_feats = pd.read_csv(feats_path)

    # Same key normalisation as merge_all_model_features: without it pandas
    # refuses to merge an integer id column with a string one.
    df['volume_id'] = df['volume_id'].astype(str)
    df_k_feats['volume_id'] = df_k_feats['volume_id'].astype(str)

    df_merged = pd.merge(df, df_k_feats, how='inner', on=['volume_id'])
    path_to_save = f'{report_stem}_concat_report_final.csv'
    df_merged.to_csv(path_to_save)
    return {f'{vols_to_look_type}_{dataset}_processed_{process_status}': path_to_save}

# Ground Truth Segmentation data aggregation

In [8]:
# Properties of the raw (unprocessed) KORA ground-truth masks, in the order
# (base_dir, glob_dir, dataset_group, dataset, process_status) that
# ground_truth_vol_counts / merge_all_dataset_features unpack.
unprocessed_dataset_props = (
    '/home/abhijit/Jyotirmay/my_thesis/dataset_groups',
    '/home/abhijit/nas_drive/Data_WholeBody/KORA/KORA_segs/ROI_liver_spleen_Daniel/**/comp_mask.nii',
    'whole_body_datasets',
    'KORA',
    False,
)

# Keep the individual module-level names bound to the same values.
base_dir, glob_dir, dataset_group, dataset, process_status = unprocessed_dataset_props

In [9]:
# Properties of the processed ground-truth label sets, in the order
# (base_dir, glob_dir, dataset_group, dataset, process_status).
# With process_status=True the glob_dir entry is not used for globbing.
processed_dataset_props_kora = (
    '/home/abhijit/Jyotirmay/my_thesis/dataset_groups',
    '/home/abhijit/Jyotirmay/my_thesis/dataset_groups',
    'whole_body_datasets',
    'KORA',
    True,
)

processed_dataset_props_ukb = (
    '/home/abhijit/Jyotirmay/my_thesis/dataset_groups',
    '/home/abhijit/Jyotirmay/my_thesis/dataset_groups',
    'whole_body_datasets',
    'UKB',
    True,
)

# The module-level names end up holding the UKB values, as before.
base_dir, glob_dir, dataset_group, dataset, process_status = processed_dataset_props_ukb

# Full_bayesian Segmentation data aggregation

In [10]:
# full_bayesian v2 model properties, in the order
# (base_dir, proj, model, dataset, report_id) that vol_counts unpacks.
fb_props_kora = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'full_bayesian',
    'v2',
    'KORA',
    '10_1571866968.4002764',
)

# The UKB entry has an empty report id.
fb_props_ukb = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'full_bayesian',
    'v2',
    'UKB',
    '',
)

# The module-level names end up holding the UKB values, as before.
base_dir, proj, model, dataset, report_id = fb_props_ukb

In [11]:
# base_dir = '/home/abhijit/Jyotirmay/my_thesis'
# proj = 'full_bayesian'
# model = 'v3'
# dataset = 'KORA'
# report_id = '10_1572536287.589728'

# fb_props_0dot05 = base_dir, proj, model, dataset, report_id

In [12]:
# full_bayesian_0dot01 v4 model properties, in the order
# (base_dir, proj, model, dataset, report_id).
fb_props_0dot01_kora = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'full_bayesian_0dot01',
    'v4',
    'KORA',
    '10_1572514598.527084',
)

# The UKB entry has an empty report id.
fb_props_0dot01_ukb = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'full_bayesian_0dot01',
    'v4',
    'UKB',
    '',
)

# The module-level names end up holding the UKB values, as before.
base_dir, proj, model, dataset, report_id = fb_props_0dot01_ukb

# MC_Dropout Segmentation data aggregation

In [13]:
# MC_dropout_quicknat v2 model properties, in the order
# (base_dir, proj, model, dataset, report_id).
mcdropout_props_kora = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'MC_dropout_quicknat',
    'v2',
    'KORA',
    '10_1572006141.7793334',
)

mcdropout_props_ukb = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'MC_dropout_quicknat',
    'v2',
    'UKB',
    '0_0.0',
)

# The module-level names end up holding the UKB values, as before.
base_dir, proj, model, dataset, report_id = mcdropout_props_ukb

# Probabilistic_quicknat Segmentation data aggregation

In [14]:
# probabilistic_quicknat v2 model properties, in the order
# (base_dir, proj, model, dataset, report_id).
probabilistic_props_kora = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'probabilistic_quicknat',
    'v2',
    'KORA',
    '10_1571996796.7963011',
)

probabilistic_props_ukb = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'probabilistic_quicknat',
    'v2',
    'UKB',
    '10_1573834823.1121247',
)

# The module-level names end up holding the UKB values, as before.
base_dir, proj, model, dataset, report_id = probabilistic_props_ukb

# Hierarchical_quicknat Segmentation data aggregation

In [15]:
# hierarchical_quicknat v2 model properties, in the order
# (base_dir, proj, model, dataset, report_id).
hierarchical_props_kora = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'hierarchical_quicknat',
    'v2',
    'KORA',
    '10_1571905560.9377904',
)

hierarchical_props_ukb = (
    '/home/abhijit/Jyotirmay/my_thesis',
    'hierarchical_quicknat',
    'v2',
    'UKB',
    '10_1574308007.2486243',
)

# The module-level names end up holding the UKB values, as before.
base_dir, proj, model, dataset, report_id = hierarchical_props_ukb

In [16]:
# Every UKB model configuration defined in the cells above.
all_models_prop = [
    fb_props_ukb,
    mcdropout_props_ukb,
    probabilistic_props_ukb,
    hierarchical_props_ukb,
    fb_props_0dot01_ukb,
]
# Run selection: the full list is immediately replaced, so only the
# probabilistic model is processed by the cells below.
all_models_prop = [probabilistic_props_ukb]

# Ground-truth datasets to aggregate.
all_datasets_prop = [processed_dataset_props_ukb]

# Reading volumes

In [17]:
# Volume ids of the KORA test split. The only visible use in this notebook is
# as a candidate `vols_to_look` argument of ground_truth_vol_counts.
kora_test_vols = ['KORA2459774','KORA2453328','KORA2452913','KORA2452353','KORA2456928','KORA2462380','KORA2459873',
             'KORA2453082','KORA2455268','KORA2452967','KORA2453048','KORA2453677','KORA2452812','KORA2460903',
             'KORA2452364','KORA2460348','KORA2461338','KORA2461868','KORA2460326','KORA2453172','KORA2453136',
             'KORA2452206','KORA2460878','KORA2452129','KORA2456278','KORA2456199','KORA2460565','KORA2454788',
             'KORA2460174','KORA2453290','KORA2459605','KORA2460768','KORA2460504','KORA2453194','KORA2452834',
             'KORA2459123','KORA2453150','KORA2462345','KORA2460830','KORA2459310','KORA2459455','KORA2459763',
             'KORA2453620','KORA2461392','KORA2453578','KORA2453524','KORA2452409','KORA2460785','KORA2460867',
             'KORA2453642','KORA2459972','KORA2456202','KORA2455946','KORA2456917','KORA2452426','KORA2461184',
             'KORA2456379','KORA2456241','KORA2456793','KORA2452263','KORA2460633','KORA2452316','KORA2453844',
             'KORA2462093','KORA2459752','KORA2453470','KORA2459477','KORA2453306','KORA2461409','KORA2460779',
             'KORA2460309','KORA2456340','KORA2461206','KORA2460824','KORA2461956','KORA2459908','KORA2452924',
             'KORA2459947','KORA2453464','KORA2460216','KORA2461493','KORA2453374','KORA2452687','KORA2460315',
             'KORA2461146','KORA2460249','KORA2452941','KORA2453732','KORA2452338','KORA2453363','KORA2453833',
             'KORA2459526','KORA2459983','KORA2461632','KORA2459548','KORA2462150','KORA2461520','KORA2459807',
             'KORA2462374','KORA2458040','KORA2456661','KORA2452659','KORA2458366','KORA2452801','KORA2453811',
             'KORA2456672','KORA2457266','KORA2457044','KORA2453765','KORA2458402','KORA2461349','KORA2455525',
             'KORA2455296','KORA2458158','KORA2460447','KORA2455753','KORA2455935','KORA2456562','KORA2458197',
             'KORA2458707','KORA2455951','KORA2460889','KORA2460472','KORA2456385','KORA2455042','KORA2452665',
             'KORA2452190','KORA2462161','KORA2461151','KORA2459681','KORA2458068','KORA2457401','KORA2461555',
             'KORA2459745','KORA2453037','KORA2458265','KORA2457517','KORA2452868','KORA2462119','KORA2452381',
             'KORA2452094','KORA2459067','KORA2462352','KORA2456357','KORA2459244','KORA2461577','KORA2456021',
             'KORA2459499','KORA2461885','KORA2461252','KORA2460145','KORA2453589','KORA2460543'
            ]

# No UKB test split is listed yet; this stays an empty placeholder.
ukb_test_vols = []

In [None]:
# Write the per-volume organ-volume CSV of every selected model and collect
# the {project: csv_path} mappings that vol_counts returns.
model_vols = list(map(vol_counts, all_models_prop))
# dataset_vols = [ground_truth_vol_counts(prop, vols_to_look=None, vols_to_look_type='all') for prop in all_datasets_prop]
# dataset_vols = [ground_truth_vol_counts(prop, vols_to_look=ukb_test_vols, vols_to_look_type='test') for prop in all_datasets_prop]

Processing probabilistic_quicknat 4880/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4881/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4882/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4883/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4884/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4885/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4886/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4887/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4888/206866.not all class present, skipping!!
Processing probabilistic_quicknat 4889/206866.not all class present, skipping!!
Processing probabilistic_quicknat 11390/206866.not all class present, skipping!!
Processing probabilistic_quicknat 11391/206866.not all class present, skipping!!
Processing probabilistic_quicknat 1139

Processing probabilistic_quicknat 15433/206866.not all class present, skipping!!
Processing probabilistic_quicknat 15434/206866.not all class present, skipping!!
Processing probabilistic_quicknat 15435/206866.not all class present, skipping!!
Processing probabilistic_quicknat 15436/206866.not all class present, skipping!!
Processing probabilistic_quicknat 15437/206866.not all class present, skipping!!
Processing probabilistic_quicknat 15438/206866.not all class present, skipping!!
Processing probabilistic_quicknat 15439/206866.not all class present, skipping!!
Processing probabilistic_quicknat 16540/206866.not all class present, skipping!!
Processing probabilistic_quicknat 16541/206866.not all class present, skipping!!
Processing probabilistic_quicknat 16542/206866.not all class present, skipping!!
Processing probabilistic_quicknat 16543/206866.not all class present, skipping!!
Processing probabilistic_quicknat 16544/206866.not all class present, skipping!!
Processing probabilistic_qui

Processing probabilistic_quicknat 25238/206866.not all class present, skipping!!
Processing probabilistic_quicknat 25271/206866.not all class present, skipping!!
Processing probabilistic_quicknat 25274/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26260/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26261/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26262/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26263/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26264/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26265/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26266/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26267/206866.not all class present, skipping!!
Processing probabilistic_quicknat 26268/206866.not all class present, skipping!!
Processing probabilistic_qui

Processing probabilistic_quicknat 29787/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29788/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29789/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29900/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29901/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29902/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29903/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29904/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29905/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29906/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29907/206866.not all class present, skipping!!
Processing probabilistic_quicknat 29908/206866.not all class present, skipping!!
Processing probabilistic_qui

Processing probabilistic_quicknat 64452/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64453/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64454/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64455/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64456/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64457/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64459/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64740/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64741/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64742/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64743/206866.not all class present, skipping!!
Processing probabilistic_quicknat 64744/206866.not all class present, skipping!!
Processing probabilistic_qui

In [None]:
# Display the {project: csv_path} mappings collected from vol_counts.
model_vols

In [None]:
# Merge counts, subject features and evaluation report for every selected
# model; each call returns a {project: merged_csv_path} mapping.
model_merged_feats_path = list(map(merge_all_model_features, all_models_prop))
# all_dataset_merged_feats_path = [merge_all_dataset_features(prop) for prop in all_datasets_prop]
# test_dataset_merged_feats_path = [merge_all_dataset_features(prop, vols_to_look_type='test') for prop in all_datasets_prop]

In [None]:
# Paths of the merged per-model reports written by merge_all_model_features.
print(model_merged_feats_path)

In [None]:
# print(all_dataset_merged_feats_path)

In [None]:
# `test_dataset_merged_feats_path` is only assigned by a line that is currently
# commented out in the merge cell, so printing it raised NameError on a fresh
# kernel. Re-enable this line together with that assignment.
# print(test_dataset_merged_feats_path)