In [1]:
import numpy as np
import nibabel as nib
import pandas as pd

import dipy.tracking.utils
import dipy.tracking.streamline

import os, sys


In [2]:
# Numeric ids of the patients handled by this notebook; the driver loop
# iterates over them and each id is formatted into that patient's folder path.
patient_ids = [
    1, 4, 6, 7,
    10, 11, 12,
]

In [3]:
# Root folder that the per-patient paths are joined onto. It defaults to the
# original location; set the PATIENTS_DMRI_DIR environment variable to run the
# notebook against a copy of the data stored elsewhere without editing code.
data_dir = os.environ.get(
    "PATIENTS_DMRI_DIR", "/user/pfilipia/home/inria/chu_nice_inria/patients_dmri"
)

def get_patient_dir(patient_id):
    """Return the pre-surgical session folder of a patient, rooted at ``data_dir``.

    ``patient_id`` is formatted twice (zero-padded to two digits) into the
    relative path before it is joined onto ``data_dir``.
    """
    session_subpath = "patient%02d/bids/sub-patient%02d/ses-presurgical" % (patient_id, patient_id)
    return os.path.join(data_dir, session_subpath)

def get_connectivity_dir(patient_id):
    """Return the ``connectivity`` folder inside the patient's session folder."""
    patient_dir = get_patient_dir(patient_id)
    return os.path.join(patient_dir, "connectivity")

def get_connectivity_file(patient_id):
    """Return the path of the connectivity CSV inside the patient's connectivity folder."""
    csv_name = "connections_common_avg_seed5k_after_shift.csv"
    return os.path.join(get_connectivity_dir(patient_id), csv_name)

def get_percentile_connectivity_file(patient_id, percentile):
    """Return the path of the connectivity CSV written for one percentile setting.

    ``percentile`` is embedded in the file name as a zero-padded integer
    (``_pNN`` suffix) inside the patient's connectivity folder.
    """
    connectivity_dir = get_connectivity_dir(patient_id)
    csv_name = "connections_common_avg_seed5k_after_shift_p%02d.csv" % percentile
    return os.path.join(connectivity_dir, csv_name)

def get_tck_streamlines_dir(patient_id):
    """Return the ``tck_streamlines`` folder inside the patient's connectivity folder."""
    connectivity_dir = get_connectivity_dir(patient_id)
    return os.path.join(connectivity_dir, "tck_streamlines")

def get_tck_streamlines_file(patient_id, stimulation_site, recording_electrode):
    """Return the ``.tck`` file path for one stimulation-site / recording-electrode pair.

    The file name is built from both identifiers and lives in the patient's
    ``tck_streamlines`` folder.
    """
    tck_name = "%s_%s_diam10_in00.tck" % (stimulation_site, recording_electrode)
    return os.path.join(get_tck_streamlines_dir(patient_id), tck_name)

def get_chunk_file(patient_id, stimulation_site, recording_electrode, chunk_id):
    """Return the path of one numbered indices CSV chunk for a site/electrode pair.

    The file name combines both identifiers with the integer ``chunk_id`` and
    lives in the patient's ``tck_streamlines`` folder.
    """
    chunk_name = "%s_%s_indices_%d.csv" % (stimulation_site, recording_electrode, chunk_id)
    return os.path.join(get_tck_streamlines_dir(patient_id), chunk_name)

def get_weights_file(patient_id, stimulation_site, recording_electrode):
    """Return the path of the weights text file for one site/electrode pair.

    The file name is built from both identifiers and lives in the patient's
    ``tck_streamlines`` folder.
    """
    weights_name = "%s_%s_diam10_in00_weight.txt" % (stimulation_site, recording_electrode)
    return os.path.join(get_tck_streamlines_dir(patient_id), weights_name)


In [11]:
def _read_chunk_table(chunk_path, required_cols):
    """Load one tab-separated chunk file and keep only rows with positive indices.

    Parameters
    ----------
    chunk_path : str
        Path of the chunk file to read.
    required_cols : list of str
        Index columns that must be strictly positive for a row to be kept.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``weight``, ``length`` and ``required_cols`` of the surviving
        rows, or ``None`` when the file has no ``length`` column.

    Raises
    ------
    OSError, KeyError, pandas parsing errors
        Propagated to the caller, which decides how to report them.
    """
    # Local import keeps this block self-contained; ``pd.compat.StringIO`` used
    # previously does not exist in current pandas releases.
    import io

    # ``with`` guarantees the handle is closed even when reading fails.
    with open(chunk_path, 'r') as chunk_file:
        raw_text = chunk_file.read()

    # Repair the layout before parsing: a line break followed by a tab is
    # folded back into the previous line, and a tab right before a line break
    # is dropped so it does not create an empty trailing field.
    str_input = raw_text.replace("\n\t", "\t").replace("\t\n", "\n")
    chunk_pd = pd.read_csv(io.StringIO(str_input), sep='\t', skipinitialspace=True)

    for idx in required_cols:
        chunk_pd = chunk_pd[chunk_pd[idx] > 0]

    if 'length' not in chunk_pd.columns:
        return None

    return chunk_pd[['weight', 'length'] + list(required_cols)]


def _weighted_means_in_length_window(all_chunks_pd, output_cols, limit_length_percentile):
    """Return one weighted mean per output column over a length-percentile window.

    Rows are kept when their ``length`` lies between the lower and upper
    percentile of all lengths. For ``limit_length_percentile`` <= 100 the window
    is [0th, limit]; above 100 it is [(limit - 100)th, 100th]. Columns that are
    absent from ``all_chunks_pd`` yield 0.
    """
    if limit_length_percentile > 100:
        min_length_percentile = limit_length_percentile - 100
    else:
        min_length_percentile = 0

    lengths = all_chunks_pd['length'].values
    upper_length = np.percentile(lengths, np.minimum(100, limit_length_percentile))
    lower_length = np.percentile(lengths, min_length_percentile)

    # A plain numpy mask avoids any index alignment on the concatenated frame,
    # whose index repeats across chunks.
    in_window = (lengths <= upper_length) & (lengths >= lower_length)
    percentile_pd = all_chunks_pd[in_window]
    weights = percentile_pd['weight'].values

    row = []
    for output_col in output_cols:
        if output_col in percentile_pd.columns:
            mean_value = np.average(percentile_pd[output_col].values, weights=weights)
            # Round-trip through "%f" keeps the 6-decimal rounding of the
            # previous text-based implementation, so regenerated files match.
            row.append(float("%f" % mean_value))
        else:
            row.append(0.0)
    return row


def reduce_indices(patient_id, limit_length_percentile = 100, n_chunks = 10):
    """Aggregate per-chunk diffusion indices into one row per connection and save them.

    For every row of the patient's connectivity CSV, the chunk files of that
    stimulation-site / recording-electrode pair are loaded, restricted to a
    window of ``length`` percentiles, and reduced to weighted averages
    (weights taken from the ``weight`` column). The averages overwrite the
    matching columns of the connectivity table, which is then written to the
    percentile-specific CSV.

    Parameters
    ----------
    patient_id : int
        Patient whose connectivity table and chunk files are processed.
    limit_length_percentile : int or float, optional
        Up to 100: keep rows whose length is at most that percentile.
        Above 100: keep rows whose length is at least the
        ``(limit_length_percentile - 100)``-th percentile.
    n_chunks : int, optional
        Number of chunk files (ids ``0 .. n_chunks - 1``) tried per connection.

    Notes
    -----
    * Results are stored in the same order as the rows of the connectivity
      table. The earlier implementation sorted formatted text lines and cut a
      fixed 8-character prefix, which misassigned values whenever the table
      was not already sorted or the identifiers had a different width.
    * The ``*_std`` output columns are not among the columns read from the
      chunks, so they are always written as 0.
    * Connections with no usable chunk, or whose reduction fails, are written
      as all zeros (best-effort behaviour kept from the original); failures
      are reported on stdout instead of being swallowed silently.
    """
    all_indices = [
        'fa', 'md', 'ad', 'rd',
        'rtop', 'rtap', 'rtpp', 'msd', 'qiv_e9', 'ng', 'ng_perp', 'ng_par'
    ]

    output_cols = [
        'fa', 'fa_std', 'md', 'md_std', 'ad', 'ad_std', 'rd', 'rd_std',
        'rtop', 'rtap', 'rtpp', 'msd', 'qiv_e9', 'ng', 'ng_perp', 'ng_par'
    ]

    # Maps each computed quantity to the column it overwrites in the CSV.
    connectivity_file_cols = {
        'fa': 'mean_fa', 'fa_std': 'std_fa', 'md': 'mean_md', 'md_std': 'std_md',
        'ad': 'mean_ad', 'ad_std': 'std_ad', 'rd': 'mean_rd', 'rd_std': 'std_rd',
        'rtop': 'mean_rtop', 'rtap': 'mean_rtap', 'rtpp': 'mean_rtpp', 'msd': 'mean_msd',
        'qiv_e9': 'mean_qiv', 'ng': 'ng', 'ng_perp': 'ng_perp', 'ng_par': 'ng_par'
    }

    connectivity_pd = pd.read_csv(get_connectivity_file(patient_id), skipinitialspace=True)

    reduced_rows = []
    for _, connection in connectivity_pd.iterrows():
        stimulation_site = connection['stimulation_site']
        recording_electrode = connection['recording_electrode']

        data_chunks = []
        for chunk_id in range(n_chunks):
            chunk_path = get_chunk_file(patient_id, stimulation_site, recording_electrode, chunk_id)
            try:
                chunk_pd = _read_chunk_table(chunk_path, all_indices)
            except Exception as exc:
                # Best effort: an unreadable or malformed chunk is skipped, but
                # the reason is reported so systematic failures are visible.
                print("Exception while reading chunk %s: %r" % (chunk_path, exc))
                continue
            if chunk_pd is not None:
                data_chunks.append(chunk_pd)

        # All-zero row is the fallback when nothing can be computed.
        reduced_row = [0.0] * len(output_cols)
        if data_chunks:
            all_chunks_pd = pd.concat(data_chunks)
            if not all_chunks_pd.empty:
                try:
                    reduced_row = _weighted_means_in_length_window(
                        all_chunks_pd, output_cols, limit_length_percentile
                    )
                except Exception as exc:
                    # E.g. weights summing to zero or an out-of-range percentile.
                    print(
                        "Exception while reducing %s %s: %r"
                        % (stimulation_site, recording_electrode, exc)
                    )
        reduced_rows.append(reduced_row)

    # The reshape keeps the array 2-D even when the connectivity table is empty.
    reduced_values = np.array(reduced_rows, dtype=float).reshape(
        len(reduced_rows), len(output_cols)
    )
    connectivity_pd[[connectivity_file_cols[col] for col in output_cols]] = reduced_values
    connectivity_pd.to_csv(
        get_percentile_connectivity_file(patient_id, limit_length_percentile), sep=',', index=False
    )


In [13]:
# Run the reduction for every patient at each percentile setting
# 100, 110, ..., 190 (values above 100 are handled inside reduce_indices).
# Alternative sweep kept for reference: [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
for patient_id in patient_ids:
    for percentile in range(100, 200, 10):
        print("PATIENT_ID = %d, PERCENTILE = %d" % (patient_id, percentile))
        reduce_indices(patient_id, percentile)

PATIENT_ID = 1, PERCENTILE = 100
PATIENT_ID = 1, PERCENTILE = 110
PATIENT_ID = 1, PERCENTILE = 120
PATIENT_ID = 1, PERCENTILE = 130
PATIENT_ID = 1, PERCENTILE = 140
PATIENT_ID = 1, PERCENTILE = 150
PATIENT_ID = 1, PERCENTILE = 160
PATIENT_ID = 1, PERCENTILE = 170
PATIENT_ID = 1, PERCENTILE = 180
PATIENT_ID = 1, PERCENTILE = 190
PATIENT_ID = 4, PERCENTILE = 100
PATIENT_ID = 4, PERCENTILE = 110
PATIENT_ID = 4, PERCENTILE = 120
PATIENT_ID = 4, PERCENTILE = 130
PATIENT_ID = 4, PERCENTILE = 140
PATIENT_ID = 4, PERCENTILE = 150
PATIENT_ID = 4, PERCENTILE = 160
PATIENT_ID = 4, PERCENTILE = 170
PATIENT_ID = 4, PERCENTILE = 180
PATIENT_ID = 4, PERCENTILE = 190
PATIENT_ID = 6, PERCENTILE = 100
PATIENT_ID = 6, PERCENTILE = 110
PATIENT_ID = 6, PERCENTILE = 120
PATIENT_ID = 6, PERCENTILE = 130
PATIENT_ID = 6, PERCENTILE = 140
PATIENT_ID = 6, PERCENTILE = 150
PATIENT_ID = 6, PERCENTILE = 160
PATIENT_ID = 6, PERCENTILE = 170
PATIENT_ID = 6, PERCENTILE = 180
PATIENT_ID = 6, PERCENTILE = 190
PATIENT_ID