In [1]:
import csv
import os
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
def pull_from_csv(csv_file, row=3, header=1):
    """Return one parsed row of a CSV file, prefixed with the subject ID from its path.

    The subject ID is the six characters of ``str(csv_file)`` that start at the
    first case-insensitive occurrence of ``'lwp'``. If the path does not
    contain ``'lwp'`` an empty string is used.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file to read.
    row : int, default 3
        Positional index (``DataFrame.iloc``) of the row to extract from the
        parsed frame.
    header : int, default 1
        Zero-based line number used as the header row; passed to
        ``pandas.read_csv`` as a one-element list.

    Returns
    -------
    list
        ``[lwp, *fields]``. If the selected row's index label is iterable
        (for example a tuple coming from a multi-level index) ``fields`` are
        the elements of that label; otherwise they are the row's values.
        NOTE(review): a plain string label would also be iterable and would be
        split into characters - confirm this cannot occur for the input files.

    Raises
    ------
    IndexError
        If ``row`` is outside the parsed frame.
    """
    path_text = str(csv_file)
    lwp_ind = path_text.lower().find('lwp')
    # str.find returns -1 when the marker is absent; slicing from -1 would
    # silently produce an unrelated fragment of the path.
    lwp = path_text[lwp_ind:lwp_ind + 6] if lwp_ind >= 0 else ''

    try:
        # pandas >= 1.3: malformed lines are skipped with a warning.
        df = pd.read_csv(csv_file, header=[header], on_bad_lines='warn')
    except TypeError:
        # Older pandas has no ``on_bad_lines``; use the legacy spelling.
        df = pd.read_csv(csv_file, header=[header], error_bad_lines=False)

    selected = df.iloc[row]
    try:
        return [lwp, *selected.name]
    except TypeError:
        # The index label is a scalar and cannot be unpacked: use the values.
        return [lwp, *selected]
        
        
        
        
        
def met_ratio(df, up, down, error_method='Cramer', error_label=' Error (SD)', output='both'):
    """Compute the ratio of summed columns and its propagated error, row by row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column named in ``up`` and ``down`` and, for each of
        them, a companion column named ``f'{name}{error_label}'``.
    up, down : str or list/tuple of str
        Column name(s) summed to form the numerator (``up``) and the
        denominator (``down``).
    error_method : {'Cramer', 'Narrow'}, case-insensitive
        How the errors of several summed columns are combined:
        ``'Cramer'`` takes the square root of the sum of squared errors,
        ``'Narrow'`` takes the largest single error.
    error_label : str, default ' Error (SD)'
        Suffix that turns a column name into the name of its error column.
    output : {'both', 'ratio', 'error'}
        Which columns the returned frame contains.

    Returns
    -------
    pandas.DataFrame
        Column ``'<up>/<down>'`` and/or ``'<up>/<down> Error (SD) <error_method>'``,
        where multi-column sides are written as ``'(A+B)'``.

    Raises
    ------
    KeyError
        If a required column is missing, or ``error_method`` / ``output`` is
        not one of the accepted values.
    """
    # Accept tuples as well as lists; any other value is one column name.
    up = list(up) if isinstance(up, (list, tuple)) else [up]
    down = list(down) if isinstance(down, (list, tuple)) else [down]

    up_err_name = [f'{item}{error_label}' for item in up]
    down_err_name = [f'{item}{error_label}' for item in down]
    for item in [*up_err_name, *down_err_name, *up, *down]:
        if item not in df.columns:
            raise KeyError(f'Cannot find "{item}" in the DataFrame. Please check the given name is correct.')

    # Validate the option strings before doing any numerical work.
    method = error_method.lower()
    if method not in ('cramer', 'narrow'):
        raise KeyError('Please choose "Cramer" or "Narrow" for the error_method')
    if output not in ('both', 'ratio', 'error'):
        raise KeyError('Please choose "ratio", "error" or "both" (default) as output')

    up_amp = df[up].astype(float).sum(axis=1)
    down_amp = df[down].astype(float).sum(axis=1)
    up_err_values = df[up_err_name].astype(float)
    down_err_values = df[down_err_name].astype(float)
    if method == 'cramer':
        up_err = (up_err_values ** 2).sum(axis=1) ** 0.5
        down_err = (down_err_values ** 2).sum(axis=1) ** 0.5
    else:
        up_err = up_err_values.max(axis=1)
        down_err = down_err_values.max(axis=1)

    ratio = up_amp / down_amp
    # Same first-order propagation as ratio*sqrt((su/u)^2 + (sd/d)^2), written
    # without dividing by the numerator so that a zero numerator gives
    # su/|d| instead of NaN and the result is never negative.
    ratio_err = ((up_err / down_amp) ** 2 + (ratio * down_err / down_amp) ** 2) ** 0.5

    up_label = "+".join(up)
    if len(up) > 1:
        up_label = f'({up_label})'
    down_label = "+".join(down)
    if len(down) > 1:
        down_label = f'({down_label})'
    col_name = f'{up_label}/{down_label}'
    col_error_name = f'{col_name} Error (SD) {error_method}'

    if output == 'ratio':
        return pd.DataFrame({col_name: ratio})
    if output == 'error':
        return pd.DataFrame({col_error_name: ratio_err})
    return pd.DataFrame({col_name: ratio, col_error_name: ratio_err})

In [3]:
# Root folder that is searched recursively for CSI spectra.
# Data sets used previously (kept for reference):
#   /Users/papo/Sync/MRdata/IoN_Piglet/MRS_RAY
#   /Users/papo/Sync/MRdata/IoN_Piglet/Ellie
data_folder = Path('/Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA')

basis_folder = Path('/Users/papo/Sync/Projects/PAINT_MRS_CSI/3_0T_basis_threonine_no_MM')

# Only names that END in "act.sdat" are accepted: the companion file names
# below are built by replacing that suffix, which is only valid at the end.
ACT_SUFFIX = 'act.sdat'
SDAT_EXTENSION = '.sdat'

sdat_files = [
    f for f in sorted(data_folder.rglob('*'))
    if f.name.lower().endswith(ACT_SUFFIX) and 'csi' in f.name.lower()
]
for file in tqdm(sdat_files):
    # "<prefix>act.SDAT" -> "<prefix>ref.SDAT", "<prefix>act_both.csv", "<prefix>act.pdf"
    ref_path = file.with_name(f'{file.name[:-len(ACT_SUFFIX)]}ref.SDAT')
    # Named csv_path (not csv) so the imported ``csv`` module is not shadowed.
    csv_path = file.with_name(f'{file.name[:-len(SDAT_EXTENSION)]}_both.csv')
    pdf_path = file.with_name(f'{file.name[:-len(SDAT_EXTENSION)]}.pdf')

    # Skip spectra that already have a result file so re-runs are cheap.
    if csv_path.is_file():
        continue

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through unchanged.
    # To also write a PDF, add: '--output_pdf', str(pdf_path)
    command = [
        'tarquin', '--rows', '3', '--cols', '2',
        '--input', str(file),
        '--output_csv', str(csv_path),
        '--input_w', str(ref_path),
        '--basis_csv', str(basis_folder),
    ]
    print(f'{" ".join(command)}\n')
    completed = subprocess.run(command)
    if completed.returncode != 0:
        # Keep going with the remaining files, but make the failure visible.
        print(f'tarquin exited with status {completed.returncode} for {file}')


  0%|          | 0/59 [00:00<?, ?it/s]

tarquin --rows 3 --cols 2 --input /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP710_D1/C7493934_WIP_CSI_PB_auto_TR2S_SENSE_13_2_raw_act.SDAT --output_csv /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP710_D1/C7493934_WIP_CSI_PB_auto_TR2S_SENSE_13_2_raw_act_both.csv --input_w /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP710_D1/C7493934_WIP_CSI_PB_auto_TR2S_SENSE_13_2_raw_ref.SDAT --basis_csv /Users/papo/Sync/Projects/PAINT_MRS_CSI/3_0T_basis_threonine_no_MM



  2%|▏         | 1/59 [00:09<09:06,  9.42s/it]

tarquin --rows 3 --cols 2 --input /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP711_D1/C3595760_WIP_CSI_PB_auto_TR2S_SENSE_13_2_raw_act.SDAT --output_csv /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP711_D1/C3595760_WIP_CSI_PB_auto_TR2S_SENSE_13_2_raw_act_both.csv --input_w /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP711_D1/C3595760_WIP_CSI_PB_auto_TR2S_SENSE_13_2_raw_ref.SDAT --basis_csv /Users/papo/Sync/Projects/PAINT_MRS_CSI/3_0T_basis_threonine_no_MM



  3%|▎         | 2/59 [00:17<08:37,  9.08s/it]

tarquin --rows 3 --cols 2 --input /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP711_Insp002_scan2/C3595760_WIP_CSI_PB_auto_TR2S_SENSE_11_2_raw_act.SDAT --output_csv /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP711_Insp002_scan2/C3595760_WIP_CSI_PB_auto_TR2S_SENSE_11_2_raw_act_both.csv --input_w /Users/papo/Sync/Projects/PAINT_MRS/INSPIRE_MRS_DATA/LWP711_Insp002_scan2/C3595760_WIP_CSI_PB_auto_TR2S_SENSE_11_2_raw_ref.SDAT --basis_csv /Users/papo/Sync/Projects/PAINT_MRS_CSI/3_0T_basis_threonine_no_MM



In [None]:
# Positions inside each result CSV, as consumed by pull_from_csv.
# NOTE(review): these offsets presumably reflect the section layout of the
# CSV written for a 3x2 voxel grid - confirm against a sample file before
# changing the grid size.
CRLB_ROW_OFFSET = 8            # added to a voxel row to reach its error row
DIAGNOSTICS_HEADER_LINE = 16   # header line used when reading the diagnostics
DIAGNOSTICS_ROW_OFFSET = 1     # added to a voxel row under that header

# NOTE(review): for files located at <data_folder>/<subject>/<file> the first
# key component is taken from the name of data_folder itself and is therefore
# identical for every file - confirm which folder level was intended.
# The full path is used as a tie-breaker so the order is reproducible.
csv_files = [
    f for f in sorted(data_folder.rglob('*'),
                      key=lambda x: (x.parent.parent.stem[4:6], str(x)))
    if ("act_both.csv" in f.name.lower() and "csi" in f.name.lower())
]
if not csv_files:
    raise FileNotFoundError(f'No "*act_both.csv" CSI result files found under {data_folder}')

rows_to_process = {'BGT': 5, #(BGT = Basal Ganglia and Thalamic, ie deep GM)
                   'WM': 3,
                   }

# Column names are read once, from the last file in the list (the same file
# the per-label code used before via the leftover loop variable).
reference_file = csv_files[-1]
try:
    # pandas >= 1.3: malformed lines are skipped with a warning.
    header_frame = pd.read_csv(reference_file, header=[1], on_bad_lines='warn')
except TypeError:
    # Older pandas has no ``on_bad_lines``; use the legacy spelling.
    header_frame = pd.read_csv(reference_file, header=[1], error_bad_lines=False)
my_columns = list(header_frame)
my_errors = [item+' Error (SD)' for item in my_columns]
my_diagnostics = pull_from_csv(reference_file, row=0, header=DIAGNOSTICS_HEADER_LINE)
# pull_from_csv always puts the subject ID first; give that column a fixed
# label instead of naming it after whichever subject happened to be last.
diagnostic_columns = ['Diagnostics for LWP', *my_diagnostics[1:]]

for label, row_num in rows_to_process.items():
    # One spreadsheet row per file: amplitudes, errors, then diagnostics.
    data_list = [
        [
            *pull_from_csv(csv_path, row=row_num),
            *pull_from_csv(csv_path, row=row_num + CRLB_ROW_OFFSET),
            *pull_from_csv(csv_path, header=DIAGNOSTICS_HEADER_LINE,
                           row=row_num + DIAGNOSTICS_ROW_OFFSET),
        ]
        for csv_path in csv_files
    ]
    df = pd.DataFrame(
        data_list,
        columns=['LWP', *my_columns, 'Error for LWP', *my_errors, *diagnostic_columns],
    )
    df.to_excel(f'{label}.xlsx', index=False)

    # Now calculate ratios.
    df_L2N = met_ratio(df, ['Lac', 'Threonine'], 'TNAA')
    df_L2N_narrow_error = met_ratio(df, ['Lac', 'Threonine'], 'TNAA', error_method='Narrow', output='error')
    df_N2C = met_ratio(df, 'TNAA', 'TCho')
    df_C2C = met_ratio(df, 'TCho', 'Cr')
    df_N2Cr = met_ratio(df, 'TNAA', 'Cr')

    df_ratio = pd.concat((df_L2N, df_L2N_narrow_error, df_N2C, df_C2C, df_N2Cr, df), axis=1)

    df_ratio.set_index('LWP').to_excel(f'{label}_RATIOS_INSPIRE.xlsx')



b'Skipping line 4: expected 1 fields, saw 2\nSkipping line 7: expected 1 fields, saw 2\nSkipping line 8: expected 1 fields, saw 2\nSkipping line 11: expected 1 fields, saw 2\nSkipping line 14: expected 1 fields, saw 2\nSkipping line 15: expected 1 fields, saw 2\nSkipping line 16: expected 1 fields, saw 2\nSkipping line 24: expected 1 fields, saw 3\nSkipping line 27: expected 1 fields, saw 2\nSkipping line 30: expected 1 fields, saw 3\nSkipping line 33: expected 1 fields, saw 2\nSkipping line 34: expected 1 fields, saw 3\nSkipping line 42: expected 1 fields, saw 2\nSkipping line 45: expected 1 fields, saw 3\nSkipping line 47: expected 1 fields, saw 2\nSkipping line 49: expected 1 fields, saw 4\nSkipping line 54: expected 1 fields, saw 2\nSkipping line 62: expected 1 fields, saw 2\nSkipping line 63: expected 1 fields, saw 3\nSkipping line 68: expected 1 fields, saw 2\nSkipping line 69: expected 1 fields, saw 3\nSkipping line 71: expected 1 fields, saw 2\nSkipping line 73: expected 1 fiel

ParserError: Error tokenizing data. C error: Buffer overflow caught - possible malformed input file.


In [None]:
# Error-only TNAA/TCho result, combining errors with the "Narrow" method.
# NOTE: `df` is not defined in this cell. It must already exist in the kernel:
# the per-label processing loop in the earlier cell assigns it on every
# iteration, so this uses the frame of the last label processed.
dfbb = met_ratio(df,'TNAA', 'TCho', error_method='Narrow', output='error')
dfbb


Unnamed: 0,TNAA/TCho Error (SD) Narrow
0,0.608414
1,1.21132
2,4.928498
3,0.501901
4,0.32774
5,0.687924
6,0.695159
7,0.920864
8,1.009604
9,
