In [2]:
from pathlib import Path
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import csv

In [3]:
def pull_from_csv(csv_file, row=3, header=1):
    """Read one row of a results table and prefix it with the file's LWP tag.

    Parameters
    ----------
    csv_file : str or path-like
        File handed to ``pandas.read_csv`` (malformed lines are skipped).
    row : int, default 3
        Positional (``iloc``) index of the row to extract.
    header : int, default 1
        Line number passed to ``read_csv`` as ``header=[header]``.

    Returns
    -------
    list
        ``[lwp, *values]``. ``lwp`` is the 14-character slice of the path text
        starting at the first case-insensitive occurrence of ``'lwp'``, or
        ``''`` when the path does not contain it. ``values`` are the components
        of the row's index label when that label can be unpacked (e.g. a tuple
        from a multi-level index); otherwise they are the row's cell values.

    Raises
    ------
    IndexError
        If ``row`` is outside the parsed table.
    """
    path_text = str(csv_file)
    lwp_ind = path_text.lower().find('lwp')
    # find() returns -1 when the tag is missing; slicing from -1 would return
    # the last character of a short path instead of an empty tag.
    lwp = path_text[lwp_ind:lwp_ind + 14] if lwp_ind >= 0 else ''

    df = pd.read_csv(csv_file, header=[header], on_bad_lines='skip')
    selected = df.iloc[row]
    try:
        return [lwp, *selected.name]
    except TypeError:
        # A scalar index label (e.g. an int from a default index) cannot be
        # unpacked, so fall back to the values stored in the row itself.
        return [lwp, *selected]
        
        
        
        
        
def met_ratio(df, up, down, error_method='Cramer', error_label=' Error (SD)', output='both'):
    """Compute the ratio of summed amplitudes and its propagated error.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one amplitude column per name in ``up``/``down`` and, for
        each of them, an error column named ``f'{name}{error_label}'``.
    up, down : str or list of str
        Column name(s) summed to form the numerator / denominator.
    error_method : {'Cramer', 'Narrow'}, case-insensitive
        How the errors of several summed columns are combined:
        ``'Cramer'`` takes the root of the sum of squares, ``'Narrow'`` takes
        the largest individual error.
    error_label : str, default ' Error (SD)'
        Suffix that identifies the error column of each amplitude column.
    output : {'both', 'ratio', 'error'}
        Which columns the returned frame contains.

    Returns
    -------
    pandas.DataFrame
        Column ``'<up>/<down>'`` and/or ``'<up>/<down> Error (SD) <error_method>'``
        where multi-column operands are written as ``'(a+b)'``.

    Raises
    ------
    KeyError
        If an error column is missing, or ``error_method`` / ``output`` is not
        one of the accepted values.
    """
    up = up if isinstance(up, list) else [up]
    down = down if isinstance(down, list) else [down]

    up_err_name = [f'{item}{error_label}' for item in up]
    down_err_name = [f'{item}{error_label}' for item in down]
    for item in [*up_err_name, *down_err_name]:
        if item not in df.columns:
            raise KeyError(f'Cannot find "{item}" in the DataFrame. Please check the given name is correct.')

    # Reject bad options before doing any work.
    method = error_method.lower()
    if method not in ('cramer', 'narrow'):
        raise KeyError('Please choose "Cramer" or "Narrow" for the error_method')
    if output not in ('both', 'ratio', 'error'):
        raise KeyError('Please choose "ratio", "error" or "both" (default) as output')

    up_amp = df[up].astype(float).sum(axis=1)
    down_amp = df[down].astype(float).sum(axis=1)
    up_errs = df[up_err_name].astype(float)
    down_errs = df[down_err_name].astype(float)
    if method == 'cramer':
        up_err = (up_errs ** 2).sum(axis=1) ** 0.5
        down_err = (down_errs ** 2).sum(axis=1) ** 0.5
    else:
        up_err = up_errs.max(axis=1)
        down_err = down_errs.max(axis=1)

    ratio = up_amp / down_amp
    ratio_err = ratio * ((up_err / up_amp) ** 2 + (down_err / down_amp) ** 2) ** 0.5
    # With a zero numerator the relative form above evaluates 0 * inf = NaN.
    # The propagated error in that limit is up_err / |down_amp|, so use it.
    zero_up = (up_amp == 0) & (down_amp != 0)
    ratio_err = ratio_err.where(~zero_up, up_err / down_amp.abs())

    def _label(names):
        # Several summed columns are shown in parentheses, e.g. "(a+b)".
        joined = "+".join(names)
        return f'({joined})' if len(names) > 1 else joined

    col_name = f'{_label(up)}/{_label(down)}'
    col_error_name = f'{col_name} Error (SD) {error_method}'

    if output == 'ratio':
        return pd.DataFrame({col_name: ratio})
    if output == 'error':
        return pd.DataFrame({col_error_name: ratio_err})
    return pd.DataFrame({col_name: ratio, col_error_name: ratio_err})

In [4]:
# Root folder holding the INSPIRE 31P result files. The location can be
# overridden with the INSPIRE_31P_RESULTS environment variable so the notebook
# is not tied to a single machine; the original path remains the default.
inspire_P31_results = Path(
    os.environ.get(
        'INSPIRE_31P_RESULTS',
        '/Users/patxi/Sync/Projects/PAINT_MRS/INSPIRE_31P_RESULTS',
    )
)

data_folder = inspire_P31_results

In [18]:
# Collect every result file under data_folder whose name contains "_31p_" and
# ".txt" (case-insensitive). Files are ordered by characters 4-5 of the stem of
# each file's grandparent directory.
csv_files = [
    f
    for f in sorted(data_folder.rglob('*'), key=lambda x: x.parent.parent.stem[4:6])
    if "_31p_" in f.name.lower() and ".txt" in f.name.lower()
]
if not csv_files:
    raise FileNotFoundError(f'No "_31p_" .txt result files found under {data_folder}')

# The column layout and the diagnostic printout below are taken from the last
# file in the list (previously this relied on the leaked loop variable).
reference_file = csv_files[-1]

# Output label -> positional row of the table to extract.
rows_to_process = {
                   'WM': 0,
                   }

for label, row_num in rows_to_process.items():
    data_list = []
    for file in csv_files:
        # Three reads per file: the row itself, the row three below it, and one
        # row from the table that starts at header line 6. Each pull_from_csv
        # result starts with the LWP tag, so the tag appears three times.
        pulled_data = [
            *pull_from_csv(file, row=row_num),
            *pull_from_csv(file, row=row_num + 3),
            *pull_from_csv(file, header=6, row=row_num + 1),
        ]
        data_list.append(pulled_data)
        print(f'{pulled_data[0]} has {len(pulled_data)} elements')

    print(f'\n{data_list[0] = }')
    print(f'{len(data_list[0]) = }')

    row0 = pull_from_csv(reference_file, row=row_num)
    print(f'\n{row0 = }')
    print(f'{len(row0) = }')

    row3 = pull_from_csv(reference_file, row=row_num + 3)
    print(f'\n{row3 = }')
    print(f'{len(row3) = }')

    H6row1 = pull_from_csv(reference_file, header=6, row=row_num + 1)
    print(f'\n{H6row1 = }')
    print(f'{len(H6row1) = }')

    my_columns = list(pd.read_csv(reference_file, header=[1], on_bad_lines='skip'))
    print(f'\n{my_columns=} ')
    print(f'{len(my_columns)=} ')

    my_errors = [item + ' Error (SD)' for item in my_columns]
    print(f'\n{my_errors=} ')
    print(f'{len(my_errors)=} ')

    # Row 0 of the header-6 table is used as the names of the trailing columns.
    my_diagnostics = pull_from_csv(reference_file, row=0, header=6)
    print(f'\n{my_diagnostics=} ')
    print(f'{len(my_diagnostics)=} ')

    # 'LWP' and 'Error for LWP' name the tag that leads the first two pulls;
    # the tag of the third pull is named by my_diagnostics[0].
    total_columns = ['LWP', *my_columns, 'Error for LWP', *my_errors, *my_diagnostics]
    print(f'\nTotal number of columns = {len(total_columns)}  ')
    df = pd.DataFrame(data_list, columns=total_columns)

    df.to_excel(f'{label}.xlsx', index=False)

    #now calculate ratios
    # df_L2N = met_ratio(df,['Lac', 'Threonine'], 'TNAA')
    # df_L2N_narrow_error = met_ratio(df,['Lac', 'Threonine'], 'TNAA', error_method='Narrow', output='error')
    # df_N2C = met_ratio(df,'TNAA', 'TCho')
    # df_C2C = met_ratio(df,'TCho', 'Cr')
    # df_N2Cr = met_ratio(df,'TNAA', 'Cr')


    # df_ratio = pd.concat((df_L2N, df_N2C, df_C2C, df_N2C, df), axis=1)
    # df_ratio = pd.concat((df_L2N,df_L2N_narrow_error, df_N2C, df_C2C, df_N2Cr, df), axis=1)

    # df.set_index('LWP').to_excel(f'{label}_RATIOS_PRESS_INSPIRE.xlsx')
    df.set_index('LWP').to_excel(f'{label}_INSPIRE_31P_03Jan2023.xlsx')

 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{
 has 6 elements
{

data_list[0] = ['', 'Name of Patient: ', '', 'Additional Inform

In [92]:
# csv_files = [f for f in sorted(data_folder.rglob('*'), key=lambda x:x.parent.parent.stem[4:6]) if ("_31P_" in f.name.lower() and ".txt" in f.name.lower())]
cols = ['LWP', 'Gamma1', 'Gamma2', 'alpha1', 'alpha2', 'beta1', 'beta2', 'beta3', 'NAD', 'PCr', 'GPC', 'GPE', 'Pi', 'Pi2','PME', 'PCh', 'UDP', 'Pi3', 'PME2','Pi4']

# rglob() yields files in filesystem order, which is not guaranteed to be
# stable; sorting keeps the line order of freqs.txt reproducible.
files = sorted(f for f in data_folder.rglob('*') if ("31P" in f.name and ".txt" in f.name))

data_dict = {"LWP":[],
             "freq":[],
             "amp":[]}

# Write one line per file: the first four characters of the file name, then
# the first-column text of table rows 9 and 13, separated by tabs.
with open(r'freqs.txt', 'w') as fp:
    for file in files:
        name = file.name[0:4]
        df = pd.read_csv(file, header=1, on_bad_lines='skip')

        # Use a single positional lookup (row, column). The chained form
        # df.iloc[r][0] relies on Series[int] falling back to positional
        # access, which pandas has deprecated.
        freq_label = df.iloc[8, 0]   # presumably the caption of the row below; verify
        freqtext = df.iloc[9, 0]
        # freqs = [float(f) for f in freqtext.split("\t")[:-1]]

        amp_label = df.iloc[12, 0]   # presumably the caption of the row below; verify
        amptext = df.iloc[13, 0]
        # amps = [float(f) for f in amptext.split("\t")[:-1]]

        fp.write(f'{name}\t {freqtext} \t {amptext}\n')
        print('Done')

    # data_dict["LWP"].append(name)
    # data_dict["freq"].append(freqs[:])
    # data_dict["amp"].append(amps[:])


# results_df = pd.DataFrame(data_dict, columns=total_columns)
# results_df.to_csv("INSPIRE_31P_03Jan2023.csv")

Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done
Done


In [74]:
# df.to_excel('kkkk2.xlsx')
def _parse_tab_floats(text):
    """Convert a tab-separated string of numbers into a list of floats."""
    fields = text.split("\t")
    # A trailing tab leaves one empty field at the end. Drop it only when it
    # really is empty, so a line without a trailing tab keeps its last value.
    if fields and not fields[-1].strip():
        fields = fields[:-1]
    return [float(field) for field in fields]


# Single positional lookups (row, column) replace the chained df.iloc[r][0],
# which depends on the deprecated positional fallback of Series[int].
freq_label = df.iloc[8, 0]
freqtext = df.iloc[9, 0]
freqs = _parse_tab_floats(freqtext)

cols = ['LWP', 'Gamma1', 'Gamma2', 'alpha1', 'alpha2', 'beta1', 'beta2', 'beta3', 'NAD', 'PCr', 'GPC', 'GPE', 'Pi', 'Pi2','PME', 'PCh', 'UDP', 'Pi3', 'PME2','Pi4']

amp_label = df.iloc[12, 0]
amptext = df.iloc[13, 0]
amps = _parse_tab_floats(amptext)
amps

[1.9321e-06,
 1.9321e-06,
 1.8436e-06,
 1.8436e-06,
 6.5154e-08,
 3.2577e-08,
 3.2577e-08,
 2.7989e-07,
 3.751e-06,
 1.4192e-06,
 1.4289e-07,
 0.0,
 1.6117e-06,
 1.0348e-06,
 1.9615e-07,
 2.1233e-07,
 0.0,
 1.22e-07,
 0.0]

In [89]:
# Display the current value of amptext (the raw tab-separated string, before
# it is split and converted to floats).
amptext

'1.6706E-6\t1.6706E-6\t1.9363E-6\t1.9363E-6\t7.5958E-7\t3.7979E-7\t3.7979E-7\t0E0\t3.9656E-6\t2.3698E-6\t0E0\t0E0\t1.5752E-6\t2.711E-6\t8.221E-8\t6.5512E-8\t0E0\t3.2257E-7\t4.0838E-8\t'

In [None]:
# Scratch example: write each item of a list on its own line.
# It writes to its own file on purpose. Opening 'freqs.txt' in write mode here
# would overwrite the frequency/amplitude export written to that file earlier
# in the notebook whenever all cells are run.
names = ['Jessa', 'Eric', 'Bob']

with open(r'names_example.txt', 'w') as fp:
    fp.writelines(f'{item}\n' for item in names)
    print('Done')