In [20]:
import os
import pandas as pd
import numpy as np

In [21]:
def read_raw_log(finroot,row):
    """Summarise the 'Video' events of one tab-delimited log file.

    The file is parsed once with its column header on the third non-blank
    line (``header = 2``).  Two groups of rows are taken from that table:

    * event rows, whose ``Subject`` cell starts with ``S<subject, 2 digits>``;
    * stimulus rows, whose ``Subject`` cell contains ``'Stimuli'`` (a
      backslash-separated path from which the movie title is taken).

    Parameters
    ----------
    finroot : str
        Folder holding the log file.  A trailing separator is optional.
    row : object with ``filename`` and ``subject`` attributes
        ``filename`` is the log file name inside ``finroot``; ``subject`` is
        the integer subject number used to build the ``Sxx`` prefix.

    Returns
    -------
    pandas.DataFrame
        One row per 'Video' event with the columns ``Trial``, ``Event Type``,
        ``Time``, ``start_TR``, ``end_TR``, ``total_TR``, ``Duration``,
        ``Title``, ``Type`` and ``NMov``.  ``start_TR`` is the cumulative
        number of 'Pulse' events seen up to the event, ``total_TR`` the
        number of 'Pulse' events in the whole log, and ``end_TR`` the
        ``start_TR`` of the next Video/Picture/Quit event.

    Raises
    ------
    IndexError
        If no row of the log belongs to the requested subject.
    """
    logfile = os.path.join(finroot, row.filename)

    # Parse the file once; the event rows and the stimulus rows are both
    # selected from this same table.
    raw = pd.read_table(logfile, header = 2)

    subject_prefix = 'S{:02d}'.format(row.subject)

    events = (raw
          # na = False: rows with an empty Subject cell are simply not selected
          .loc[lambda d: d['Subject'].str.startswith(subject_prefix, na = False)]
          .assign(advance_TR = lambda d: (d['Event Type'] == 'Pulse').astype(int))
          .assign(start_TR = lambda d: d['advance_TR'].cumsum())
          .assign(total_TR = lambda d: d['start_TR'].iloc[-1])
          .loc[lambda d: d['Event Type'].isin(['Video', 'Picture', 'Quit'])]
          # The last event has no successor: the forward fill gives it the
          # previous row's end_TR, which is its own start_TR (Duration 0).
          .assign(end_TR = lambda d: d['start_TR'].shift(-1).ffill().astype(int))
          .assign(Duration = lambda d: d['end_TR'] - d['start_TR'])
          .loc[lambda d: (d['Event Type'] == 'Video')]
          .reset_index(drop = True)
          [['Trial','Event Type','Time','start_TR','end_TR','total_TR','Duration']]
         )

    # Movie titles are listed further down in the same file
    titles = (raw
          .loc[lambda d: d['Subject'].str.contains('Stimuli', na = False)]
          .assign(Title = lambda d: d['Subject'].str.split('\\').str[-1])
          .assign(Type = lambda d: d['Title'].str[0])
          .assign(NMov = lambda d: d['Title'].str.extract(r'.(\d+).*.avi', expand = False))
          .reset_index(drop = True)
          [['Title','Type','NMov']]
         )

    # Positional pairing: the i-th Video event gets the i-th stimulus row
    # (inner join, so surplus rows on either side are dropped).
    dataf = events.merge(titles, left_index = True, right_index = True)

    return dataf

In [22]:
# Input: folder holding the raw log files
finroot = '/data02/ritu/2018_7T_14sub_raw/Log/'

# Output: log summaries are organised under <datadrive>layerfMRI/logs/
datadrive = '/data00/'
foutroot = ''.join([datadrive, 'layerfMRI/logs/'])

In [23]:
# Subjects left out of the summary
EXCLUDED_SUBJECTS = [1, 4, 7, 13]

# os.listdir returns entries in arbitrary order; sort so the table (and
# everything derived from it) is reproducible between runs.
logfiles = sorted(f for f in os.listdir(finroot) if 'BAD' not in f)

# One row per log file, with the identifiers parsed from the file name,
# e.g. 'S05_session2_set4_run2-Set4_run2.log' -> subject 5, session '2',
# task '4', run '2'.
filesdf = (pd
           .DataFrame(logfiles, columns = ['filename'])
           .assign(subject = lambda d: d['filename'].str.extract(r'S(\d+)', expand = False))
           # Entries without an 'S<digits>' token (e.g. hidden files) cannot
           # be attributed to a subject and would break the int conversion.
           .dropna(subset = ['subject'])
           .assign(subject = lambda d: d['subject'].astype(int))
           .assign(session = lambda d: d['filename'].str.extract(r'session(\d+)', expand = False))
           .assign(run = lambda d: d['filename'].str.extract(r'run(\d+)', expand = False))
           .assign(task = lambda d: d['filename'].str.extract(r'set(\d+)', expand = False))
           .loc[lambda d: ~d['subject'].isin(EXCLUDED_SUBJECTS)]
           .reset_index(drop = True)
           [['subject','session','task','run','filename']]
          )
# Random preview; capped so that a table with fewer than two rows still displays
filesdf.sample(min(2, len(filesdf)))

Unnamed: 0,subject,session,task,run,filename
36,5,2,4,2,S05_session2_set4_run2-Set4_run2.log
2,2,2,4,1,S02_session2_set4_run1-Set4.log


In [24]:
# Build the summary of all logs.  The per-file frames are collected in a list
# and concatenated once at the end: DataFrame.append was removed in pandas 2.0
# and re-copied the accumulated frame on every iteration.
frames = []
for i,row in filesdf.iterrows():
    
    sub = int(row.subject)
    ses = int(row.session)
    task = int(row.task)
    run = int(row.run)
    func_fld = foutroot + 'sub-{:02d}/ses-{:02d}/func/'.format(sub,ses)
    
    # The output folder is created even while the per-run export below is
    # commented out.
    if not os.path.isdir(func_fld):
        os.makedirs(func_fld)

    # Process the logs
    tdf = read_raw_log(finroot,row)

    # Destination of the per-run table (the export itself is disabled)
    fout = func_fld + 'sub-{:02d}_ses-{:02d}_task-{}_run-{}_bold.csv'.format(sub,ses,task,run)
#     tdf.to_csv(fout, index=False)
    
    # Tag every event with the identifiers of the file it came from
    frames.append(tdf
                  .assign(subject = sub)
                  .assign(session = ses)
                  .assign(task = task)
                  .assign(run = run)
                 )

# pd.concat refuses an empty list; an empty file table gives an empty frame
df = pd.concat(frames) if frames else pd.DataFrame()
    
# df.to_csv(foutroot+'log_summary.csv', index = False)