In [28]:
import pandas as pd
import os
import re
from shutil import copyfile
import json

In [29]:
#Get Data Filepath

print("Data needs to be in format: \n\
       Project Filename            \n\
        /          \\              \n\
    NIMS_data  BIDS_info.xlsx      \n\
       /                           \n\
Sub1 Sub2 Sub3                     \n\n\
                                   \n\
Please drag in project filepath    \n")

# A dragged or pasted path can arrive with surrounding whitespace or wrapped in
# quotes; both would end up inside every path built below, so remove them first.
project_filepath = input().strip()
if len(project_filepath) >= 2 and project_filepath[0] == project_filepath[-1] and project_filepath[0] in ('"', "'"):
    project_filepath = project_filepath[1:-1]


#path variables
# Both constants end with a slash because later code appends names by plain string concatenation.
BIDS= project_filepath + '/BIDS_data/'
NIMS= project_filepath + '/NIMS_data/'


Data needs to be in format: 
       Project Filename            
        /          \              
    NIMS_data  BIDS_info.xlsx      
       /                           
Sub1 Sub2 Sub3                     

                                   
Please drag in project filepath    

/Users/mdclark/Desktop/PracticeProject


In [76]:
#Read files
# Open the project workbook once; its sheets are parsed further down.
bids_info_path = project_filepath + '/BIDS_info.xlsx'
xls = pd.ExcelFile(bids_info_path)


#Make folder if folder doesn't exist function
def makefolder(name):
    """Create the directory ``name`` (with any missing parents) unless that path already exists."""
    if os.path.exists(name):
        # Something is already at this path; leave it untouched.
        return
    os.makedirs(name)

In [77]:
#Load and Clean XLS File
participants = xls.parse('participants')
#Drop the first parsed row and keep the first six columns (columns after that are reference columns)
protocol = xls.parse('protocol', convert_float=False).iloc[1:,:6]
#Keep rows with at least 3 filled cells, i.e. get rid of items that don't have a bids equivalent
protocol = protocol.dropna(axis=0, thresh=3)


def _format_run_number(value):
    """Return a run number as a zero-padded string (1.0 -> '01'); missing values become 'nan'."""
    if pd.isnull(value):
        # Same text astype('str') produced; the '_run-nan' suffix is removed from the path below.
        return 'nan'
    # Remove only a trailing ".0" suffix. str.strip('.0') strips the characters
    # '.' and '0' from both ends, which turned run 10 into '01' and run 20 into '02'.
    return re.sub(r'\.0+$', '', str(value).strip()).zfill(2)


protocol.run_number = protocol.run_number.map(_format_run_number) #Convert run number to string


#Create "bold" portion of filename (functional scans only)
protocol['bold_filename'] = ''
protocol.loc[protocol['ANAT_or_FUNC'] == 'func', 'bold_filename'] = '_bold'

#Concatenate filename and clean; "###" is a placeholder that is later replaced by the participant id
protocol["BIDS_scan_title_path"] = BIDS + "sub-###/" + protocol.ANAT_or_FUNC + "/sub-###_" + protocol.BIDS_scan_title + "_run-" + protocol.run_number + protocol.bold_filename + ".nii.gz"
protocol.BIDS_scan_title_path = protocol.BIDS_scan_title_path.str.replace('_run-nan', '') #For items that don't have runs

#Create list for NIMS -> bids conversion
NIMS_protocol_filenames = protocol.NIMS_scan_title.tolist() #Convert protocol scan titles to list
NIMS_BIDS_conversion = protocol[["NIMS_scan_title","BIDS_scan_title_path"]]

In [83]:
def check_against_protocol(participants,protocol): 
    """Check that every participant's NIMS folder holds the scans the protocol expects.

    For each participant row the ``.nii.gz`` files in ``NIMS + row.nims_title`` are
    compared with the protocol: for every distinct NIMS scan title, the number of
    files whose name contains the title must equal the number of protocol rows
    whose title contains it. One line is printed per scan title, and one per
    participant whose folder cannot be listed.

    Parameters
    ----------
    participants : pandas.DataFrame
        Must provide the columns ``participant_id`` and ``nims_title``.
    protocol : pandas.DataFrame
        Not read directly; the comparison uses the module-level
        ``NIMS_protocol_filenames`` and ``NIMS_BIDS_conversion``.

    Returns
    -------
    bool
        True when every folder could be listed and every count matches, else False.

    Only a folder that cannot be listed is reported as missing; any other error
    propagates instead of being labelled "folder is missing".
    """

    all_files_correct = True
    protocol_titles = NIMS_BIDS_conversion.NIMS_scan_title

    for _, row in participants.iterrows():
        subject = "sub-" + str(row.participant_id)

        try:
            folder_contents = os.listdir(NIMS + row.nims_title)
        except (OSError, TypeError):
            # OSError: the folder is absent or unreadable.
            # TypeError: nims_title is not a string (e.g. an empty spreadsheet cell).
            all_files_correct = False
            print(subject + ": ERROR - folder is missing \n------")
            continue

        #Delete all non-nii.gz files
        nifti_files = [x for x in folder_contents if ".nii.gz" in x]

        for item in set(NIMS_protocol_filenames):
            found = sum(1 for x in nifti_files if item in x)
            # regex=False: match the title literally, exactly like the `in` test on
            # the file names above; na=False: rows without a title never match.
            expected = int(protocol_titles.str.contains(item, regex=False, na=False).sum())

            if found == expected:
                print(subject + ": ++ " + item.rjust(20) + " match")
            else:
                print(subject + ": -- "+ item.rjust(20) + " files do not match protocol")
                all_files_correct = False
        print("------------")

    if all_files_correct:
        print("\nAll your folders match your protocol\n")
    else:
        print("\nSome folders do not match your protocol, please resolve errors\n")

    return all_files_correct

In [84]:
def write_text_files(participants, protocol): 
    """Write the BIDS metadata files into the ``BIDS`` folder.

    Creates ``dataset_description.json``, one ``<BIDS_scan_title>_bold.json`` for
    every distinct scan title among the rows where ``ANAT_or_FUNC == "func"``, and
    ``participants.tsv``. The ``BIDS`` folder must already exist.

    Parameters
    ----------
    participants : pandas.DataFrame
        Must provide the columns ``participant_id``, ``sex`` and ``age``.
        The frame itself is not modified.
    protocol : pandas.DataFrame
        Must provide the columns ``ANAT_or_FUNC``, ``BIDS_scan_title``,
        ``full_task_name`` and ``repetition_time``.
    """

    def write_json(stem, payload):
        # Serialise before opening so a failed dump does not leave an empty file behind.
        text = json.dumps(payload)
        with open(BIDS + stem + ".json", "w") as handle:
            handle.write(text)

    def native(value):
        # numpy scalars expose .item(); json cannot serialise numpy integer scalars.
        return value.item() if hasattr(value, "item") else value

    #Data Description
    write_json("dataset_description", {"BIDSVersion": "1.0.0",
                                       "License": "",
                                       "Name": "dummy task name",
                                       "ReferencesAndLinks": ""})

    #Task Description
    functional_titles = set(protocol.loc[protocol.ANAT_or_FUNC == "func", 'BIDS_scan_title'])
    for item in functional_titles:
        same_title = protocol.BIDS_scan_title == item
        full_task_name = protocol.loc[same_title, 'full_task_name'].iloc[0] #Gets first instance of the task name
        repetition_time = protocol.loc[same_title, 'repetition_time'].iloc[0] #Gets first instance of RT

        write_json(item + "_bold", {"RepetitionTime": native(repetition_time),
                                    "TaskName": native(full_task_name)})

    #TSV
    # assign() builds a new column on a fresh frame; writing strings through
    # .loc[:, col] into the numeric id column of a slice triggers pandas'
    # SettingWithCopy / incompatible-dtype warnings.
    participant_tsv = participants.loc[:, ['participant_id', 'sex', 'age']].assign(
        participant_id=lambda frame: 'sub-' + frame['participant_id'].astype(str))
    participant_tsv.to_csv(BIDS + 'participants.tsv', sep='\t', index=False)

In [89]:
def _natural_sort_key(filename):
    """Sort key that compares runs of digits numerically ('scan_2' sorts before 'scan_10')."""
    # re.split with a capture group alternates text, digits, text, ...; odd positions are the digit runs.
    return [int(token) if position % 2 else token
            for position, token in enumerate(re.split(r'(\d+)', filename))]


def convert_to_bids(participants, protocol):
    """Copy every participant's NIMS scans into the BIDS folder layout.

    Runs ``check_against_protocol`` first and does nothing more when it fails.
    Otherwise creates ``BIDS`` with ``sub-<id>/anat`` and ``sub-<id>/func`` for each
    participant, copies each matching ``.nii.gz`` file to the path given by
    ``NIMS_BIDS_conversion`` (with ``###`` replaced by the participant id), and
    finally calls ``write_text_files``.

    Parameters
    ----------
    participants : pandas.DataFrame
        Must provide the columns ``participant_id`` and ``nims_title``.
    protocol : pandas.DataFrame
        Passed on to ``check_against_protocol`` and ``write_text_files``.

    Raises
    ------
    AssertionError
        If, for some scan title, the number of files differs from the number of
        protocol rows.
    """

    print("Comparing Folders to Protocol...\n")

    if not check_against_protocol(participants,protocol):
        return

    print("Creating BIDS_data folder\n")
    #Make BIDS Folder
    makefolder(BIDS)
    for participant_id in participants.participant_id:
        for modality in ("anat", "func"):
            makefolder(BIDS + 'sub-' + str(participant_id) + "/" + modality)

    for _, row in participants.iterrows():
        source_dir = NIMS + row.nims_title

        #Get files, dropping all non-nii.gz files
        nifti_files = [x for x in os.listdir(source_dir) if ".nii.gz" in x]

        for scan_title in set(NIMS_protocol_filenames):
            # os.listdir returns names in arbitrary order, but files are paired with
            # protocol rows by position, so the order has to be made deterministic.
            # NOTE(review): assumes the numeric order of the file names mirrors the
            # run order of the protocol rows - confirm against the NIMS naming scheme.
            directory_filenames = sorted((x for x in nifti_files if scan_title in x),
                                         key=_natural_sort_key)
            # regex=False: literal match, same as the `in` test on the file names;
            # na=False: rows without a NIMS title never match.
            title_matches = NIMS_BIDS_conversion.NIMS_scan_title.str.contains(scan_title, regex=False, na=False)
            protocol_filenames = NIMS_BIDS_conversion[title_matches].iloc[:,1].tolist()

            assert len(directory_filenames) == len(protocol_filenames), "filelists aren't the same length"

            for source_name, target_template in zip(directory_filenames, protocol_filenames):
                oldpath = source_dir + "/" + source_name
                newpath = target_template.replace("###", str(row.participant_id))
                # Only anat/ and func/ were created above; make sure any other
                # destination folder named by the protocol exists as well.
                makefolder(os.path.dirname(newpath))
                copyfile(oldpath, newpath)

                print("sub-" + str(row.participant_id) + ": ++ "+ os.path.basename(newpath).rjust(20))
        print("------------")

    print("\nCreating JSON and .tsv Files")

    write_text_files(participants, protocol)

    print("\nDone!")

In [90]:
# Run the whole pipeline: check the NIMS folders against the protocol, copy the scans
# into BIDS_data, then write the JSON and .tsv files.
convert_to_bids(participants, protocol)

Comparing Folders to Protocol...

sub-102: ++   BOLD_EPI_29mm_2sec match
sub-102: ++        T1w_9mm_BRAVO match
------------

All your folders match your protocol

Creating BIDS_data folder

sub-102: ++ sub-102_task-emp_run-01_bold.nii.gz
sub-102: ++ sub-102_task-emp_run-02_bold.nii.gz
sub-102: ++ sub-102_task-emp_run-03_bold.nii.gz
sub-102: ++ sub-102_task-emp_run-04_bold.nii.gz
sub-102: ++   sub-102_T1w.nii.gz
------------

Creating JSON and .tsv Files

Done!
