In [20]:
import os
import datetime
import pandas as pd
import numpy as np
import shutil
from pathlib import Path
import json
from file_utilities import makedir, copy_files, get_files, read_json, get_input
from dataframe_utilities import drop_rows_by_value_or_na
from audit import run_df_audit, run_file_audit
# Per-user settings: edit the two values below before running on another machine.
uniquename = 'mfield'  # Replace with your unique identifier
dropbox_dir = 'Mary Soules'  # Replace with your Dropbox directory
# Root folder that the data paths later in this notebook are built from.
main_path = "C:/Users/{}/University of Michigan Dropbox/{}".format(uniquename, dropbox_dir)

In [22]:
# Set paths and load the input tables used by the extraction loop.
# Drive prefix for the fMRI output path built in the extraction loop
# (presumably a mapped Windows drive -- confirm).
win_mount = "X:"

# path_to_data = Path(f"{main_path}/MaryScratch/gng/")
path_to_data = Path(f"{main_path}/fMRI_QC/DatabaseToRC/MATOUT/fmri_behavioral/")

# Will need to adjust for those that are not eprime files.
merged_data_csv = f"{main_path}/MaryScratch/matoutmergeddata_shifted.csv"
# Name of the subject list workbook.
subj_list_xlsx = f"{main_path}/MaryScratch/subj_list.xlsx"

onset_file = pd.read_excel("param_files/gng_onsets.xlsx")
final_df = pd.read_csv(merged_data_csv)
subj_list = pd.read_excel(subj_list_xlsx)

In [23]:
# Interactive selection: choose a study, then one of that study's tasks,
# and load the parameter dictionary for each.
choices = read_json("bda_choice_files/study_choices.json")
study = get_input(choices)

study_param_file = (
    f"{main_path}/fMRI_QC/Scripts/Scratch/QualityControl/parameter_files/{study}_params_dict.json"
)
study_param_dict = read_json(study_param_file)

# Re-key the study's functional tasks as {task number as text: task name}
# so they can be offered through the same prompt helper.
choices = dict(
    (str(details['task_num']), task)
    for task, details in study_param_dict['func']['tasks'].items()
)
task_name = get_input(choices)

task_param_file = f"param_files/{study}_{task_name}_params.json"
task_param_dict = read_json(task_param_file)

Available options:
0 = training
1 = unity
8 = BrainBasics
9 = PEERS
10 = neuromod
11 = brainism
12 = MICHR
13 = SUDPrediction
14 = MaturingOut




Enter a task number to select:  14



The option with number 14 is 'MaturingOut'.
Available options:
5 = rest
4 = gng
11 = emoreg
12 = cuereactivity




Enter a task number to select:  4



The option with number 4 is 'gng'.


In [36]:
# Extraction for E-Prime data only; will need to adjust for Excel-based tasks.
#
# For every row of subj_list:
#   1. select that subject's rows from the merged export (final_df),
#   2. run the file audit and keep its record for the audit report,
#   3. always save the subject's raw rows,
#   4. when the audit returns investigate == 'N', build the cleaned onset
#      table and save it to both the project folder and the fMRI mount.
all_data = []  # one audit record per subject, in subj_list order
ons_cols = task_param_dict['onset_columns']
# Common stem shared by every per-subject output file name.
output_stem = "mhEventGO_noGO_all_runs_20201020_win10"

for index, row in subj_list.iterrows():
    target = row['Target']
    subject = row['SubjectID']
    print(subject)

    # Filter once and take an independent copy, so adding SubjectID below
    # can never write through to final_df.
    subj_df = final_df.loc[final_df['Subject'] == target].copy()

    if subj_df.empty:
        # No rows for this subject: still record it in the audit, with zero files.
        subj_d, _ = run_file_audit(subj_df, subject, task_param_dict, num_files=0)
        all_data.append(subj_d)
        continue

    subj_path = makedir(f"{path_to_data}/raw/", subject)
    turbo_path = makedir(f"{win_mount}/{task_param_dict['fmri_path']}/", subject)
    subj_df.loc[:, 'SubjectID'] = subject
    subj_d, investigate = run_file_audit(subj_df, subject, task_param_dict)
    all_data.append(subj_d)

    # Save the raw rows before any cleaning, so a failure in the cleaning
    # step below cannot prevent the raw export from being written.
    subj_df.to_csv(f"{subj_path}/{output_stem}-{target}-raw.csv", index=False)

    if investigate != 'N':
        # Audit flagged this subject: keep only the raw export.
        continue

    # Start cleaning: keep the onset columns and apply the rename map from
    # the task parameter file.
    ons_df = subj_df[ons_cols].rename(columns=task_param_dict['replace_dict'])
    ons_df = drop_rows_by_value_or_na(ons_df, 'MyLetter')
    # Number the remaining trials 1..n within each run.
    ons_df = ons_df.assign(Trial=ons_df.groupby('Run').cumcount() + 1)
    # NOTE(review): how='inner' drops any trial that has no matching
    # (MyLetter, run, Trial) row in onset_file -- confirm that is intended.
    ons_df_merged = ons_df.merge(
        onset_file,
        how='inner',
        left_on=['MyLetter', 'Run', 'Trial'],
        right_on=['MyLetter', 'EprimeRunNumber', 'Trial'],
    ).fillna(0)

    final_data_path = makedir(f"{path_to_data}/{task_name}/", subject)
    clean_name = f"{output_stem}-{target}-clean.csv"
    ons_df_merged.to_csv(f"{final_data_path}/{clean_name}", index=False)
    ons_df_merged.to_csv(f"{turbo_path}/{clean_name}", index=False)

jeh23mat00068_04824
1
1
jeh23mat00068_04824
1
jeh23mat00147_05898
2
1
jeh23mat00147_05898
2
jeh23mat00149_05878
1
1
jeh23mat00149_05878
1
jeh23mat00150_05882
1
1
jeh23mat00150_05882
1
jeh23mat00141_05811
1
1
jeh23mat00141_05811
1
jeh23mat00151_05807
1
1
jeh23mat00151_05807
1
jeh23mat00148_05776
1
1
jeh23mat00148_05776
1
jeh23mat00142_05754
1
1
jeh23mat00142_05754
1
jeh23mat00140_05747
1
1
jeh23mat00140_05747
1
jeh23mat00162_06100
1
1
jeh23mat00162_06100
1
jeh23mat00179_06191
1
1
jeh23mat00179_06191
1
jeh23mat00160_06187
1
1
jeh23mat00160_06187
1
jeh23mat00177_06165
1
1
jeh23mat00177_06165
1
jeh23mat00185_06163
1
1
jeh23mat00185_06163
1
jeh23mat00174_06154
1
1
jeh23mat00174_06154
1
jeh23mat00166_06141
1
1
jeh23mat00166_06141
1
jeh23mat00161_06019
2
1
jeh23mat00161_06019
2
jeh23mat00165_05969
1
1
jeh23mat00165_05969
1
jeh23mat00145_05966
1
1
jeh23mat00145_05966
1
jeh23mat00156_05979
1
1
jeh23mat00156_05979
1
jeh23mat2300154_05983
1
1
jeh23mat2300154_05983
1
jeh23mat00169_06001
1
1
jeh23m

In [28]:
# Column headers for the audit table, one per field of each audit record.
audit_cols = [
    'Subject',
    'Task',
    'NumberOfFiles',
    'NumberOfRuns',
    'TotalTrials',
    'Investigate',
]

In [30]:
# Collect the per-subject audit records into one table and save it as an Excel report.
audit_df = pd.DataFrame(data=all_data, columns=audit_cols)
audit_report_path = f"{path_to_data}/matout_gng_audit.xlsx"
audit_df.to_excel(audit_report_path)