In [6]:
import glob, os
import pandas as pd
import pdb
import subprocess
import numpy as np
from IPython.core import display as ICD


# Display configuration for the report tables.
# `None` means "do not truncate cell text"; the older `-1` sentinel is deprecated
# and rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 159)


In [11]:
def report_output():
    """Display the session-1 QC report: scan notes plus BIDS file/volume counts.

    Takes no arguments and reads two module-level DataFrames:

    * ``df_clean`` -- scan notes; shown sorted by index.
    * ``bids_df``  -- one row per subject; must contain a ``func_train_ct``
      column, which is used to select subjects by number of runs.

    Returns nothing; all output goes through ``print`` and ``ICD.display``.
    """
    print("Subject Notes")
    ICD.display(df_clean.sort_index())

    print("\nVolume and file count report")
    ICD.display(bids_df)

    # Derive the 3-run subset once so the header line and the table agree.
    three_runs = bids_df[bids_df["func_train_ct"] == 3]
    print("\nSubjects with only 3 runs: {}".format(three_runs.index.values))
    ICD.display(three_runs)

    print("\nSubjects with 2 or less runs: ")
    ICD.display(bids_df[bids_df["func_train_ct"] < 3])

    # A zero in any column flags the row as having at least one empty count.
    missing_df = bids_df[(bids_df == 0).any(axis="columns")]

    print("\nSubjects missing files: ")
    ICD.display(missing_df)
    
    
def dmc_report(s1_sub_dcm_ct=None):
    """Print the session-1 DICOM summary.

    Parameters
    ----------
    s1_sub_dcm_ct : int, optional
        Number of unique DICOM subject directories found. When omitted, the
        module-level variable of the same name is used, so zero-argument
        calls keep working.

    Raises
    ------
    NameError
        If no count is passed and no module-level ``s1_sub_dcm_ct`` exists.
    """
    if s1_sub_dcm_ct is None:
        try:
            s1_sub_dcm_ct = globals()["s1_sub_dcm_ct"]
        except KeyError:
            raise NameError("name 's1_sub_dcm_ct' is not defined") from None

    ## Report Output
    print("BBx Session 1 DICOM info:")
    print("\nUnique DICOM directories found: {}".format(s1_sub_dcm_ct))


# Alias with the "dcm" (DICOM) spelling so the report is reachable under either name.
dcm_report = dmc_report

In [9]:
def dicom_check():
    """Compare session-1 DICOM subject directories against the scan-notes IDs.

    Reads the module-level names ``bids_path`` (root directory) and
    ``df_clean`` (scan notes indexed by ``bbx_###`` IDs).

    Publishes two module-level names for the functions that read them:
    ``s1_dcms`` (sorted unique DICOM IDs, leading zeros stripped) and
    ``s1_sub_dcm_ct`` (their count), then calls ``dmc_report()``.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(s1_mia_id, s1_mia_dcm)``: IDs that have a DICOM directory but no
        scan-notes entry, and IDs in the scan notes with no DICOM directory.
    """
    # NOTE(review): shared state is passed through module globals because the
    # report functions take no arguments; explicit parameters would be cleaner.
    global s1_dcms, s1_sub_dcm_ct

    # Get DICOM id list
    dicom_dirs = glob.glob(os.path.join(bids_path, "sourcedata/DICOM/ses-1/sub-*"))
    s1_dcms = sorted({os.path.basename(d).split("-")[1].lstrip("0") for d in dicom_dirs})
    s1_sub_dcm_ct = len(s1_dcms)

    # get expected id list from notes
    s1_sub_ids = sorted({x.split("_")[1].lstrip('0') for x in df_clean.index.values})

    # Set arithmetic on plain strings; this stays well-defined when either list is empty.
    # ids found in dicom directory but not id list
    s1_mia_id = sorted(set(s1_dcms) - set(s1_sub_ids))
    # ids missing from dicom directories
    s1_mia_dcm = sorted(set(s1_sub_ids) - set(s1_dcms))

    # look at scan notes for any missing DICOM ids
    print("NOTES ON MISSING SUBJECTS: \n")
    for id_ in s1_mia_dcm:
        bbx_id = "bbx_{:03d}".format(int(id_))
        try:
            print("{}, notes: \n{}\n".format(bbx_id, df_clean.loc[bbx_id]))
        except KeyError:
            # Only a failed label lookup means "no notes"; other errors should surface.
            print("Missing scan notes for {} \n".format(bbx_id))

    dmc_report()

    return s1_mia_id, s1_mia_dcm



In [10]:
def main():
    """Load the session-1 scan notes and display the QC report.

    Publishes two module-level names that the other functions in this
    notebook read: ``bids_path`` (dataset root) and ``df_clean`` (the
    ``w1scan_scannotes`` column indexed by lower-cased ``participantID``).
    Finishes by calling ``report_output()``.
    """
    # NOTE(review): report_output() also reads a module-level `bids_df`, which
    # this function does not create; it has to exist before main() is called.
    global bids_path, df_clean

    bids_path = 'tst'

    # setup report tsv
    # NOTE(review): `report` is not used below; the read is kept so a missing or
    # malformed qc_report_s1.tsv still fails here.
    scan_file = os.path.join(bids_path, "code/qc_report_s1.tsv")
    report = pd.read_csv(scan_file, sep="\t", index_col="subject")

    # get scan notes
    notes_path = os.path.join(bids_path, 'code/w1_notes.csv')
    df_w1_notes = pd.read_csv(notes_path, encoding='latin-1')

    # set_index returns a new frame, so nothing is mutated in place on a column slice.
    notes = df_w1_notes[['participantID', 'w1scan_scannotes']].set_index("participantID")
    notes.index = notes.index.str.lower()
    # Presumably a column-description row in the export; tolerate its absence.
    df_clean = notes.drop(['participant id (bbx_###)'], errors="ignore")

    report_output()

    # write file
    #bids_df.to_csv(os.path.join(bids_path, "derivatives/quality_check/ses-1_bids.tsv"), sep="\t")




In [None]:
def _count_volumes(nifti_path):
    """Return the volume count ``fslnvols`` prints for ``nifti_path``, as a stripped string."""
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters reach fslnvols unchanged.
    raw = subprocess.check_output(["fslnvols", nifti_path])
    return raw.decode("utf-8").strip()


def build_data_dict():
    """Collect per-subject file counts and volume counts for session 1.

    Scans ``<bids_path>/sub-*/ses-1`` (``bids_path`` is read from module
    scope) and runs ``fslnvols`` on every functional image found.

    Returns
    -------
    dict
        ``{sub_id: {...}}`` where each inner dict holds:

        * ``func_train_ct`` -- number of ``func/*run*nii.gz`` files
        * ``<run>_vol``     -- volume count (string) for each run file whose
          name contains ``training`` or ``rl``; ``<run>`` is the 4th
          underscore-separated field of the file name
        * ``rest_vol``      -- volume count for any other ``*run*`` file
        * ``func_rest_ct``  -- number of ``func/*resting*nii.gz`` files
        * ``resting``       -- volume count of a resting file (if several
          exist, the last one processed wins)
        * ``fmap_ct``, ``anat_ct`` -- number of ``.nii.gz`` files in
          ``fmap/`` and ``anat/``
    """
    # if no missing IDs found, that is good, meaning we have at least attempted to generate all of the subject directories available
    # now we have to inspect individual directories
    qa_dict = {}
    for dir_path in sorted(glob.glob(os.path.join(bids_path, "sub-*/ses-1"))):
        # get id: name of the directory that contains ses-1
        sub_id = os.path.basename(os.path.dirname(dir_path))
        counts = qa_dict.setdefault(sub_id, {})

        # gather files
        func_runs = glob.glob(os.path.join(dir_path, "func/*run*nii.gz"))
        func_rest = glob.glob(os.path.join(dir_path, "func/*resting*nii.gz"))
        anat = glob.glob(os.path.join(dir_path, "anat/*.nii.gz"))
        fmaps = glob.glob(os.path.join(dir_path, "fmap/*.nii.gz"))

        # Save the file count for each directory into the dictionary.
        # len() of an empty glob result is already 0, so no special case is needed.

        # Functional files.
        # Expected volume counts are 233 (training), 212 (rl) and 147 (other);
        # they are not enforced here.
        counts["func_train_ct"] = len(func_runs)
        for func in func_runs:
            filename = os.path.basename(func)
            # Match on the file name only, so directory names cannot trigger a
            # branch, and take the run label from this file rather than from
            # whichever file was processed before it.
            if "training" in filename or "rl" in filename:
                col_name = filename.split("_")[3] + "_vol"
            else:
                col_name = "rest_vol"
            counts[col_name] = _count_volumes(func)

        # Resting functional files:
        counts["func_rest_ct"] = len(func_rest)
        for func in func_rest:
            counts["resting"] = _count_volumes(func)

        # Fieldmap files:
        counts["fmap_ct"] = len(fmaps)

        # Anatomical files:
        counts["anat_ct"] = len(anat)

    print("\n> Completed dictionary build.")
    return qa_dict

In [None]:
def bids_check():
    """Build the per-subject BIDS QC table for session 1.

    Reads the module-level names ``bids_path`` (dataset root) and ``s1_dcms``
    (DICOM subject IDs as strings, leading zeros stripped).

    Publishes two module-level names: ``bids_df`` (the table that
    ``report_output`` displays) and ``s1_mia_bids`` (DICOM IDs with no
    ``sub-*/ses-1`` directory).

    Returns
    -------
    pandas.DataFrame
        One row per subject, built from ``build_data_dict()``.
    """
    # NOTE(review): shared state is passed through module globals because the
    # report functions take no arguments; explicit parameters would be cleaner.
    global bids_df, s1_mia_bids

    s1_bids = sorted({
        os.path.basename(os.path.dirname(p)).split("-")[1].lstrip("0")
        for p in glob.glob(os.path.join(bids_path, "sub-*/ses-1"))
    })

    # subjects found in DICOM list but not in bids
    s1_mia_bids = sorted(set(s1_dcms) - set(s1_bids))

    # The dictionary builder in this notebook is named build_data_dict.
    qa_dict = build_data_dict()
    bids_df = pd.DataFrame(qa_dict).T

    return bids_df

    