Method to clean up the JSON outputs of regional fitting into a TSV

In [2]:
import pandas as pd
import numpy as np
import json
import glob
import os
import pathlib

In [3]:
def safe_load_meta(input_metadata_file: str) -> dict:
    """
    Read a JSON metadata file from disk and return its parsed contents.

    Args:
        input_metadata_file (str): Path to the JSON metadata file.

    Returns:
        metadata (dict): The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If nothing exists at ``input_metadata_file``.
    """
    # Check for existence up front so the caller gets a message that names the path.
    if os.path.exists(input_metadata_file):
        with open(input_metadata_file, mode='r', encoding='utf-8') as handle:
            return json.load(handle)

    raise FileNotFoundError(f"Metadata file {input_metadata_file} not found. Does it have a different path?")

In [4]:
# Load a single fit-properties JSON (hard-coded absolute path) as a worked example for the cells below.
ex_meta = safe_load_meta('/export/scratch1/PETPAL/VATDYS/derivatives/petpal/sub-PIB10081/ses-VYr6/km/sub-PIB10081_ses-VYr6_model-mrtm1_km_desc-mrtm_seg-Leftputamen_fitprops.json')

In [5]:
def copy_metadata_sans_list(metadata: dict) -> dict:
    """
    Build a flat copy of a metadata dictionary in which every list value is spread over
    several scalar entries. Useful when several JSON files are to be combined into one TSV.

    Args:
        metadata (dict): Metadata whose values may include lists.

    Returns:
        metadata_for_tsv (dict): New dictionary with the same entries, except that each list
            value is replaced by one entry per element. The input is not modified.

    Note:
        Expanded keys are the original key followed by an underscore and the 1-based position
        of the element. E.g. ``{'FitPars': [4, 6]}`` becomes ``{'FitPars_1': 4, 'FitPars_2': 6}``.
        Only ``list`` instances are expanded (tuples are copied as-is), expansion is one level
        deep, and an empty list contributes no entries at all.
    """
    flattened = {}
    for name, value in metadata.items():
        if not isinstance(value, list):
            # Scalars (and any non-list container) are carried over untouched.
            flattened[name] = value
            continue
        for position, element in enumerate(value, start=1):
            flattened[f'{name}_{position}'] = element
    return flattened

In [6]:
# Show the flattened form of the example metadata (list values become numbered keys).
copy_metadata_sans_list(ex_meta)

{'BP': 4.17272,
 'k2Prime': 0.01277,
 'ThresholdTime': 10,
 'StartFrameTime': 10.82,
 'EndFrameTime': 115.82,
 'NumberOfPointsFit': 22,
 'RawFits_1': 0.03669,
 'RawFits_2': -0.00709,
 'RawFits_3': 2.87172,
 'SimulatedFits': None,
 'FilePathRTAC': '/export/scratch1/PETPAL/VATDYS/derivatives/petpal/sub-PIB10081/ses-VYr6/tacs/sub-PIB10081_ses-VYr6_seg-WMRef_tac.tsv',
 'FilePathTTAC': '/export/scratch1/PETPAL/VATDYS/derivatives/petpal/sub-PIB10081/ses-VYr6/tacs/sub-PIB10081_ses-VYr6_seg-LeftPutamen_tac.tsv',
 'MethodName': 'MRTM'}

In [7]:
# Gather every JSON file in one session's `km` directory (hard-coded path) ...
fit_results_all = glob.glob('/export/scratch1/PETPAL/VATDYS/derivatives/petpal/sub-PIB10081/ses-VYr6/km/*.json')
# ... and start from an empty table that the next cell appends to.
km_fits = pd.DataFrame()

In [8]:
# Build the table one JSON file at a time; `i` is used as the row index of each file.
i = 0
for fit in fit_results_all:
    fit_load = safe_load_meta(fit)
    # Spread list values over numbered keys so every value fits in one table cell.
    fit_clean = copy_metadata_sans_list(fit_load)
    # One single-row frame per file, indexed by its position in the glob result.
    fit_pd = pd.DataFrame(fit_clean,index=[i])
    # Appending by concat; keys absent from a given file end up as NaN in its row.
    km_fits = pd.concat([km_fits,fit_pd])
    i += 1


In [9]:
# Inspect the BP column; NaN rows presumably come from JSON files without a 'BP' entry — TODO confirm.
km_fits['BP']

0          NaN
1          NaN
2      0.09796
3      0.40989
4      0.28930
        ...   
112    0.66290
113    0.08667
114    0.36655
115    0.26330
116    0.26619
Name: BP, Length: 117, dtype: float64

In [10]:
def _extract_filename_entity(fileparts, marker: str) -> str:
    """
    Return the value following ``marker`` in the first filename part containing it.

    Hyphens inside the value are removed. Returns ``'UNK'`` when no part contains ``marker``.
    """
    for part in fileparts:
        if marker in part:
            return part.split(marker)[-1].replace('-', '')
    return 'UNK'


def infer_sub_ses_from_tac_path(tac_path: str) -> tuple:
    """
    Infers subject and session IDs from a TAC file path by analyzing the filename.

    The filename (without its ``.tsv`` extension) is split on underscores. The first part
    containing ``sub-`` supplies the subject ID and the first part containing ``ses-`` supplies
    the session ID; in both cases the ID is the text after the marker with any hyphens removed.
    If a marker is not found, the corresponding ID is ``'UNK'``. Subject and session are
    resolved independently, so a missing session does not affect the subject and vice versa.

    Args:
        tac_path (str): Path of the TAC file. Must have a ``.tsv`` suffix.

    Returns:
        tuple: ``(subject_id, session_id)`` as strings.

    Raises:
        AssertionError: If ``tac_path`` does not end in ``.tsv``. Note that this check is an
            ``assert`` and is therefore skipped when Python runs with ``-O``.

    Example:
        ``sub-PIB10081_ses-VYr6_seg-LeftPutamen_tac.tsv`` -> ``('PIB10081', 'VYr6')``
    """
    path = pathlib.Path(tac_path)
    assert path.suffix == '.tsv', '`tac_path` must point to a TSV file (*.tsv)'
    # Split the stem rather than the full name so the extension can never leak into an ID
    # (e.g. `sub-01_ses-02.tsv` must give session `02`, not `02.tsv`).
    fileparts = path.stem.split("_")
    subname = _extract_filename_entity(fileparts, 'sub-')
    sesname = _extract_filename_entity(fileparts, 'ses-')
    return subname, sesname
    

In [11]:
# Quick check on a real path: `fit_clean` is whatever the last iteration of the loop above left behind.
infer_sub_ses_from_tac_path(fit_clean['FilePathTTAC'])

('PIB10081', 'VYr6')

In [20]:
def km_regional_fits_to_tsv(fit_results_dir: str, out_tsv_dir: str) -> pd.DataFrame:
    """
    Tidies the output of regional kinetic modeling results by converting JSON files into a TSV file
    with one row per JSON file (i.e. per fit region). Accommodates lists by spreading them over
    numbered columns. Assigns a subject and session to each row, inferred from the TAC file path
    stored under ``FilePathTTAC`` in each JSON file.

    JSON files are processed in sorted filename order so the row order is reproducible. Fields
    that are missing from some of the JSON files are left empty in the corresponding rows.

    Args:
        fit_results_dir (str): Directory where the fit results are stored as ``*.json`` files.
        out_tsv_dir (str): Path of the TSV *file* to write (despite the name, this is passed
            directly to ``DataFrame.to_csv`` as the output file path).

    Returns:
        km_fits (pd.DataFrame): DataFrame containing the fit data for all regions, with added
            ``sub_id`` and ``ses_id`` columns. Empty if the directory holds no JSON files.

    Raises:
        KeyError: If a JSON file has no ``FilePathTTAC`` entry.
    """
    # glob returns files in arbitrary order; sort so repeated runs give the same TSV.
    fit_results_jsons = sorted(glob.glob(os.path.join(fit_results_dir, '*.json')))

    fit_rows = []
    for i, fit in enumerate(fit_results_jsons):
        fit_clean = copy_metadata_sans_list(safe_load_meta(fit))
        sub, ses = infer_sub_ses_from_tac_path(fit_clean['FilePathTTAC'])
        fit_clean['sub_id'] = sub
        fit_clean['ses_id'] = ses
        fit_rows.append(pd.DataFrame(fit_clean, index=[i]))

    # Concatenate once at the end: concatenating inside the loop re-copies the whole
    # table on every iteration. pd.concat rejects an empty list, hence the fallback.
    km_fits = pd.concat(fit_rows) if fit_rows else pd.DataFrame()
    km_fits.to_csv(out_tsv_dir, sep='\t')
    return km_fits

In [21]:
# Run the full conversion on one session's results; the TSV goes to a throwaway file under /tmp.
fits = km_regional_fits_to_tsv(fit_results_dir='/export/scratch1/PETPAL/VATDYS/derivatives/petpal/sub-PIB10081/ses-VYr6/km/',out_tsv_dir='/tmp/dummy.tsv')