# Check that hemispheres are:
# 1. not identical
# 2. correctly assigned


In [None]:
import numpy as np
import pandas as pd
import os
import nibabel as nib
os.chdir("/host/verges/tank/data/daniel/01_3T7T/z/code/analyses/")
import tTsTGrpUtils as tsutil
import utils_plots as up
import importlib
import datetime

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration: input file paths, per-study directory layouts,
# demographics column names and analysis specifications. The names defined
# here (MICs, PNI, studies, demographics, specs, ...) are read by later cells.
# ---------------------------------------------------------------------------

# CSV with one row per participant-session and one column per map path.
pth_df_map_pths = "/host/verges/tank/data/daniel/01_3T7T/z/outputs/04c_dfPths_dsMaps_30Nov2025-101445.csv"
# Pickled dictionary list of maps.
# NOTE(review): this path uses ".../daniel/3T7T/..." whereas every other path in this cell
# uses ".../daniel/01_3T7T/..." -- confirm this is the intended location.
dl_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/04d_dl_maps_26Nov2025-153407.pkl"

# Study details
# Each study dict holds the BIDS root plus the sub-directories (each with a leading "/")
# that are concatenated onto it to build derivative paths.
MICs = {
    "name": "MICs",
    "dir_root": "/data/mica3/BIDS_MICs",
    "dir_raw": "/rawdata",
    "dir_deriv": "/derivatives",
    "dir_mp": "/micapipe_v0.2.0",
    "dir_hu": "/hippunfold_v1.3.0/hippunfold",
    "dir_zb": "/zbrains_clinical",
    "study": "3T",
    "ID_ctrl" : ["HC"], # patterns for control IDs in demographics file
    "ID_Pt" : ["PX"] # patterns for patient IDs in demographics file
    }

# NOTE(review): PNI defines "ID_col" while MICs defines "ID_ctrl"/"ID_Pt"; the values look
# like ID patterns rather than a column name -- confirm which keys downstream code expects.
PNI = {
    "name": "PNI",
    "dir_root": "/data/mica3/BIDS_PNI",
    "dir_raw": "/rawdata",
    "dir_deriv": "/derivatives",
    "dir_mp": "/micapipe_v0.2.0",
    "dir_hu": "/hippunfold_v1.3.0/hippunfold",
    "dir_zb": "/zbrains_clinical",
    "study": "7T",
    "ID_col" : ["PNC", "Pilot"], # column for ID in demographics file
    }

studies = [MICs, PNI]

# Demographics details
demographics = {
    "df_pths_qc_pth" : "/host/verges/tank/data/daniel/01_3T7T/z/outputs/03b_mapPths_QC_26Nov2025-125407.csv", # NOTE: path to demographics file with merged QC cols produced by 02_demo.ipynb
    # column names:
    'nStudies': True, # whether multiple studies are included
    "ID_7T" : "PNI_ID", 
    "ID_3T" : "MICS_ID",
    "SES" : "SES",
    "date": "Date",
    "age": "age",
    "sex": "sex",
    "grp" : "grp_detailed" # col name for participant grouping variable to use
}

specs  = { # all spec values to be in lists to allow for iteration across these values
    # directories
    # (sub-directories are appended to 'prjDir_root' by string concatenation)
    'prjDir_root' : "/host/verges/tank/data/daniel/01_3T7T/z", 
    'prjDir_outs' : "/outputs",
    'prjDir_out_stats': "/outputs/stats",
    'prjDir_out_figs': "/outputs/figures",
    'prjDir_maps': "/maps", # output directory for smoothed cortical maps
    'prjDir_dictLists': "/maps/dictLists",
    # NOTE(review): "/output/paths" (singular) differs from the "/outputs/..." used by the
    # neighbouring entries -- confirm this is not a typo.
    'prjDir_mapPths' : "/output/paths",
    'prjDir_maps_dfs': "/outputs/dfs/04a_maps_dfs",
    'prjDir_parc_dfs': "/outputs/dfs/04b_maps_parc",
    'prjDir_winComp_dfs': "/outputs/dfs/05a_winComp",
    'prjDir_grpFlip_dfs': "/outputs/dfs/05b_grpFlip",
    'prjDir_winD_dfs': "/outputs/dfs/05c_winD",
    'prjDir_btwD_dfs': "/outputs/dfs/05d_btwComp",

    # downsampling
    'ds_study': ['PNI'], # list of study codes to apply downsampling to
    'ds_foi': ['T1map'], # features to downsample
    'ds_res': [0.8], # resolution (in mm) to downsample volumes to. NOTE. should be same length as ds_foi with each value corresponding to that in ds_foi 
    'ds_vol_dir': '/downsampled_vols', # name of directory within project dir root

    # analysis regions
    # The *_ctx / *_hipp lists below are iterated as a full cross-product
    # (feature x label x surface x smoothing) when z-brains paths are built.
    'ctx': True, # whether to include cortical analyses
    'surf_ctx': ['fsLR-32k', 'fsLR-5k'],
    'parcellate_ctx': 'glasser', # parcellation to use, or None if no parcellation.
    'parc_lbl_ctx': 'glasser_int', # what name to fetch for parcellation values
    'lbl_ctx': ['midthickness', 'pial', 'white'], # pial, midthick, white, etc
    'ft_ctx': ['thickness', 'T1map'], # features: T1map, flair, thickness, FA, ADC
    'smth_ctx': [5, 10], # in mm
    
    'hipp': True, # whether to include hippocampal analyses
    'surf_hipp': ['den-0p5mm'],
    'parcellate_hipp': 'DK25',
    'parc_lbl_hipp': 'idx',
    'lbl_hipp': ['midthickness', "inner", "outer"], # outer, inner, midthickness, etc
    'ft_hipp': ['thickness', 'T1map'], # features: T1map, flair, thickness, FA, ADC
    'smth_hipp': [2, 5], # in mm
        
    # within study comparisons
    'col_grp': 'grp_detailed',  # column in df_demo with group labels
    'winComp_stats': ['z'], # what stats to run for within study comparisons ('z' for z-scoring, 'w' for w-scoring)
    'covars': [demographics['age'], demographics['sex']], # resolves to the 'age' and 'sex' column names defined above

    'ipsiTo' : 'L', # what hemisphere for controls ipsi should be mapped to
}

In [None]:
# Re-execute the tsutil module so that edits made to it on disk are picked up
# without restarting the kernel.
importlib.reload(tsutil)

def bin(n_vertices, n_bins):
    """
    Partition the vertex indices 0 .. n_vertices-1 into n_bins contiguous groups.

    Returns a list of n_bins integer arrays (as produced by np.array_split);
    groups differ in length by at most one and may be empty when
    n_bins > n_vertices.

    NOTE: this name shadows the Python builtin `bin` within the notebook.
    """
    import numpy as np
    vertex_ids = np.arange(n_vertices)
    groups = np.array_split(vertex_ids, n_bins)
    return groups

def _difStats(name, vals, zeroVal, zeroVal_fmt):
    """Return a pd.Series of summary statistics for one 2D block of differences."""
    flat = vals.ravel()
    finite = flat[~np.isnan(flat)]
    # Magnitude test: a difference counts as non-zero whichever hemisphere/map is larger.
    n_above = int(np.count_nonzero(np.abs(finite) > zeroVal))

    if finite.size:
        mean, std = np.mean(finite), np.std(finite)
        lo, hi = np.min(finite), np.max(finite)
        q25, q50, q75 = np.percentile(finite, [25, 50, 75])
    else:
        # Empty block (more bins than vertices) or all-NaN block: nothing to summarise.
        mean = std = lo = hi = q25 = q50 = q75 = np.nan

    return pd.Series({
        'name': name,
        'n_vertices': vals.shape[1],
        'n_subjects': vals.shape[0],
        'n_totVals': flat.size,
        'mean': mean,
        'std': std,
        'min': lo,
        '25%': q25,
        '50%': q50,
        '75%': q75,
        'max': hi,
        f'n_above{zeroVal_fmt}': n_above,
        f'%_above{zeroVal_fmt}': (n_above / flat.size) * 100 if flat.size else np.nan,
    })


def _difStatsTable(stats_list, decimals=3):
    """Render a list of stats Series as a text table (one column per Series), floats rounded."""
    # Each Series mixes strings, ints and floats, so floats are rounded value by value.
    rounded = [
        pd.Series({k: (round(v, decimals) if isinstance(v, float) else v) for k, v in s.items()})
        for s in stats_list
    ]
    return pd.concat(rounded, axis=1).to_string()


def describeMapDif(x, y, logger, investigate = False, n_bins = 20, zeroVal = 1e-4):
    """
    Log summary statistics of the element-wise difference x - y between two
    DataFrames of the same size (e.g. left and right hemisphere maps, laid out
    as participants (rows) by vertices (columns)).

    Statistics are reported for all values together and for n_bins contiguous
    groups of columns. A value counts as "above" when its absolute difference
    exceeds zeroVal (NaNs are never counted).

    Parameters
    ----------
    x, y : pd.DataFrame
        Frames to compare. The subtraction is a pandas DataFrame subtraction,
        so rows/columns are aligned on their labels.
    logger : object with an .info(str) method
        Destination for the text report.
    investigate : bool
        If True, any bin in which at least 5% of the values are "above" is
        split further into max(1, n_bins // 4) sub-bins whose statistics are
        logged in a second table.
    n_bins : int
        Number of contiguous column groups.
    zeroVal : float
        Magnitude below which a difference is treated as zero.

    Returns
    -------
    None. Results are written to `logger` only.
    """
    # print summary stats
    logger.info("\tDifference L-R summary stats:")

    zeroVal_fmt = f"{zeroVal:.0e}"
    subbins = max(1, n_bins // 4)

    d_vals = (x - y).values  # 2D array: subjects x vertices
    n_vertices = d_vals.shape[1]

    # First column of the report: statistics across every vertex.
    binned_stats = [_difStats('all', d_vals, zeroVal, zeroVal_fmt)]
    subbin_stats = []

    # Contiguous, near-equal groups of column positions.
    idx_grps = np.array_split(np.arange(n_vertices), n_bins)
    for bin_idx, idxs in enumerate(idx_grps, start=1):
        bin_values = d_vals[:, idxs]
        bin_stats = _difStats(f'bin_{bin_idx}', bin_values, zeroVal, zeroVal_fmt)
        binned_stats.append(bin_stats)

        # NaN (empty bin) compares False, so empty bins are never investigated.
        if investigate and bin_stats[f'%_above{zeroVal_fmt}'] >= 5:
            idx_subgrps = np.array_split(np.arange(bin_values.shape[1]), subbins)
            for subbin_idx, sub_idxs in enumerate(idx_subgrps, start=1):
                subbin_stats.append(
                    _difStats(f'bin_{bin_idx}-{subbin_idx}', bin_values[:, sub_idxs], zeroVal, zeroVal_fmt)
                )

    # Combine all bin statistics into a single table
    logger.info(_difStatsTable(binned_stats))

    if investigate and subbin_stats:
        logger.info("\n\tInvestigated Sub-bins stats:")
        logger.info(_difStatsTable(subbin_stats))

def parse_map_colname(s):
    """
    Parse a map-path column name into its components.

    Handles names such as
        ctx_hemi-R_surf-fsLR-32k_label-white_T1map_smth-10mm
        ctx_hemi-L_surf-fsLR-5k_label-thickness_smth-5mm
        zb_ctx_hemi-L_surf-fsLR-5k_label-pial_feature-T1map_smooth-5mm
    A leading 'zb_' prefix is ignored, so region is 'ctx' for the last example.

    Parameters
    ----------
    s : str
        Column name.

    Returns
    -------
    dict with keys region, hemi, surf, label, feature, smoothing. Any component
    that cannot be determined is None. smoothing is the kernel as a string
    without the 'mm' suffix (e.g. '10'). A name whose label is 'thickness' is
    reported as label 'midthickness' with feature 'thickness'.
    """
    import re

    def _smoothing_kernel(token):
        """Return the kernel of a 'smth-<k>[mm]' / 'smooth-<k>[mm]' token, else None."""
        if token is None:
            return None
        tok_match = re.match(r'^(?:smth|smooth)-(.+)$', token)
        if tok_match is None:
            return None
        kernel = tok_match.group(1)
        return kernel[:-2] if kernel.endswith('mm') else kernel

    # Drop the z-brains prefix so both naming schemes share one grammar.
    name = s[3:] if s.startswith('zb_') else s

    pat = re.compile(
        r'^(?P<region>[^_]+)_hemi-(?P<hemi>[LR])_surf-(?P<surf>[^_]+)_label-(?P<label>[^_]+)'
        r'(?:_(?:feature-)?(?P<feature>[^_]+))?(?:_(?:smth|smooth)-(?P<smoothing>\d+)mm)?$'
    )

    m = pat.match(name)
    if m:
        out = m.groupdict()

        # When the name has no feature token (e.g. ..._label-thickness_smth-5mm) the
        # optional feature group swallows the smoothing token; move it where it belongs.
        kernel = _smoothing_kernel(out['feature'])
        if kernel is not None:
            out['smoothing'] = kernel
            out['feature'] = None

        # Thickness maps carry the feature name in the label slot.
        if out['label'] == 'thickness':
            out['feature'] = 'thickness'
            out['label'] = 'midthickness'
        return out

    # fallback: structured split (best-effort) for names the regex rejects,
    # e.g. a non-integer kernel such as smth-2p5mm
    out = {'region': None, 'hemi': None, 'surf': None, 'label': None, 'feature': None, 'smoothing': None}
    parts = name.split('_')
    try:
        out['region'] = parts[0]
        out['hemi'] = parts[1].split('-', 1)[1]
        out['surf'] = parts[2].split('-', 1)[1]
        out['label'] = parts[3].split('-', 1)[1]
    except IndexError:
        # Too few tokens, or a token without '-': return whatever was recovered.
        return out

    trailing = parts[4:]  # tokens after the label: [feature] [smoothing] ...

    if out['label'] == 'thickness':
        # Same normalisation as the regex path.
        out['feature'] = 'thickness'
        out['label'] = 'midthickness'
    elif trailing and _smoothing_kernel(trailing[0]) is None:
        out['feature'] = trailing[0].replace('feature-', '')

    for token in trailing:
        kernel = _smoothing_kernel(token)
        if kernel is not None:
            out['smoothing'] = kernel
            break

    return out

def correspZBCol(tTsTCol_l, tTsTCol_r):
    """
    Map a left/right pair of 3T7T smoothed-map column names onto the names of
    the corresponding z-brains smoothed-map columns.

    Both names are parsed with parse_map_colname and re-assembled as
        zb_<region>_hemi-<hemi>_surf-<surf>_label-<label>_feature-<feature>_smooth-<smoothing>mm
    (the same layout is used for every region).

    Returns the tuple (zb_l, zb_r).
    Raises AssertionError if the two inputs parse to different regions.
    """
    parsed_l = parse_map_colname(tTsTCol_l)
    parsed_r = parse_map_colname(tTsTCol_r)

    assert parsed_l['region'] == parsed_r['region'], "Regions do not match"

    def _zb_name(d):
        # Prefix first, then the remaining entities in z-brains order.
        zb_base = f"zb_{d['region']}_hemi-{d['hemi']}"
        return f"{zb_base}_surf-{d['surf']}_label-{d['label']}_feature-{d['feature']}_smooth-{d['smoothing']}mm"

    return _zb_name(parsed_l), _zb_name(parsed_r)

def zbrainsMaps(study, id, ses, region, feat, lbl, surf, smth):
    """
    Build the left/right map file paths for one participant-session and return
    them if both files exist.

    Smoothed maps (smth other than 'NA' / 0) are looked up under the z-brains
    derivatives directory; unsmoothed maps are looked up under the micapipe
    derivatives directory (for either region).

    Inputs:
        study: dict
            directory components; the keys read here are 'dir_root',
            'dir_deriv', 'dir_zb' and 'dir_mp'
        id: str
            participant ID (no sub-)
        ses: str
            session ID (no ses-)
        region: str
            'cortex'/'ctx' or 'hippocampus'/'hip'/'hipp'
        feat: str
            feature name
        lbl: str
            label name (ie. midthickness, white, inner, outer etc)
        surf: str
            surface name (fsLR-5k, fsLR-32k); for the hippocampus the value is
            inserted into the smoothed file name as-is, without a 'surf-' prefix
        smth: int or 'NA'
            smoothing value in mm; 'NA' or 0 selects the unsmoothed map

    Returns:
        (pth_L, pth_R) if both files exist, otherwise (np.nan, np.nan).
        The outcome is also printed.

    Raises:
        ValueError if region is not one of the recognised codes.
    """
    import os
    import numpy as np

    # 'hipp' is the region code used in this notebook's column names.
    region_aliases = {
        'ctx': 'cortex', 'cortex': 'cortex',
        'hip': 'hippocampus', 'hipp': 'hippocampus', 'hippocampus': 'hippocampus',
    }
    canonical = region_aliases.get(region) if isinstance(region, str) else None
    if canonical is None:
        raise ValueError(f"region <{region}> not recognized. Should be 'cortex' or 'hippocampus'")
    region = canonical

    deriv_root = f"{study['dir_root']}{study['dir_deriv']}"
    sub_ses_dir = f"sub-{id}/ses-{ses}"

    if smth == 'NA' or smth == 0: # should get unsmoothed path from micapipe outputs
        maps_dir = f"{deriv_root}{study['dir_mp']}/{sub_ses_dir}/maps"
        if feat == "thickness":
            # thickness maps carry the feature name in the label entity
            stem = f"surf-{surf}_label-{feat}"
        else:
            stem = f"surf-{surf}_label-{lbl}_{feat}"
    else: # get zbrains smoothed map paths
        maps_dir = f"{deriv_root}{study['dir_zb']}/{sub_ses_dir}/maps/{region}"
        surf_token = f"surf-{surf}" if region == 'cortex' else surf
        stem = f"{surf_token}_label-{lbl}_feature-{feat}_smooth-{smth}mm"

    pth_L, pth_R = (
        f"{maps_dir}/sub-{id}_ses-{ses}_hemi-{hemi}_{stem}.func.gii" for hemi in ('L', 'R')
    )

    # Only report a pair: a map with one hemisphere missing is treated as absent.
    if os.path.exists(pth_L) and os.path.exists(pth_R):
        print(f"[zbrainsMaps] \tL: {pth_L}\n\t\tR: {pth_R}")
        return pth_L, pth_R

    print(f"[zbrainsMaps] NO EXIST \t\tL: {pth_L}\n\t\t\tR: {pth_R}")
    return np.nan, np.nan


In [None]:
# 1. Compare hemispheres (3T7T data only)
# For every item in the pickled dictionary list, load its maps, split them by
# hemisphere and log summary statistics of the L - R difference.
dl = tsutil.loadPickle(dl_pth)

# Time-stamped log file so repeated runs do not overwrite each other.
log_pth = f"/host/verges/tank/data/daniel/01_3T7T/z/outputs/debug/logs/mapDifs_{datetime.datetime.now().strftime('%d%b%Y-%H%M%S')}"
logger = tsutil._get_file_logger(__name__, log_file_path=log_pth)
logger.info(f"Log started: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
logger.info(f"[debug script: mapValues] Comparing L and R hemisphere maps.\n\t If processing is correct, these difference values should not be 0")
print(f"Logging to: {log_pth}")
tsutil.print_dict(dl)

for i, itm in enumerate(dl):
    # progress counter (0-based)
    print(f"{i}/{len(dl)}...")
    logger.info(f"{'-'*100}\n{tsutil.printItemMetadata(itm, return_txt=True)}") 
    # itm['df_maps'] is passed to loadPickle, i.e. it is treated as a path to a pickled maps object
    mps = tsutil.loadPickle(itm['df_maps'])
    # NOTE(review): rmv_lbl=True presumably strips the hemisphere label from the column
    # names so that L and R align in the subtraction -- confirm in tsutil.splitHemis.
    mps_L, mps_R = tsutil.splitHemis(mps, rmv_lbl=True)
    describeMapDif(mps_L, mps_R, logger)
print(f"Log saved to: {log_pth}")

In [None]:
# 2. Compare 3T7T maps to zbrains maps
# Part a: for every participant-session, look up the z-brains map files that
# correspond to the spec'd feature/label/surface/smoothing combinations, add
# their paths as 'zb_*' columns and save the extended table to CSV.

import datetime
import itertools

## 0. Load df with paths to 3T7T smoothed maps
# Re-read the CSV when forced, or when it has not been loaded in this kernel yet.
reimport_src = False
if reimport_src or globals().get('df_clean_ds') is None:
    df_clean_ds = pd.read_csv(pth_df_map_pths, dtype=str)
df_zb = df_clean_ds.copy()
print(f'Initial shape df: {df_zb.shape}')

## a. get path to z-brains smoothed maps and add to df_clean_ds
study_by_code = {s['study']: s for s in studies}  # '3T' -> MICs, '7T' -> PNI
region_keys = [('ctx', 'cortex'), ('hipp', 'hippocampus')]  # (specs suffix / column tag, zbrainsMaps region)

zb_rows = []  # one dict of {zb column name: path or NaN} per row of df_zb
for idx, row in df_zb.iterrows():
    study_name = row['study']
    if study_name not in study_by_code:
        raise ValueError(f"study name <{study_name}> not recognized [index: {idx}]")
    study = study_by_code[study_name]

    id_ = row[demographics[f'ID_{study_name}']]
    ses = row[demographics['SES']]

    zb_row = {}
    for key, region in region_keys:
        if not specs[key]:
            continue
        # Same nesting order as feature > label > surface > smoothing loops.
        combos = itertools.product(
            specs[f'ft_{key}'], specs[f'lbl_{key}'], specs[f'surf_{key}'], specs[f'smth_{key}']
        )
        for feat, lbl, surf, smth in combos:
            pth_L, pth_R = zbrainsMaps(study, id_, ses, region, feat, lbl, surf, smth)
            stem = f"surf-{surf}_label-{lbl}_feature-{feat}_smooth-{smth}mm"
            zb_row[f"zb_{key}_hemi-L_{stem}"] = pth_L
            zb_row[f"zb_{key}_hemi-R_{stem}"] = pth_R
    zb_rows.append(zb_row)

# Attach all new columns in one step with object dtype: growing the frame cell by
# cell can create float (NaN) columns that later receive path strings.
zb_paths = pd.DataFrame(zb_rows, index=df_zb.index, dtype=object)
df_zb = pd.concat([df_zb.drop(columns=zb_paths.columns, errors='ignore'), zb_paths], axis=1)

print(f'Final shape df: {df_zb.shape}')

df_zb = df_zb.drop(columns='UID_study_ses', errors='ignore')


# save df_zb_clean
# Same timestamp format as the log files (no spaces or colons in the file name).
timestamp = datetime.datetime.now().strftime('%d%b%Y-%H%M%S')
out_pth = f"{specs['prjDir_root'] + specs['prjDir_outs']}/debug/04c_dfPths_dsMaps_withZb_{timestamp}.csv"
os.makedirs(os.path.dirname(out_pth), exist_ok=True)
df_zb.to_csv(out_pth, index=False)
print(f"[main] df_zb_clean saved to {out_pth}")

# The string literal below is disabled code kept for reference; it is never executed.
# NOTE(review): it references a name `test` that is not defined in the visible cells.
"""
## create dictionary list for z-brains smoothed maps
coi = [c for c in df_zb.columns if c.startswith('zb')]
cols_L, cols_R = tsutil.get_mapCols(coi, verbose=True)

dl_hipp_zb = tsutil.extractMap(df_mapPaths = df_zb, cols_L = cols_L, cols_R = cols_R, 
                                specs = specs, studies = studies, demographics = demographics, qc_thresh = 2,
                                save_df_pth = specs['prjDir_root'] + specs['prjDir_maps_dfs'], log_save_pth = specs['prjDir_root'] + specs['prjDir_outs'],
                                region = "hippocampus", verbose=True, test = test)
"""

## Load 3T7T, zBrains data and compare 
# Part b: for every 3T7T smoothed-map column pair, find the matching z-brains
# column pair, load both sets of maps for the rows that have all four paths and
# log summary statistics of their difference.
df_pths_zb = out_pth # has paths to 3T7T smoothed maps and zbsmoothed maps

# for rows with analogous paths to 3T7T and zb maps, load both and compute difference
# dtype=str keeps IDs / sessions as text (e.g. leading zeros), matching how the
# source table was read.
df_pths = pd.read_csv(df_pths_zb, dtype=str)
cols_L, cols_R = tsutil.get_mapCols(df_pths.columns, verbose=False)

zb_cols_l = [c for c in cols_L if 'zb_' in c]
zb_cols_r = [c for c in cols_R if 'zb_' in c]

# keep only the 3T7T columns in cols_L / cols_R
cols_L = [c for c in cols_L if 'zb_' not in c]
cols_R = [c for c in cols_R if 'zb_' not in c]

# create single ID col: the 3T ID for 3T rows, the 7T ID otherwise
is_3T = df_pths['study'] == '3T'
df_pths['ID'] = df_pths[demographics['ID_3T']].where(is_3T, df_pths[demographics['ID_7T']])

log_pth = f"/host/verges/tank/data/daniel/01_3T7T/z/outputs/debug/logs/mapDifs_smoothed-3T7T-zb_{datetime.datetime.now().strftime('%d%b%Y-%H%M%S')}"
logger = tsutil._get_file_logger(__name__, log_file_path=log_pth)
print(f"Logging to: {log_pth}")
logger.info(f"Summary stats for difference between 3T7T smoothed maps and zBrains smoothed maps...\n")

n_pairs = len(cols_L)
for idx, (c_l, c_r) in enumerate(zip(cols_L, cols_R)):

    print(f"{idx}/{n_pairs}...")

    # Unsmoothed and 0.8 mm-downsampled maps have no z-brains counterpart.
    if any(tag in col for tag in ('unsmth', 'res-0p8') for col in (c_l, c_r)):
        continue

    zb_l, zb_r = correspZBCol(c_l, c_r)
    logger.info(f"{'-'*100}\n\t3T7T cols [idx: {idx}]:\n\t\t{c_l}\t\t|\t{c_r}\n\tzb cols [idx: {idx}]:\n\t\t{zb_l}\t\t|\t{zb_r}")

    # A 3T7T column whose combination was never looked up has no zb column at all.
    missing_zb = [c for c in (zb_l, zb_r) if c not in df_pths.columns]
    if missing_zb:
        logger.info(f"\tSkipping, zb column(s) not in table: {missing_zb}\n")
        continue

    # A. Find rows with valid paths in zb, 3T7T cols
    has_all_paths = df_pths[[c_l, c_r, zb_l, zb_r]].notna().all(axis=1)
    df_valid = df_pths[has_all_paths]

    if df_valid.empty:
        logger.info("\tSkipping, 0 rows.\n")
        continue

    # B. Read in maps
    logger.info(f"\t{len(df_valid)} valid rows")

    tTsT_maps = tsutil.get_maps(df_valid, mapCols = [c_l, c_r], col_ID = 'ID', col_study = 'study')
    zb_maps = tsutil.get_maps(df_valid, mapCols = [zb_l, zb_r], col_ID = 'ID', col_study = 'study')

    logger.info(f"\ttTsT [{tTsT_maps.shape}]: {tTsT_maps.index.tolist()}")
    logger.info(f"\tzb   [{zb_maps.shape}]: {zb_maps.index.tolist()}")

    # C. take difference and print summary stats to log
    describeMapDif(tTsT_maps, zb_maps, logger, n_bins=12, investigate=True)
print(f"Log saved to: {log_pth}")