### Main points

#### Group analysis across ROI

- Test whether pRF size differs between controls and patients, separately for fovea and periphery
- Test whether pRF CM differs between controls and patients, separately for fovea and periphery
- Test whether pRF size differs between fovea and periphery
- Test whether pRF CM differs between fovea and periphery
  

In [1]:
# General imports
import os
import sys
import json
import glob
import ipdb
import pandas as pd
import numpy as np
import pingouin as pg
deb = ipdb.set_trace

In [2]:
# Inputs: root folders and project names
data_dir = '/home/mszinte/disks/meso_S/data'
code_dir = '/home/mszinte/disks/meso_H/projects'
amblyo_dir = 'amblyo_prf'
control_dir = 'RetinoMaps'

# Amblyopia project: read settings.json and keep the entries used in this notebook
with open(f'{code_dir}/{amblyo_dir}/analysis_code/settings.json') as f:
    json_s = f.read()
amblyo_analysis_info = json.loads(json_s)

# Subject lists (all patients, then one list per amblyopia subtype)
amblyo_subjects = amblyo_analysis_info['subjects_patient']
strab_subjects = amblyo_analysis_info['subjects_strab']
aniso_subjects = amblyo_analysis_info['subjects_aniso']
mixed_subjects = amblyo_analysis_info['subjects_mixed']

# Analysis parameters shared by the cells below
formats = amblyo_analysis_info['formats']
foveal_bound = amblyo_analysis_info['foveal_bound']
rois = amblyo_analysis_info['rois']
stats_th = amblyo_analysis_info['stats_th']

# Control project: only the subject list is needed
with open(f'{code_dir}/{control_dir}/analysis_code/settings.json') as f:
    json_s = f.read()
control_analysis_info = json.loads(json_s)
control_subjects = control_analysis_info['subjects']

# Analysis settings: datasets to loop over, eccentricity bins and
# alternative hypotheses passed to the pairwise tests
datasets = ['amblyo', 'control']
ecc_categories = ['foveal', 'peripheral']
alternatives = ['two-sided', 'less', 'greater']

#### PRF eccentricity category (foveal/peripheral): individual analysis

In [3]:
# Individual analysis (amblyopic patients and controls):
# for every format, dataset and subject, compute the prf_loo_r2-weighted averages
# of pRF size and pCM per ROI and eccentricity category, then save one tsv per subject.

def categorize_ecc(value):
    """Return 'foveal' when `value` is <= foveal_bound, otherwise 'peripheral'.

    Non-comparable values (e.g. NaN) fall in the 'peripheral' branch because
    the `<=` comparison evaluates to False.
    """
    if value <= foveal_bound: return 'foveal'
    else: return 'peripheral'

def weighted_average(df_groupby, column_data, column_weight):
    """Return the mean of `column_data` weighted by `column_weight` for one group.

    Parameters
    ----------
    df_groupby : DataFrame holding the rows of a single group
    column_data : name of the column to average
    column_weight : name of the column used as weights
    """
    return (df_groupby[column_data] * df_groupby[column_weight]).sum() / df_groupby[column_weight].sum()

for format_ in formats:
    print('Format: {}'.format(format_))

    for dataset in datasets:
        if dataset == 'amblyo':
            dataset_dir = amblyo_dir
            list_subjects = amblyo_subjects
        elif dataset == 'control':
            dataset_dir = control_dir
            list_subjects = control_subjects
        print('Dataset: {}'.format(dataset))

        for subject in list_subjects:
            # Load data thresholded
            tsv_dir = '{}/{}/derivatives/pp_data/{}/{}/prf/tsv'.format(
                data_dir, dataset_dir, subject, format_)
            tsv_data_th_fn = "{}/{}_prf_violins.tsv".format(tsv_dir, subject)
            data_th = pd.read_table(tsv_data_th_fn, sep="\t")

            # Define subject type
            if dataset == 'amblyo':
                subject_type = 'patient'
                if subject in strab_subjects:
                    amblyo_type = 'strab'
                elif subject in aniso_subjects:
                    amblyo_type = 'aniso'
                elif subject in mixed_subjects:
                    amblyo_type = 'mixed'
                else:
                    # Without this guard the labels of the previous subject
                    # would silently be reused for this one
                    raise ValueError(
                        'Patient {} is not listed in subjects_strab, '
                        'subjects_aniso or subjects_mixed'.format(subject))
            elif dataset == 'control':
                subject_type = 'control'
                amblyo_type = 'control'

            # Define pRF eccentricity category (foveal or peripheral).
            # The category has to come from eccentricity: binning on 'prf_size'
            # and then averaging 'prf_size' inside the bins is circular.
            # NOTE(review): assumes the eccentricity column is named 'prf_ecc'
            # - TODO confirm against the *_prf_violins.tsv header.
            if 'prf_ecc' in data_th.columns:
                ecc_column = 'prf_ecc'
            else:
                print('Warning ({}): no prf_ecc column, categorizing on prf_size'.format(subject))
                ecc_column = 'prf_size'
            data_th['ecc_category'] = data_th[ecc_column].apply(categorize_ecc)

            # Compute table of weighted averages by 'roi' and 'ecc_category'.
            # Each result is indexed by (roi, ecc_category), so the columns are
            # aligned on the group keys rather than on row position.
            grouped = data_th.groupby(['roi', 'ecc_category'])
            df_prf_ecc_cat = pd.concat([
                grouped[['prf_size', 'prf_loo_r2']].apply(
                    weighted_average, 'prf_size', 'prf_loo_r2').rename('prf_size_weighted_avg'),
                grouped[['pcm', 'prf_loo_r2']].apply(
                    weighted_average, 'pcm', 'prf_loo_r2').rename('pcm_weighted_avg'),
                grouped[['prf_loo_r2']].apply(
                    weighted_average, 'prf_loo_r2', 'prf_loo_r2').rename('prf_loo_r2_weighted_avg'),
                # summed vertex area divided by 100
                # (presumably a unit conversion, e.g. mm2 to cm2 - TODO confirm)
                (grouped['vert_area'].sum() / 100).rename('vert_area_sum'),
            ], axis=1).reset_index()
            df_prf_ecc_cat['subject'] = subject
            df_prf_ecc_cat['subject_type'] = subject_type
            df_prf_ecc_cat['amblyo_type'] = amblyo_type

            # Save tsv
            tsv_prf_ecc_cat_fn = "{}/{}_prf_ecc_cat.tsv".format(tsv_dir, subject)
            print('Saving tsv ({}: {} {}): {}'.format(subject, subject_type, amblyo_type, tsv_prf_ecc_cat_fn))
            df_prf_ecc_cat.to_csv(tsv_prf_ecc_cat_fn, sep="\t", na_rep='NaN', index=False)

Format: fsnative
Dataset: amblyo
Saving tsv (sub-01: patient aniso): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/sub-01/fsnative/prf/tsv/sub-01_prf_ecc_cat.tsv
Saving tsv (sub-02: patient strab): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/sub-02/fsnative/prf/tsv/sub-02_prf_ecc_cat.tsv
Saving tsv (sub-03: patient strab): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/sub-03/fsnative/prf/tsv/sub-03_prf_ecc_cat.tsv
Saving tsv (sub-04: patient aniso): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/sub-04/fsnative/prf/tsv/sub-04_prf_ecc_cat.tsv
Saving tsv (sub-05: patient strab): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/sub-05/fsnative/prf/tsv/sub-05_prf_ecc_cat.tsv
Saving tsv (sub-06: patient mixed): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/sub-06/fsnative/prf/tsv/sub-06_prf_ecc_cat.tsv
Saving tsv (sub-07: patient aniso): /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/

#### PRF eccentricity category (foveal/peripheral): inter-group analysis

In [47]:
# Inter-group analysis: combine the per-subject tables, then compute
# group means / percentile bounds and pairwise non-parametric tests.

def summarize_by_group(df, group_col, value_cols):
    """Mean and 2.5th / 97.5th percentiles of `value_cols` per group.

    Groups are defined by 'roi' x 'ecc_category' x `group_col`.
    Returns a flat DataFrame with, for every value column, '<col>_mean',
    '<col>_ci_down' and '<col>_ci_up'. The bounds are percentiles of the
    per-subject values (np.percentile), not a confidence interval of the mean.
    """
    grouped = df.groupby(['roi', 'ecc_category', group_col])
    summary_parts = [grouped[list(value_cols)].mean().add_suffix('_mean')]
    for colname in value_cols:
        summary_parts.append(grouped[colname].apply(
            lambda x: np.percentile(x, 2.5)).rename('{}_ci_down'.format(colname)))
        summary_parts.append(grouped[colname].apply(
            lambda x: np.percentile(x, 97.5)).rename('{}_ci_up'.format(colname)))
    return pd.concat(summary_parts, axis=1).reset_index()


def pairwise_group_stats(df, between, value_cols):
    """Pairwise non-parametric tests between the levels of `between`.

    One test family is run per roi x ecc_category x dependent variable x
    alternative, using only the rows of that roi and eccentricity category.
    The Benjamini-Hochberg correction (padjust='fdr_bh') is applied by pingouin
    within each family, i.e. across the pairwise comparisons of a single call,
    not across ROIs or dependent variables.

    Returns a DataFrame with columns roi, ecc_category, dv, alternative,
    diff_type, cohen and pvalue-cor, with 'roi' as an ordered categorical
    following `rois`.
    """
    stats_columns = ['roi', 'ecc_category', 'dv', 'alternative', 'diff_type', 'cohen', 'pvalue-cor']
    records = []
    for roi in rois:
        for ecc_category in ecc_categories:
            # Restrict the test to the current ROI and eccentricity category
            data = df.loc[(df.roi == roi) & (df.ecc_category == ecc_category)]
            if data[between].nunique() < 2:
                # Nothing to compare for this roi / category
                continue

            for dv in value_cols:
                for alternative in alternatives:
                    fdr = pg.pairwise_tests(data=data, dv=dv, between=between, alternative=alternative,
                                            parametric=False, correction=True, effsize='cohen', padjust='fdr_bh')

                    # pingouin only provides 'p-corr' when there is more than one
                    # comparison; with a single comparison the uncorrected p-value
                    # is already the final one
                    p_col = 'p-corr' if 'p-corr' in fdr.columns else 'p-unc'

                    for a, b, cohen, pvalue in zip(fdr['A'], fdr['B'], fdr['cohen'], fdr[p_col]):
                        # One independent record per comparison: no shared frame
                        # that later iterations could overwrite
                        records.append({'roi': roi,
                                        'ecc_category': ecc_category,
                                        'dv': dv,
                                        'alternative': alternative,
                                        'diff_type': "[{} vs. {}]".format(a, b),
                                        'cohen': float(cohen),
                                        'pvalue-cor': float(pvalue)})

    df_stats = pd.DataFrame(records, columns=stats_columns)
    df_stats['roi'] = pd.Categorical(df_stats['roi'], categories=rois, ordered=True)
    # Stable sort keeps the category / dv / alternative order inside each ROI
    return df_stats.sort_values('roi', kind='stable')


# Folder and subject list of each dataset
dataset_info = {'amblyo': (amblyo_dir, amblyo_subjects),
                'control': (control_dir, control_subjects)}

# Columns summarized and tested
columns_res = ['prf_size_weighted_avg', 'pcm_weighted_avg', 'prf_loo_r2_weighted_avg', 'vert_area_sum']

for format_ in formats:
    print('Format: {}'.format(format_))

    # Get data of all subjects and combine them (single concat instead of growing a frame)
    subject_frames = []
    for dataset in datasets:
        dataset_dir, list_subjects = dataset_info[dataset]
        for subject in list_subjects:
            tsv_dir = '{}/{}/derivatives/pp_data/{}/{}/prf/tsv'.format(
                data_dir, dataset_dir, subject, format_)
            tsv_prf_ecc_cat_fn = "{}/{}_prf_ecc_cat.tsv".format(tsv_dir, subject)
            subject_frames.append(pd.read_table(tsv_prf_ecc_cat_fn, sep="\t"))
    df_prf_ecc_cat = pd.concat(subject_frames, ignore_index=True)

    # Mean/CI analysis
    # Subject type: control vs. patient
    df_subject_type = summarize_by_group(df_prf_ecc_cat, 'subject_type', columns_res)
    # Amblyo type: control vs. aniso vs. strab vs. mixed
    df_amblyo_type = summarize_by_group(df_prf_ecc_cat, 'amblyo_type', columns_res)

    # Saving per format
    tsv_dir_intergroup = '{}/{}/derivatives/pp_data/inter-group/{}/tsv'.format(data_dir, amblyo_dir, format_)
    os.makedirs(tsv_dir_intergroup, exist_ok=True)

    tsv_subject_type_fn = "{}/inter-group_prf_ecc_cat_subject_type.tsv".format(tsv_dir_intergroup)
    print('Saving tsv: {}'.format(tsv_subject_type_fn))
    df_subject_type.to_csv(tsv_subject_type_fn, sep="\t", na_rep='NaN', index=False)

    tsv_amblyo_type_fn = "{}/inter-group_prf_ecc_cat_amblyo_type.tsv".format(tsv_dir_intergroup)
    print('Saving tsv: {}'.format(tsv_amblyo_type_fn))
    df_amblyo_type.to_csv(tsv_amblyo_type_fn, sep="\t", na_rep='NaN', index=False)

    # Stats
    # Subject type: control vs. patient
    df_fdr_subect_type = pairwise_group_stats(df_prf_ecc_cat, 'subject_type', columns_res)
    tsv_fdr_subject_type_fn = "{}/inter-group_prf_ecc_cat_subject_type_fdr_stats.tsv".format(tsv_dir_intergroup)
    print('Saving tsv: {}'.format(tsv_fdr_subject_type_fn))
    df_fdr_subect_type.to_csv(tsv_fdr_subject_type_fn, sep="\t", na_rep='NaN', index=False)

    # Amblyo type: control vs. aniso vs. strab vs. mixed
    df_fdr_amblyo_type = pairwise_group_stats(df_prf_ecc_cat, 'amblyo_type', columns_res)
    tsv_fdr_amblyo_type_fn = "{}/inter-group_prf_ecc_cat_amblyo_type_fdr_stats.tsv".format(tsv_dir_intergroup)
    print('Saving tsv: {}'.format(tsv_fdr_amblyo_type_fn))
    df_fdr_amblyo_type.to_csv(tsv_fdr_amblyo_type_fn, sep="\t", na_rep='NaN', index=False)



Format: fsnative
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/fsnative/tsv/inter-group_prf_ecc_cat_subject_type.tsv
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/fsnative/tsv/inter-group_prf_ecc_cat_amblyo_type.tsv
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/fsnative/tsv/inter-group_prf_ecc_cat_subject_type_fdr_stats.tsv
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/fsnative/tsv/inter-group_prf_ecc_cat_amblyo_type_fdr_stats.tsv
Format: 170k
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/170k/tsv/inter-group_prf_ecc_cat_subject_type.tsv
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/170k/tsv/inter-group_prf_ecc_cat_amblyo_type.tsv
Saving tsv: /home/mszinte/disks/meso_S/data/amblyo_prf/derivatives/pp_data/inter-group/170k/tsv/inter-group_prf_ecc