Module config

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import scipy.stats as sp

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from pylab import savefig

Seaborn config

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's default theme using the 'white' axes style.
sns.set(style='white')

Filesystem config

In [None]:
# Locate the `derivatives` folder that sits two levels above the current
# working directory, then name the sub-folders this notebook reads from
# and writes to.
derivs = Path().absolute().parents[1].joinpath('derivatives')
source_dir = derivs.joinpath('0.4.joined')
alt_source_dir = derivs.joinpath('20190218')
figures_dir = derivs.joinpath('figures')

Import data

In [None]:
# Load the subject-level table that every later cell reads as `data`.
#
# This cell used to read `source_dir / 'all_subject_level.csv'` first and then
# immediately overwrite the result with the file below, so the first read was
# dead work (and a needless failure point if that file is missing). Only the
# load that actually takes effect is kept.
data = pd.read_csv(alt_source_dir / 'all_subject_level_bound.csv')

# Set standard variables to be analyzed

In [None]:
# Dependent variables (outcomes); later cells use these as column names of `data`.
dvars = [
    'comp_t1_bound',
    'comp_t2_bound',
    'comp_change_bound',
]

# Independent variables (predictors); later cells use these as column names of `data`.
ivars = [
    'SciLit_sum_bound',
    'vocab_sum_bound',
    'NFCS_sum_bound',
    'TSSI_sum_bound',
    'procspd_RT_bound',
    'nb_RT_bound',
]

# Both sets keyed by name. The dict holds its own copies of the lists, so
# changing `ivars`/`dvars` later does not silently change `varset`.
# Key order ('dvars' first) is the order the pairplot loop iterates in.
varset = {
    'dvars': list(dvars),
    'ivars': list(ivars),
}

# Demographics

In [None]:
# For every AgeGroup x Condition combination: number of `sub` entries,
# mean `Age`, and mean `Gender`.
# NOTE(review): the '% Female' label presumes Gender is coded 1 = female, in
# which case the mean is a proportion (0-1) rather than a percentage - confirm.
(
    data
    .groupby(['AgeGroup', 'Condition'], as_index=False)
    .agg({'sub': 'count', 'Age': 'mean', 'Gender': 'mean'})
    .rename(columns={'sub': 'Count', 'Age': 'Mean Age', 'Gender': '% Female'})
)

In [None]:
# For every AgeGroup x Condition combination: number of `sub` entries and the
# mean of the comprehension change, T1 and T2 scores.
# NOTE(review): this cell reads the unsuffixed columns (`comp_t1`, `comp_t2`,
# `comp_change`) while the rest of the notebook uses the `_bound` versions -
# confirm that is intended and that the loaded table contains both.
(
    data
    .groupby(['AgeGroup', 'Condition'], as_index=False)
    .agg({'sub': 'count', 'comp_change': 'mean', 'comp_t1': 'mean', 'comp_t2': 'mean'})
    .rename(columns={'sub': 'Count', 'comp_t1': 'T1', 'comp_t2': 'T2', 'comp_change': 'Mean Diff'})
)

# Bound correlograms

Rows containing any null value have to be dropped before the data is passed to `sns.pairplot()`, so the plots are based on fewer rows than the full table:

In [None]:
# Grouping columns plus every predictor and outcome column.
pairplots_data = data[['AgeGroup', 'Condition'] + ivars + dvars]

# Report the row count before and after discarding rows with any missing value.
print(len(pairplots_data), '->', len(pairplots_data.dropna()))

In [None]:
# For each variable set ('dvars', 'ivars') and each grouping column, draw a
# pairplot of the complete rows, coloured by group, and save it to `figures_dir`.
for set_name, columns in varset.items():
    for grouping in ('AgeGroup', 'Condition'):
        # Only rows with no missing value in the plotted columns are used
        complete_rows = data[[grouping] + columns].dropna()
        grid = sns.pairplot(data=complete_rows, hue=grouping)

        out_name = 'pairplot_{}_by_{}.png'.format(set_name, grouping.lower())
        grid.savefig(figures_dir / out_name, bbox_inches='tight')


## Linear Model Plots

In [None]:
# Horizontal jitter amount per predictor column; the joint-plot cell passes
# the matching value to `sns.regplot` as `x_jitter`.
jitters = dict(SciLit_sum_bound=0.2)

In [None]:
# Draw one joint regression plot for every (IV, DV) pair - once for all
# subjects and once for each age group - and save each figure to `figures_dir`.
for iv in ivars:
    # Horizontal jitter for relatively discrete IVs; None when the IV has no
    # entry in `jitters`. Jitter only adds visual noise to the scatter points -
    # it does not change the values being analyzed.
    xj = jitters.get(iv)

    for dv in dvars:
        # Keep only the columns this plot needs: grouping variable, IV, DV
        df = data[['AgeGroup', iv, dv]]

        for group in ('All', 'OA', 'YA'):
            # Compare strings by value. The previous `is not 'All'` tested
            # object identity, which only works when the interpreter happens to
            # intern both strings and raises a SyntaxWarning on Python 3.8+.
            if group != 'All':
                # Restrict to the rows of the requested age group
                jp_df = df.loc[df['AgeGroup'] == group]
            else:
                # 'All' uses every row selected above
                jp_df = df

            # Build the JointGrid piece by piece so jitter can be passed to the
            # regression panel; `annotate` embeds the `scipy.stats.pearsonr` result.
            # NOTE(review): `JointGrid.annotate` and `sns.distplot` are deprecated
            # or removed in newer seaborn releases - confirm the seaborn version
            # this notebook is pinned to.
            g = sns.JointGrid(x=iv, y=dv, data=jp_df)
            g = g.plot_joint(sns.regplot, x_jitter=xj, y_jitter=0.3)
            g = g.plot_marginals(sns.distplot)
            g = g.annotate(sp.pearsonr)

            # Title the whole figure with the subset being plotted
            g.fig.suptitle(t=(group + ' subjects'), y=1.015)

            # File name encodes the IV, DV and subset
            figpath = figures_dir / ('joint_' + iv + '_' + dv + '_' + group + '.png')
            g.savefig(figpath)

# Horizontal Swarm, Box, and Strip plots

In [None]:
# Draw a swarm, box and strip plot of every IV and DV against each grouping
# (AgeGroup x Condition, AgeGroup alone, Condition alone) and save each one
# to `figures_dir`.
plot_types = {'swarm': sns.swarmplot, 'box': sns.boxplot, 'strip': sns.stripplot}

for p, plot_func in plot_types.items():
    for grpY in [['AgeGroup', 'Condition'], ['AgeGroup'], ['Condition']]:
        # The indexed frame depends only on the grouping columns, so build it
        # once per grouping instead of once per plotted variable.
        df = data.dropna(subset=grpY).set_index(grpY)

        for varX in (ivars + dvars):
            # The seaborn plotting functions return the Axes they drew on
            ax = plot_func(data=df, x=varX, y=df.index)

            # File name encodes the variable, the grouping and the plot type
            fname = varX + '_' + ''.join(grpY) + '_' + p + '.png'
            fpath = figures_dir / fname
            print(fpath)

            ax.get_figure().savefig(fpath, bbox_inches='tight')
            # Clear the current figure so the next plot starts from a blank canvas
            plt.clf()

print('Done!')