In [None]:
# Check if any two groups differ significantly in terms of age, IQ and in-scanner movement 

In [None]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
def load_excel_data(excel_file, spreadsheet, group1, group2, covariate):
    """Read one covariate for two groups of IDs from an Excel sheet.

    The sheet's first column is used as the row index, and the rows listed
    in ``group1`` and ``group2`` are then selected by label.

    Parameters
    ----------
    excel_file : str
        Path of the Excel workbook (read with the ``openpyxl`` engine).
    spreadsheet : str
        Name of the sheet to read.
    group1, group2 : iterable
        Row labels (values of the sheet's first column) of each group.
    covariate : str
        Name of the column to extract.

    Returns
    -------
    tuple
        ``(group1_values, group2_values)``, the ``covariate`` column
        restricted to each group's rows, in the order the IDs were given.
    """
    sheet = pd.read_excel(excel_file, sheet_name=spreadsheet, engine='openpyxl')
    # Re-index by the first column so that rows can be picked by ID.
    sheet.index = sheet.iloc[:, 0]
    first_group, second_group = (
        sheet.loc[list(ids), covariate] for ids in (group1, group2)
    )
    return first_group, second_group

In [None]:
# Do a whisker plot
def two_group_whisker_plot(d1, d2, labels=None):
    """Show side-by-side box-and-whisker plots for two groups of values.

    Parameters
    ----------
    d1, d2 : array-like
        Values of the first and the second group.
    labels : sequence of str, optional
        Tick labels for the two boxes, in the order ``(d1, d2)``. When
        omitted, matplotlib's default tick labels are kept.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.boxplot.html
    fig, ax = plt.subplots(figsize=(20, 8))
    ax.boxplot([d1, d2], showfliers=True)
    if labels is not None:
        ax.set_xticklabels(labels)
    # plt.show() does not accept a figure/artist argument; passing the
    # boxplot dict positionally is rejected (or misread as a flag) by
    # matplotlib backends, so it is called without arguments.
    plt.show()

In [None]:
# Check normality of data
def check_norm(data, alpha=1e-3, bins=5):
    """Plot a histogram of ``data`` and test it for normality.

    The test is ``scipy.stats.normaltest``; its null hypothesis is that the
    sample comes from a normal distribution.

    Parameters
    ----------
    data : array-like
        Sample to inspect.
    alpha : float, optional
        Significance level below which the null hypothesis is rejected.
        Defaults to ``1e-3``, the value that was previously hard-coded.
    bins : int, optional
        Number of histogram bins. Defaults to ``5``, the value that was
        previously hard-coded.

    Returns
    -------
    bool
        ``False`` when the null hypothesis is rejected (``p < alpha``),
        ``True`` otherwise. Note that ``True`` only means normality could
        not be rejected at the chosen ``alpha``.
    """
    # Visualise first
    plt.hist(data, bins=bins)
    plt.show()

    # Only the p-value is needed; the test statistic is discarded.
    _, p = stats.normaltest(data)

    print('P-value = ' + '{0:.10f}'.format(p))

    # null hypothesis: x comes from a normal distribution
    if p < alpha:
        print("The null hypothesis can be rejected. The sample is NOT normally distributed.")
        return False

    print("The null hypothesis cannot be rejected. The sample is normally distributed.")
    return True

In [None]:
# Check for equality of variances
def calc_var_equal(d1, d2):
    """Rule-of-thumb check for equal variances between two samples.

    The variances (``np.var`` with its default settings) are treated as
    equal when the larger one is less than four times the smaller one.

    Parameters
    ----------
    d1, d2 : array-like
        The two samples to compare.

    Returns
    -------
    bool
        ``True`` if the variance ratio (larger / smaller) is below 4,
        ``False`` otherwise. Two zero variances count as equal; a zero
        variance against a non-zero one counts as unequal.
    """
    v1, v2 = np.var(d1), np.var(d2)
    larger, smaller = max(v1, v2), min(v1, v2)

    # Avoid dividing by zero when a sample is constant.
    if smaller == 0:
        return bool(larger == 0)

    # The previous condition `(v1 / v2) or (v2 / v1) >= 4` was truthy for
    # any non-zero v1 because `or` binds more loosely than `>=`, so the
    # function always reported unequal variances.
    return bool(larger / smaller < 4)

In [None]:
# Only for continuous variables!!!
def test_sign_diff(d1, norm_d1, d2, norm_d2, var_equal):
    if norm_d1 == True and norm_d2 == True:
        if var_equal == True:
            print(stats.ttest_ind(a=d1, b=d2, equal_var=True))
        # https://www.statology.org/determine-equal-or-unequal-variance/
        else:
            print(stats.ttest_ind(a=d1, b=d2, equal_var=False))
    else:
        #https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.mannwhitneyu.html
        u, prob = stats.mannwhitneyu(d1, d2)
        print("u = {:g}".format(u))
        print("prob = {:g}".format(prob))
        # to get two-sided p-value:
        two_sided_prob = 2 * prob
        print("P-value = " + str(two_sided_prob))

In [None]:
# Check assumptions for the statistical tests and perform the appropriate test
def check_norm_diff(work_dir, excel_file, spreadsheet, group1, group2, covariate):
    """Check test assumptions for two groups and run the matching test.

    Steps: change into ``work_dir``, load ``covariate`` for both groups,
    run a normality check per group, draw a whisker plot, check equality of
    variances, let the user confirm or override the normality verdicts, and
    finally run the significance test.

    Parameters
    ----------
    work_dir : str
        Directory to change into before reading ``excel_file``. The change
        of working directory persists after the call.
    excel_file : str
        Excel workbook holding the covariates.
    spreadsheet : str
        Sheet name inside the workbook.
    group1, group2 : list
        Row IDs of the two groups.
    covariate : str
        Column to compare between the groups.

    Returns
    -------
    None
        All results are printed or plotted.
    """
    # Go into the working directory
    os.chdir(work_dir)

    # Load the data and generate two separate data frames, one for each group
    group1_df, group2_df = load_excel_data(excel_file, spreadsheet, group1, group2, covariate)

    # Check normality of each group
    print('Normality check for group 1:')
    group1_norm = check_norm(group1_df)
    print('')
    print('Normality check for group 2:')
    group2_norm = check_norm(group2_df)

    # Plot group values in a whisker plot
    two_group_whisker_plot(group1_df, group2_df)

    # Check equality of variances between the two groups
    var_equal = calc_var_equal(group1_df, group2_df)
    print('')
    print('Equality of variances is = ' + str(var_equal))

    # Accepted manual overrides, keyed by the answer with whitespace removed
    # and upper-cased. 'Y,Y' was previously missing, so a user who judged
    # both groups normal could never leave the prompt.
    overrides = {
        'Y,Y': (True, True),
        'Y,N': (True, False),
        'N,Y': (False, True),
        'N,N': (False, False),
    }

    # "Pause" the program and ask the user if they agree with the normality checks
    print('')
    while True:
        agree = input('Do you agree with the normality checks? Y/N ').strip().upper()
        if agree == 'Y':
            break
        if agree == 'N':
            while True:
                print('')
                true_norm = input('Is the distribution of group 1 and 2 normal, respectively? Y/N, Y/N ')
                answer = ''.join(true_norm.split()).upper()
                if answer in overrides:
                    group1_norm, group2_norm = overrides[answer]
                    break
                print('Please type Y/N, Y/N. ')
            break
        print('Please type Y or N. ')

    # Test whether the difference between the groups is statistically significant
    print('')
    print('Are the differences significant? See below..')
    test_sign_diff(group1_df, group1_norm, group2_df, group2_norm, var_equal)

    print('Done!')
    

In [None]:
# --------------------------------------------------------------------------------------------------------------- #

In [None]:
# # Placebo sessions from the Green+Orange dataset

# Create ID lists
study_name = 'BRCTRADA'

# Session codes: a three-digit number followed by one letter.
placebo_td = [
    '001D', '003D', '004D', '007D', '008C', '009B', '010C',
    '011B', '012C', '013C', '014B', '015D', '016B', '017D',
    '019B', '020B', '021C', '022C', '023D',
]
placebo_autism = [
    '101D', '102C', '103C', '104B', '105B', '106B', '107C',
    '109C', '110C', '111D', '112B', '113D', '114D', '116B',
    '117B', '119C', '120C', '123B',
]

# Build the lookup IDs: study prefix + the first three characters of each
# session code (the trailing letter is dropped).
placebo_td_list = ['{}{}'.format(study_name, code[:3]) for code in placebo_td]
placebo_autism_list = ['{}{}'.format(study_name, code[:3]) for code in placebo_autism]

In [None]:
# Compare IQ between placebo_td_list and placebo_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=placebo_td_list,
    group2=placebo_autism_list,
    covariate='IQ',
)

In [None]:
# Compare Age between placebo_td_list and placebo_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=placebo_td_list,
    group2=placebo_autism_list,
    covariate='Age',
)

In [None]:
# Compare mFD between placebo_td_list and placebo_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=placebo_td_list,
    group2=placebo_autism_list,
    covariate='mFD',
)

In [None]:
# # Tianeptine sessions from the Green+Orange dataset

# Create ID lists
study_name = 'BRCTRADA'

# Session codes: a three-digit number followed by one letter.
tianeptine_td = [
    '001B', '003C', '004C', '007C', '008D', '009C', '010B',
    '011D', '012B', '013D', '014C', '015B', '016D', '017B',
    '019C', '020C', '021B', '022D', '023B',
]
tianeptine_autism = [
    '101C', '102B', '103D', '104D', '105D', '106C', '107D',
    '108B', '109B', '110B', '112C', '113C', '114B', '115D',
    '116C', '117D', '119D', '120B', '123C',
]

# Build the lookup IDs: study prefix + the first three characters of each
# session code (the trailing letter is dropped).
tianeptine_td_list = ['{}{}'.format(study_name, code[:3]) for code in tianeptine_td]
tianeptine_autism_list = ['{}{}'.format(study_name, code[:3]) for code in tianeptine_autism]

In [None]:
# Compare IQ between tianeptine_td_list and tianeptine_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=tianeptine_td_list,
    group2=tianeptine_autism_list,
    covariate='IQ',
)

In [None]:
# Compare Age between tianeptine_td_list and tianeptine_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=tianeptine_td_list,
    group2=tianeptine_autism_list,
    covariate='Age',
)

In [None]:
# Compare mFD between tianeptine_td_list and tianeptine_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=tianeptine_td_list,
    group2=tianeptine_autism_list,
    covariate='mFD',
)

In [None]:
# Also compare the autism group between conditions: placebo (ASC_P) vs tianeptine (ASC_T)

In [None]:
# Compare IQ between placebo_autism_list and tianeptine_autism_list.
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=placebo_autism_list,
    group2=tianeptine_autism_list,
    covariate='IQ',
)

In [None]:
# Compare Age between placebo_autism_list and tianeptine_autism_list.
# This cell previously passed tianeptine_td_list / tianeptine_autism_list,
# which merely repeated the tianeptine TD-vs-autism Age comparison instead
# of the placebo-vs-tianeptine autism comparison this section is about.
check_norm_diff('/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
               'Covariates.xlsx',
               'data',
               placebo_autism_list,
               tianeptine_autism_list,
               'Age')

In [None]:
# Compare mFD between placebo_autism_list and tianeptine_autism_list.
# This cell previously passed tianeptine_td_list / tianeptine_autism_list,
# which merely repeated the tianeptine TD-vs-autism mFD comparison instead
# of the placebo-vs-tianeptine autism comparison this section is about.
# NOTE(review): both ID lists drop the session letter, so rows are looked up
# by the three-digit ID only; if the sheet holds one row per such ID, a
# session-specific measure cannot differ between the two lists. Confirm the
# sheet layout.
check_norm_diff('/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
               'Covariates.xlsx',
               'data',
               placebo_autism_list,
               tianeptine_autism_list,
               'mFD')