In [None]:
# Check if any two groups differ significantly in terms of age, IQ and in-scanner movement 

In [None]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
import pylab

In [None]:
def load_excel_data(excel_file, spreadsheet, group1, group2, covariate):
    """Read one worksheet and return a covariate column split into two groups.

    Parameters
    ----------
    excel_file : path of the workbook, passed straight to ``pd.read_excel``.
    spreadsheet : name of the sheet to read.
    group1, group2 : iterables of row labels; they are matched against the
        values of the sheet's first column.
    covariate : name of the column to extract.

    Returns
    -------
    (group1_values, group2_values) : the ``covariate`` entries for the rows
        of each group, in the order the labels were given.
    """
    sheet = pd.read_excel(excel_file, sheet_name=spreadsheet, engine='openpyxl')
    # Use the first column as the row index so rows can be selected by label
    sheet.index = sheet.iloc[:, 0]
    first_group, second_group = (
        sheet.loc[list(labels), covariate] for labels in (group1, group2)
    )
    return first_group, second_group

In [None]:
# Check normality of data
def check_norm(data):
    """Inspect a sample for normality, visually and with a Shapiro-Wilk test.

    Shows a 5-bin histogram and a Q-Q plot of ``data``, then runs
    ``scipy.stats.shapiro`` and prints the W statistic and p-value.

    Parameters
    ----------
    data : one-dimensional sample of numeric values.

    Returns
    -------
    bool : False if the Shapiro-Wilk p-value is below alpha = 0.05
        (normality rejected), True otherwise.
    """
    alpha = 0.05

    # Visualise data in a histogram
    plt.hist(data, bins=5)
    plt.show()

    # Create a quantile-quantile (Q-Q) plot
    sm.qqplot(data, line='q')
    plt.show()

    # Test normality; the Shapiro-Wilk statistic is W (it is not a t-statistic)
    w_stat, p = stats.shapiro(data)

    print('W-statistic = ' + '{0:.10f}'.format(w_stat))
    print('P-value = ' + '{0:.10f}'.format(p))

    # null hypothesis: x comes from a normal distribution
    if p < alpha:
        print("The null hypothesis can be rejected. The sample is NOT normally distributed.")
        return False
    else:
        print("The null hypothesis cannot be rejected. The sample is normally distributed.")
        return True

In [None]:
# Check for equality of variances
def calc_var_equal(d1, norm_d1, d2, norm_d2):
    """Test whether two samples have equal variances at alpha = 0.05.

    Bartlett's test is used when both normality flags are True; otherwise
    Levene's test is used.

    Parameters
    ----------
    d1, d2 : the two samples.
    norm_d1, norm_d2 : normality flags for ``d1`` and ``d2``.

    Returns
    -------
    bool : False if the null hypothesis of equal variances is rejected
        (p < 0.05), True otherwise.
    """
    print("")
    print("Testing equality of variances..")
    alpha = 0.05

    # Pick the test first, then run it once
    both_normal = norm_d1 == True and norm_d2 == True
    variance_test = stats.bartlett if both_normal else stats.levene
    stat, p = variance_test(d1, d2)

    # null hypothesis: x and y have equal variances
    if p < alpha:
        verdict = False
        message = "The null hypothesis can be rejected. X and Y do NOT have equal variances."
    else:
        verdict = True
        message = "The null hypothesis cannot be rejected. X and Y have equal variances."
    print(message)
    return verdict

In [None]:
# Do a whisker plot
def two_group_whisker_plot(d1, d2, group_names=None):
    """Draw side-by-side box-and-whisker plots for two samples.

    Parameters
    ----------
    d1, d2 : the two samples; ``d1`` is drawn first (left), ``d2`` second.
    group_names : optional sequence of two tick labels for the boxes. When
        omitted, matplotlib's default tick labels are kept.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.boxplot.html
    fig, ax = plt.subplots(figsize=(20, 8))
    ax.boxplot([d1, d2], showfliers=True)
    if group_names is not None:
        ax.set_xticklabels(group_names)
    # plt.show() takes no data argument (its only parameter is the keyword
    # `block`), so the dict returned by boxplot must not be passed to it.
    plt.show()

In [None]:
# Only for continuous variables!!!
def test_sign_diff(d1, norm_d1, d2, norm_d2, var_equal, paired):
    # If normally distributed
    if norm_d1 == True and norm_d2 == True:
        # And with equal variances
        if var_equal == True:
            if paired == True:
                print(stats.ttest_rel(a=d1, b=d2))
            elif paired == False:
                # Do an independent two-sample Student t-test
                print(stats.ttest_ind(a=d1, b=d2, equal_var=True))
            else:
                print("Please enter True or False for the paired argument..")
        # If variances are unequal, perform Welch's test
        else:
            print(stats.ttest_ind(a=d1, b=d2, equal_var=False))
    # If non-normally distributed, conduct a Wilcoxon rank sum (aka Mann-Whitney) test
    else:
        if paired == True:
            print(stats.wilcoxon(d1, d2))
        elif paired == False:
            #https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.mannwhitneyu.html
            u, prob = stats.mannwhitneyu(d1, d2)
            print("u = {:g}".format(u))
            print("prob = {:g}".format(prob))
            # to get two-sided p-value:
            two_sided_prob = 2 * prob
            print("P-value = " + str(two_sided_prob))
        else:
            print("Please enter True or False for the paired argument..")

In [None]:
# Check assumptions for the statistical tests and perform the appropriate test
def check_norm_diff(work_dir, excel_file, spreadsheet, group1, group2, covariate, paired):
    """Compare one covariate between two groups, checking assumptions first.

    Steps: load the covariate for both groups, check each group's normality,
    test equality of variances, show a whisker plot, let the user confirm or
    override the normality verdicts, then print group sizes, means, SDs and
    the result of the matching significance test.

    Parameters
    ----------
    work_dir : directory containing ``excel_file``.
    excel_file : workbook file name.
    spreadsheet : sheet name inside the workbook.
    group1, group2 : row labels (IDs) of the two groups.
    covariate : name of the column to compare.
    paired : True for dependent samples, False for independent samples.

    Returns
    -------
    None. Everything is reported through prints and plots.

    Side effects
    ------------
    Changes the process working directory to ``work_dir`` and blocks on
    ``input()`` until the user answers the normality questions.
    """
    # Go into the working directory (this changes the cwd for later code too)
    os.chdir(work_dir)

    # Load the data and generate two separate data frames, one for each group
    group1_df, group2_df = load_excel_data(excel_file, spreadsheet, group1, group2, covariate)

    # Check normality of each group
    print('Normality check for group 1:')
    group1_norm = check_norm(group1_df)
    print('')
    print('Normality check for group 2:')
    group2_norm = check_norm(group2_df)

    # Check equality of variances between the two groups; each group must be
    # passed together with its own normality flag.
    var_equal = calc_var_equal(group1_df, group1_norm, group2_df, group2_norm)
    print('')
    print('Equality of variances is = ' + str(var_equal))

    # Plot group values in a whisker plot
    two_group_whisker_plot(group1_df, group2_df)

    # "Pause" the program and ask the user if they agree with the normality checks
    print('')
    yes_no = {'Y': True, 'N': False}
    while True:
        agree = input('Do you agree with the normality checks? Y/N ').strip().upper()
        if agree == 'Y':
            break
        if agree != 'N':
            print('Please type Y or N. ')
            continue

        # The user disagrees: ask for the verdict of each group. All four
        # combinations are accepted, including 'Y, Y'.
        while True:
            print('')
            true_norm = input('Is the distribution of group 1 and 2 normal, respectively? Y/N, Y/N ')
            answers = [part.strip().upper() for part in true_norm.split(',')]
            if len(answers) == 2 and all(answer in yes_no for answer in answers):
                break
            print('Please type Y/N, Y/N. ')

        corrected = (yes_no[answers[0]], yes_no[answers[1]])
        if corrected != (group1_norm, group2_norm):
            group1_norm, group2_norm = corrected
            # The choice of variance test depends on the normality flags,
            # so redo it with the corrected flags.
            var_equal = calc_var_equal(group1_df, group1_norm, group2_df, group2_norm)
            print('')
            print('Equality of variances is = ' + str(var_equal))
        break

    # Test whether the difference between the groups is statistically significant
    # NOTE(review): the labels below are hard-coded as neurotypical/autistic
    # (NT/ASC); they do not describe group1/group2 when the function is used
    # for within-group comparisons (e.g. placebo vs drug sessions).
    print('')
    print('Are the differences significant? See below..')
    print('The sample consists of ' + str(len(group1_df)) + ' neurotypical and ' + str(len(group2_df)) + ' autistic individuals.')
    print('Mean and SD of covariate:')
    print('Mean (for NT and ASC) = ' + str(np.mean(group1_df)) + ' and ' + str(np.mean(group2_df)))
    # np.std is called with its default ddof=0, i.e. this is the population SD
    print('SD (for NT and ASC) = ' + str(np.std(group1_df)) + ' and ' + str(np.std(group2_df)))
    test_sign_diff(group1_df, group1_norm, group2_df, group2_norm, var_equal, paired)

    print('Done!')
    

In [None]:
# --------------------------------------------------------------------------------------------------------------- #

In [None]:
# ------------------------------------------- A R B A C L O F E N ----------------------------------------------- #

In [None]:
# Arbaclofen: subject ID lists for the Age and IQ comparisons
study_name = 'M'

# Bare subject numbers, whitespace-separated; .split() yields the list of strings
complete_td_list = (
    '001 002 004 005 006 007 008 009 010 011 013 014 015 '
    '016 017 018 019 020 021 022 030'
).split()
complete_autism_list = (
    '101 102 103 104 105 106 107 108 109 110 112 113 114 '
    '115 117 118 119 120 121 122 130'
).split()

# Full IDs = study prefix + subject number
td_list = [f'{study_name}{sub_id}' for sub_id in complete_td_list]
autism_list = [f'{study_name}{sub_id}' for sub_id in complete_autism_list]

In [None]:
# Arbaclofen: Age, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/R-baclofen/Data/Covariates',
    excel_file='Covariates_imputed_with_group_mean_males_only.xlsx',
    spreadsheet='Age_IQ',
    group1=td_list,
    group2=autism_list,
    covariate='Age',
    paired=False,
)

In [None]:
# Arbaclofen: IQ, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/R-baclofen/Data/Covariates',
    excel_file='Covariates_imputed_with_group_mean_males_only.xlsx',
    spreadsheet='Age_IQ',
    group1=td_list,
    group2=autism_list,
    covariate='IQ',
    paired=False,
)

In [None]:
# # mFD

# Create ID lists (session IDs = subject number + one-letter session code)
study_name = 'M'

placebo_td = ['001A', '002C', '004B', '005B', '006C', '007A', '008A', '009C', '010A', '011B', '013B', '014A',
              '015C', '016C', '020B', '021A', '022A']
placebo_autism = ['101A', '102C', '105B', '107A', '108A', '109C', '110A', '112B', '113B', '114A', '115C', '117C',
                  '118B', '119C', '120B', '121A', '122A', '130C']

placebo_td_mFD = [study_name + sub_id for sub_id in placebo_td]
placebo_autism_mFD = [study_name + sub_id for sub_id in placebo_autism]

arbaclofen_td = ['001B', '002B', '004A', '005C', '007C', '008B', '009B', '010C', '011C', '013A', '014B', '015B',
                 '017A', '018A', '019A', '020C', '021C', '022C', '030A']
arbaclofen_autism = ['101B', '103A', '104A', '105C', '106A', '107C', '108B', '109B', '110C', '112A', '113A', '114B',
                     '115B', '118A', '119A', '120C', '121C']

arbaclofen_td_mFD = [study_name + sub_id for sub_id in arbaclofen_td]
arbaclofen_autism_mFD = [study_name + sub_id for sub_id in arbaclofen_autism]

# Paired (within-group) lists: keep only subjects that have both a placebo
# and an arbaclofen session. Subject number = session ID minus its last character.
placebo_td_subjects = {ses[:-1] for ses in placebo_td}
arbaclofen_td_subjects = {ses[:-1] for ses in arbaclofen_td}
td_paired_mFD = [ses[:-1] for ses in placebo_td if ses[:-1] in arbaclofen_td_subjects]
placebo_td_mFD_paired = [study_name + ses for ses in placebo_td if ses[:-1] in arbaclofen_td_subjects]
arbaclofen_td_mFD_paired = [study_name + ses for ses in arbaclofen_td if ses[:-1] in placebo_td_subjects]

placebo_autism_subjects = {ses[:-1] for ses in placebo_autism}
arbaclofen_autism_subjects = {ses[:-1] for ses in arbaclofen_autism}
autism_paired_mFD = [ses[:-1] for ses in placebo_autism if ses[:-1] in arbaclofen_autism_subjects]
placebo_autism_mFD_paired = [study_name + ses for ses in placebo_autism if ses[:-1] in arbaclofen_autism_subjects]
arbaclofen_autism_mFD_paired = [study_name + ses for ses in arbaclofen_autism if ses[:-1] in placebo_autism_subjects]

# A paired test matches values by position, so both lists of a pair must
# name the same subjects in the same order.
assert [ses[:-1] for ses in placebo_td_mFD_paired] == [ses[:-1] for ses in arbaclofen_td_mFD_paired], \
    'Paired TD session lists are not aligned subject-by-subject'
assert [ses[:-1] for ses in placebo_autism_mFD_paired] == [ses[:-1] for ses in arbaclofen_autism_mFD_paired], \
    'Paired autism session lists are not aligned subject-by-subject'

In [None]:
# Arbaclofen: mFD of placebo sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/R-baclofen/Data/Covariates',
    excel_file='Covariates_imputed_with_group_mean_males_only.xlsx',
    spreadsheet='mFD',
    group1=placebo_td_mFD,
    group2=placebo_autism_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Arbaclofen: mFD of arbaclofen sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/R-baclofen/Data/Covariates',
    excel_file='Covariates_imputed_with_group_mean_males_only.xlsx',
    spreadsheet='mFD',
    group1=arbaclofen_td_mFD,
    group2=arbaclofen_autism_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Arbaclofen: mFD within the TD group, placebo vs arbaclofen sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/R-baclofen/Data/Covariates',
    excel_file='Covariates_imputed_with_group_mean_males_only.xlsx',
    spreadsheet='mFD',
    group1=placebo_td_mFD_paired,
    group2=arbaclofen_td_mFD_paired,
    covariate='mFD',
    paired=True,
)

In [None]:
# Arbaclofen: mFD within the autism group, placebo vs arbaclofen sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/R-baclofen/Data/Covariates',
    excel_file='Covariates_imputed_with_group_mean_males_only.xlsx',
    spreadsheet='mFD',
    group1=placebo_autism_mFD_paired,
    group2=arbaclofen_autism_mFD_paired,
    covariate='mFD',
    paired=True,
)

In [None]:
# --------------------------------------------------------------------------------------------------------------- #

In [None]:
# ------------------------------------------- C I T A L O P R A M ----------------------------------------------- #

In [None]:
# Citalopram: subject ID lists for the Age and IQ comparisons
study_name = 'BRCTRADA'

# Bare subject numbers, whitespace-separated; .split() yields the list of strings
complete_td_list = (
    '001 003 004 007 008 009 010 011 012 013 014 015 '
    '016 017 019 020 021 022 023'
).split()
complete_autism_list = (
    '101 102 103 104 105 106 107 108 109 110 111 112 113 '
    '114 116 117 119 120 123'
).split()

# Full IDs = study prefix + subject number
td_list = [f'{study_name}{sub_id}' for sub_id in complete_td_list]
autism_list = [f'{study_name}{sub_id}' for sub_id in complete_autism_list]

In [None]:
# Citalopram: Age, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Citalopram/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Age_IQ_basemFD',
    group1=td_list,
    group2=autism_list,
    covariate='Age',
    paired=False,
)

In [None]:
# Citalopram: IQ, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Citalopram/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Age_IQ_basemFD',
    group1=td_list,
    group2=autism_list,
    covariate='IQ',
    paired=False,
)

In [None]:
# without 108! (only a citalopram session)
# Build a filtered copy instead of calling autism_list.remove(...): removing
# in place raises ValueError when this cell is run a second time and silently
# changes autism_list for any cell that is (re-)run afterwards.
autism_list_without_108 = [sub_id for sub_id in autism_list if sub_id != 'BRCTRADA108']
check_norm_diff('/Users/mishodimitrov/Downloads/PhD/Analysis/Citalopram/Data/Covariates',
               'Covariates.xlsx',
               'Age_IQ_basemFD',
               td_list,
               autism_list_without_108,
               'mFD',
               paired=False)

In [None]:
# # mFD

# Create ID lists (session IDs = subject number + one-letter session code)
study_name = 'BRCTRADA'

placebo_td = ['001D', '003D', '004D', '007D', '008C', '009B', '010C', '011B', '012C', '013C', '014B', '015D', '016B',
              '017D', '019B', '020B', '021C', '022C', '023D']
placebo_autism = ['101D', '102C', '103C', '104B', '105B', '106B', '107C', '109C', '110C', '111D', '112B', '113D',
                  '114D', '116B', '117B', '119C', '120C', '123B']

placebo_td_mFD = [study_name + sub_id for sub_id in placebo_td]
placebo_autism_mFD = [study_name + sub_id for sub_id in placebo_autism]

citalopram_td = ['001C', '004B', '007B', '008B', '009D', '010D', '011C', '012D', '014D', '015C', '016C', '017C',
                 '020D', '021D', '022B', '023C']
citalopram_autism = ['101B', '102D', '103B', '104C', '105C', '106D', '107B', '108C', '109D', '110D', '112D',
                     '114C', '116D', '117C', '119B', '120D', '123D']

citalopram_td_mFD = [study_name + sub_id for sub_id in citalopram_td]
citalopram_autism_mFD = [study_name + sub_id for sub_id in citalopram_autism]

# Paired (within-group) lists: keep only subjects that have both a placebo
# and a citalopram session. Subject number = session ID minus its last character.
placebo_td_subjects = {ses[:-1] for ses in placebo_td}
citalopram_td_subjects = {ses[:-1] for ses in citalopram_td}
td_paired_mFD = [ses[:-1] for ses in placebo_td if ses[:-1] in citalopram_td_subjects]
placebo_td_mFD_paired = [study_name + ses for ses in placebo_td if ses[:-1] in citalopram_td_subjects]
citalopram_td_mFD_paired = [study_name + ses for ses in citalopram_td if ses[:-1] in placebo_td_subjects]

placebo_autism_subjects = {ses[:-1] for ses in placebo_autism}
citalopram_autism_subjects = {ses[:-1] for ses in citalopram_autism}
autism_paired_mFD = [ses[:-1] for ses in placebo_autism if ses[:-1] in citalopram_autism_subjects]
placebo_autism_mFD_paired = [study_name + ses for ses in placebo_autism if ses[:-1] in citalopram_autism_subjects]
citalopram_autism_mFD_paired = [study_name + ses for ses in citalopram_autism if ses[:-1] in placebo_autism_subjects]

# A paired test matches values by position, so both lists of a pair must
# name the same subjects in the same order.
assert [ses[:-1] for ses in placebo_td_mFD_paired] == [ses[:-1] for ses in citalopram_td_mFD_paired], \
    'Paired TD session lists are not aligned subject-by-subject'
assert [ses[:-1] for ses in placebo_autism_mFD_paired] == [ses[:-1] for ses in citalopram_autism_mFD_paired], \
    'Paired autism session lists are not aligned subject-by-subject'

In [None]:
# Citalopram: mFD of citalopram sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Citalopram/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=citalopram_td_mFD,
    group2=citalopram_autism_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Citalopram: mFD within the TD group, placebo vs citalopram sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Citalopram/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=placebo_td_mFD_paired,
    group2=citalopram_td_mFD_paired,
    covariate='mFD',
    paired=True,
)

In [None]:
# Citalopram: mFD within the autism group, placebo vs citalopram sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Citalopram/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=placebo_autism_mFD_paired,
    group2=citalopram_autism_mFD_paired,
    covariate='mFD',
    paired=True,
)

In [None]:
# --------------------------------------------------------------------------------------------------------------- #

In [None]:
# ------------------------------------------- T I A N E P T I N E ----------------------------------------------- #

In [None]:
# Tianeptine: subject ID lists (Age / IQ) and placebo session ID lists
study_name = 'BRCTRADA'

# Bare subject numbers, whitespace-separated; .split() yields the list of strings
complete_td_list = (
    '001 003 004 007 008 009 010 011 012 013 014 015 '
    '016 017 019 020 021 022 023'
).split()
complete_autism_list = (
    '101 102 103 104 105 106 107 108 109 110 111 112 113 '
    '114 116 117 119 120 123'
).split()

td_list = [f'{study_name}{sub_id}' for sub_id in complete_td_list]
autism_list = [f'{study_name}{sub_id}' for sub_id in complete_autism_list]

# Placebo sessions: subject number + one-letter session code
placebo_td = (
    '001D 003D 004D 007D 008C 009B 010C 011B 012C 013C '
    '014B 015D 016B 017D 019B 020B 021C 022C 023D'
).split()
placebo_autism = (
    '101D 102C 103C 104B 105B 106B 107C 109C 110C '
    '112B 113D 114D 116B 117B 119C 120C 123B'
).split()

# Session-level IDs (prefix + full session ID)
placebo_td_list_mFD = [f'{study_name}{sub_id}' for sub_id in placebo_td]
placebo_autism_list_mFD = [f'{study_name}{sub_id}' for sub_id in placebo_autism]

# Subject-level IDs (prefix + first three characters of the session ID)
placebo_td_list = [f'{study_name}{sub_id[:3]}' for sub_id in placebo_td]
placebo_autism_list = [f'{study_name}{sub_id[:3]}' for sub_id in placebo_autism]

In [None]:
# Tianeptine: Age, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=td_list,
    group2=autism_list,
    covariate='Age',
    paired=False,
)

In [None]:
# Tianeptine: IQ, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='data',
    group1=td_list,
    group2=autism_list,
    covariate='IQ',
    paired=False,
)

In [None]:
# Tianeptine: mFD of placebo sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=placebo_td_list_mFD,
    group2=placebo_autism_list_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Tianeptine: session ID lists for the tianeptine sessions
study_name = 'BRCTRADA'

# Subject number + one-letter session code, whitespace-separated
tianeptine_td = (
    '001B 003C 004C 007C 008D 009C 010B 011D 012B 013D '
    '014C 015B 016D 017B 019C 020C 021B 022D 023B'
).split()
tianeptine_autism = (
    '101C 102B 103D 104D 105D 106C 107D 109B 110B '
    '112C 113C 114B 116C 117D 119D 120B 123C'
).split()

# Session-level IDs (prefix + full session ID)
tianeptine_td_list_mFD = [f'{study_name}{sub_id}' for sub_id in tianeptine_td]
tianeptine_autism_list_mFD = [f'{study_name}{sub_id}' for sub_id in tianeptine_autism]

# Subject-level IDs (prefix + first three characters of the session ID)
tianeptine_td_list = [f'{study_name}{sub_id[:3]}' for sub_id in tianeptine_td]
tianeptine_autism_list = [f'{study_name}{sub_id[:3]}' for sub_id in tianeptine_autism]

In [None]:
# Tianeptine: mFD of tianeptine sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=tianeptine_td_list_mFD,
    group2=tianeptine_autism_list_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Also compare within groups

In [None]:
# Tianeptine: mFD within the TD group, placebo vs tianeptine sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=placebo_td_list_mFD,
    group2=tianeptine_td_list_mFD,
    covariate='mFD',
    paired=True,
)

In [None]:
# Autism subjects with both a placebo and a tianeptine session
# (subject number = session ID minus its last character)
autism_paired_mFD = [
    placebo_ses[:-1]
    for placebo_ses in placebo_autism
    if any(placebo_ses[:-1] == drug_ses[:-1] for drug_ses in tianeptine_autism)
]
# Session-level IDs of those subjects, one list per condition
placebo_autism_mFD_paired = [
    f'{study_name}{ses}' for ses in placebo_autism if ses[:-1] in autism_paired_mFD
]
tianeptine_autism_mFD_paired = [
    f'{study_name}{ses}' for ses in tianeptine_autism if ses[:-1] in autism_paired_mFD
]

In [None]:
# Tianeptine: mFD within the autism group, placebo vs tianeptine sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/Tianeptine/Data/Covariates',
    excel_file='mFD_complete.xlsx',
    spreadsheet='mFD_complete',
    group1=placebo_autism_mFD_paired,
    group2=tianeptine_autism_mFD_paired,
    covariate='mFD',
    paired=True,
)

In [None]:
# --------------------------------------------------------------------------------------------------------------- #

In [None]:
# ----------------------------------------- C A N N A B I D I O L ----------------------------------------------- #

In [None]:
# Cannabidiol: subject ID lists for the Age and IQ comparisons
study_name = 'CAN'

# Bare subject numbers, whitespace-separated; .split() yields the list of strings
complete_td_list = (
    '001 002 003 004 005 006 007 008 009 010 012 014 015 '
    '016 017 018 020'
).split()
complete_autism_list = (
    '101 102 103 104 105 106 109 110 111 112 114 115 117 '
    '118 120'
).split()

# Full IDs = study prefix + subject number
td_list = [f'{study_name}{sub_id}' for sub_id in complete_td_list]
autism_list = [f'{study_name}{sub_id}' for sub_id in complete_autism_list]

In [None]:
# Cannabidiol: Age, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/CBD/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Age_IQ',
    group1=td_list,
    group2=autism_list,
    covariate='Age',
    paired=False,
)

In [None]:
# Cannabidiol: IQ, td_list vs autism_list (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/CBD/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Age_IQ',
    group1=td_list,
    group2=autism_list,
    covariate='IQ',
    paired=False,
)

In [None]:
# # mFD

# Create ID lists (session IDs = subject number + one-letter session code)
# NOTE(review): unlike the other studies, these IDs are used without a
# study-name prefix; presumably the 'Complete' sheet is keyed by the bare
# session IDs. Confirm against the workbook.

placebo_td = ['001C', '002B', '003C', '004A', '005B', '006A', '007A', '008B', '009C', '010A', '012B', '014C',
              '015B', '016B', '017B', '018B', '020A']
placebo_autism = ['101B', '102A', '103A', '105C', '106C', '109A', '110C', '111B', '112A', '114C', '115B', '117B',
                  '118A', '120A']

placebo_td_mFD = list(placebo_td)
placebo_autism_mFD = list(placebo_autism)

cbd_td = ['001A', '002A', '003B', '004B', '005C', '006C', '007B', '008C', '009B', '010C', '012A', '014B', '015C',
          '016A', '017A', '018C', '020B']
cbd_autism = ['101C', '103B', '104A', '105B', '106A', '109C', '111C', '112C', '114A', '115C', '117C',
              '118B', '120B']

cbd_td_mFD = list(cbd_td)
cbd_autism_mFD = list(cbd_autism)

# Paired (within-group) lists: keep only subjects that have both a placebo
# and a CBD session. Subject number = session ID minus its last character.
placebo_td_subjects = {ses[:-1] for ses in placebo_td}
cbd_td_subjects = {ses[:-1] for ses in cbd_td}
td_paired_mFD = [ses[:-1] for ses in placebo_td if ses[:-1] in cbd_td_subjects]
placebo_td_mFD_paired = [ses for ses in placebo_td if ses[:-1] in cbd_td_subjects]
cbd_td_mFD_paired = [ses for ses in cbd_td if ses[:-1] in placebo_td_subjects]

placebo_autism_subjects = {ses[:-1] for ses in placebo_autism}
cbd_autism_subjects = {ses[:-1] for ses in cbd_autism}
autism_paired_mFD = [ses[:-1] for ses in placebo_autism if ses[:-1] in cbd_autism_subjects]
placebo_autism_mFD_paired = [ses for ses in placebo_autism if ses[:-1] in cbd_autism_subjects]
cbd_autism_mFD_paired = [ses for ses in cbd_autism if ses[:-1] in placebo_autism_subjects]

# A paired test matches values by position, so both lists of a pair must
# name the same subjects in the same order.
assert [ses[:-1] for ses in placebo_td_mFD_paired] == [ses[:-1] for ses in cbd_td_mFD_paired], \
    'Paired TD session lists are not aligned subject-by-subject'
assert [ses[:-1] for ses in placebo_autism_mFD_paired] == [ses[:-1] for ses in cbd_autism_mFD_paired], \
    'Paired autism session lists are not aligned subject-by-subject'

In [None]:
# Cannabidiol: mFD of placebo sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/CBD/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Complete',
    group1=placebo_td_mFD,
    group2=placebo_autism_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Cannabidiol: mFD of CBD sessions, TD vs autism (independent samples)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/CBD/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Complete',
    group1=cbd_td_mFD,
    group2=cbd_autism_mFD,
    covariate='mFD',
    paired=False,
)

In [None]:
# Cannabidiol: mFD within the TD group, placebo vs CBD sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/CBD/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Complete',
    group1=placebo_td_mFD_paired,
    group2=cbd_td_mFD_paired,
    covariate='mFD',
    paired=True,
)

In [None]:
# Cannabidiol: mFD within the autism group, placebo vs CBD sessions (paired)
check_norm_diff(
    work_dir='/Users/mishodimitrov/Downloads/PhD/Analysis/CBD/Data/Covariates',
    excel_file='Covariates.xlsx',
    spreadsheet='Complete',
    group1=placebo_autism_mFD_paired,
    group2=cbd_autism_mFD_paired,
    covariate='mFD',
    paired=True,
)