In the following code we test for violations of the statistical assumptions before running the statistical analysis.

In [5]:
# importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
import nibabel as nib
from numpy.linalg import inv # inverse function for matrix multiplication
from scipy.interpolate import interp1d # for resampling
from nilearn.glm.first_level.hemodynamic_models import glover_hrf
from scipy.stats import pearsonr
import scipy

# setting work and data directories --------------------------------------------
# os.path.dirname() on a path ending in "/" simply drops that trailing slash.
csv_directory = os.path.dirname("/home/c13572687/Documents/scripts_and_data/csv_data/") # this on the server
fmri_directory = os.path.dirname("/home/c13572687/Documents/scripts_and_data/fmri_data/") # this on the server

# loading data
exploratory_data = pd.read_csv(os.path.join(csv_directory, "exploratory_half.csv"), sep=',')
confirmatory_data = pd.read_csv(os.path.join(csv_directory, "confirmatory_half.csv"), sep=',')

# sanity check: both halves must have the same, expected number of rows.
# A bare comparison expression is evaluated and thrown away, so the check is
# made explicit and fails loudly when the data do not match expectations.
expected_rows = 427
if not (exploratory_data.shape[0] == confirmatory_data.shape[0] == expected_rows):
    raise ValueError(
        f"Expected {expected_rows} rows in each half, got "
        f"{exploratory_data.shape[0]} (exploratory) and "
        f"{confirmatory_data.shape[0]} (confirmatory)."
    )

# loading in segmentation data (will not be using but it's there)
segmentation_data = pd.read_csv(os.path.join(csv_directory, "segmentation_seconds.csv"), sep=',')
segmentation_data = segmentation_data.iloc[0:24, :] # keep the first 24 rows; removing weird random extra cells

Our data is not expected to be normally distributed, as it was systematically sampled to represent the Dutch population. In this cell we check this with Shapiro-Wilk tests (and save the outcome of the statistical tests to `.csv` files which can then be used in the final report):

In [17]:
# assumption checks (exploratory and confirmatory; behavioural) ----------------
# Runs a Shapiro-Wilk normality test on five behavioural variables of each data
# half and writes one results table per half to a .csv file.

import math
from scipy.stats import shapiro 

# (label shown in the results table, column name in the data frame)
NORMALITY_VARIABLES = [
    ('Issue-based Social Ideology', 'social.ideology'),
    ('Identity-based Social Ideology', 'social.identity'),
    ('Education level', 'education_level_numbers'),
    ('Background SES', 'background_SES'),
    ('Sex', 'sex_numbers'),
]


def _add_numeric_codes(data):
    """Add numeric 'education_level_numbers' and 'sex_numbers' columns to data, in place."""
    # need to code edu level into numbers: high = 3, medium = 2, low = 1,
    # every other value (including missing ones) = 0
    education = data['education_level']
    data['education_level_numbers'] = np.select(
        [education == 'high', education == 'medium', education == 'low'],
        [3, 2, 1],
        default=0,
    )
    # NOTE(review): every value that is not exactly 'male' (including missing
    # values) is coded 1 -- confirm the column holds no other categories.
    data['sex_numbers'] = np.where(data['sex'] == 'male', 0, 1)


def _shapiro_table(data, decimals):
    """Return a table with the Shapiro-Wilk statistic and p-value per variable.

    data     -- data frame holding every column named in NORMALITY_VARIABLES
    decimals -- number of decimals the test statistic is rounded to
                (p-values are stored unrounded)
    """
    # run each test once and reuse the result for both output columns
    results = [shapiro(data[column]) for _, column in NORMALITY_VARIABLES]
    return pd.DataFrame({
        "Variable": [label for label, _ in NORMALITY_VARIABLES],
        "Shapiro-Wilk Statistic": [np.round(result[0], decimals) for result in results],
        "p-value": [result[1] for result in results],
    })


# assumption checks (exploratory; behavioural) ----------------------------------------------
_add_numeric_codes(exploratory_data)
df = _shapiro_table(exploratory_data, decimals=3)
df.to_csv(os.path.join(csv_directory, "exp_normality.csv"))

# assumption checks (confirmatory; behavioural) ----------------------------------------------
# ==== https://www.datanovia.com/en/lessons/repeated-measures-anova-in-r/ =====
# NOTE(review): the confirmatory table uses 4 decimals while the exploratory
# table uses 3 -- confirm that the difference in precision is intended.
_add_numeric_codes(confirmatory_data)
df = _shapiro_table(confirmatory_data, decimals=4)
df.to_csv(os.path.join(csv_directory, "conf_normality.csv"))
