In [2]:
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import math
import pandas as pd
import seaborn as sns

In [4]:
# -----  LOAD UP JOSH'S EXPORTED DATA FILE -----

controlData_file = 'controlData_20240123.csv'
controlData = pd.read_csv(controlData_file, header=0)

# Working copy without the 'Unnamed: 0' column (presumably a row index written
# out by an earlier export -- TODO confirm). `controlData` itself is left intact.
# NOTE(review): the preview also shows an 'Unnamed: 0.1' column, which is still
# present in `data`; confirm whether it should be dropped as well.
data = controlData.drop(columns='Unnamed: 0')

# Jupyter only renders a bare expression when it is the last line of the cell,
# so the preview of the raw frame has to come after the assignment above.
controlData.head(4)

Unnamed: 0.1,Unnamed: 0,ID,Sex,Geno,oft_datetime,oft_nanList,oft_cm_px,oft_Outer,oft_Center,oft_Distance_px,oft_Distance_cm,tct_stimSide,tct_nanList,tct_StimSide,tct_Center,tct_ObjSide,tct_StimIntxn,tct_NoIntxn,tct_ObjIntxn
0,0,E0443,M,Het,"9/28/23, 3:03 PM",,0.101888,0.820503,0.179497,89640.52634,9133.280659,Left,"Int64Index([8988, 8989, 8990, 8991, 8992, 8993...",0.540163,0.113218,0.34662,0.307245,0.550486,0.14227
1,1,E0441,M,Het,"9/28/23, 3:23 PM",,0.10197,0.917039,0.082961,45625.48161,4652.428704,Right,"Int64Index([], dtype='int64')",0.276681,0.723319,0.0,0.167242,0.832758,0.0
2,2,Biggie,M,WT,"9/28/23, 3:40 PM",,0.101779,0.954746,0.045254,50958.26852,5186.494291,Right,"Int64Index([10621, 10622, 10623, 10624, 10625,...",0.493616,0.207026,0.299357,0.245836,0.606196,0.147968
3,3,LilGuy,M,Het,"9/28/23, 3:54 PM",,0.101927,0.930694,0.069306,111641.3958,11379.25729,Left,"Int64Index([], dtype='int64')",0.483117,0.034962,0.481921,0.322458,0.538657,0.138885


In [None]:
# ----- FUNCTION TO TEST FOR NORMALITY -----

def testNormal(input, alpha=0.05):

    if len(input) >= 20:
        #This function is based on D’Agostino and Pearson’s test that combines skew and kurtosis to produce an omnibus test of normality
        results = scipy.stats.normaltest(input) 
        p_values = results.pvalue
        normal_method = "D'Agostino-Pearson test (combined kurtosis + skewness)"
        if p_value > alpha: isnormal = True
        elif p_value <= alpha: isnormal = False
    else: 
        #For n<20, we have to rely on a Monte Carlo approach to test skew and kurtosis (kurtosistest is only valid n>=20) 
        rvs = lambda size: scipy.stats.norm.rvs(size=size, random_state=np.random.default_rng())
        skew_results = scipy.stats.monte_carlo_test(input, rvs, scipy.stats.skew, vectorized=True)
        kurtosis_results = scipy.stats.monte_carlo_test(input, rvs, scipy.stats.kurtosis, vectorized=True)
        p_values = [skew_results.pvalue, kurtosis_results.pvalue]
        normal_method = "Monte Carlo method (Fisher kurtosis + Fisher-Pearson skewness)"
        # data is only treated as normal if it has both normal skew and kurtosis
        if p_values[0] > 0.05 and p_values[1] > alpha: isnormal = True
        else: isnormal = False

    return isnormal, p_values, normal_method