In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from scipy.stats import norm, normaltest

In [12]:
# Minimum-duration thresholds to process; one input CSV is read per value.
MIN_DURATIONS = [21]

# Path *prefix* (not just a directory): 'min_duration_<n>.csv' is appended to it.
DATA_DIR = '../data/welm_'

# Where the generated figures are written.
BASE_DIR = '../results/'

# Column name -> human-readable label used in plot titles.
# NOTE(review): 'lg_V/C0_cen' is labelled 'ln(V/C0)' — confirm the intended
# logarithm base against whatever produced that column.
DICT = {
    'log_V_V0_cen': 'log_V/V0',
    'V/V0_cen': 'V/V0',
    'V/C0_cen': 'V/C0',
    'lg_V/C0_cen': 'ln(V/C0)',
    'log_V/V0_cen': 'log(V/V0)',
    'log_V/V0_samp_cen': 'log(V/V0_samp)',
}

In [13]:
def normality_test(x, alpha):
    """Run ``scipy.stats.normaltest`` on ``x`` and describe the outcome in words.

    Parameters
    ----------
    x : array-like
        Sample handed unchanged to ``normaltest``.
    alpha : float
        Significance level the p-value is compared against.

    Returns
    -------
    tuple
        ``(summary, p)``: ``summary`` is a two-line string (rounded p-value
        and its comparison with ``alpha`` on the first line, the verdict on
        the second) and ``p`` is the unrounded p-value.
    """
    _, p = normaltest(x)  # the test statistic itself is not reported

    # Null hypothesis: x comes from a normal distribution.
    rejected = p < alpha
    comparison = 'p < ' if rejected else 'p >= '
    verdict = 'CAN' if rejected else 'CANNOT'

    report_lines = [
        'p = ' + str(round(p, 6)) + ' --> ' + comparison + str(alpha),
        'The null hypothesis (that data come from normal dist) ' + verdict + ' be rejected.',
    ]
    return '\n'.join(report_lines), p

In [14]:
def create_plot(val, group, fn):
    """Save a density histogram of one column with a fitted normal curve.

    The sample's mean, standard deviation and the normality-test summary are
    written beneath the axes, and a one-line p-value report is printed.

    This function reads three notebook-level globals rather than taking them
    as arguments: ``df`` (the loaded data, which must have a ``Control``
    column), ``min_duration`` (shown in the title) and ``DICT`` (column name
    -> title label). They must be set before calling.

    Parameters
    ----------
    val : str
        Column of ``df`` to plot; must also be a key of ``DICT``.
    group : str
        ``'combined'`` (all rows), ``'treatment'`` (``Control == 0``) or
        ``'control'`` (``Control == 1``).
    fn : str
        Path the figure is saved to.

    Raises
    ------
    ValueError
        If ``group`` is not one of the three supported names.
    """
    if group == 'combined':
        rows = df
    elif group == 'treatment':
        rows = df.loc[df.Control == 0]
    elif group == 'control':
        rows = df.loc[df.Control == 1]
    else:
        # Fail loudly and early: merely printing here left the sample
        # undefined and crashed a few lines later with an unrelated error.
        raise ValueError(
            'Error! Value group must be one of: combined, treatment, control. '
            'Got: ' + repr(group)
        )
    x = list(rows[val])

    mean = np.mean(x)
    std = np.std(x)  # numpy default (ddof=0), i.e. population standard deviation
    s_normality, p = normality_test(x, .05)
    s = 'mean: ' + str(round(mean, 3)) + ', std: ' + str(round(std, 3)) + '\n'
    s += s_normality

    # Fixed-size grid that includes both endpoints. A float-step arange makes
    # the point count depend on the data's span and is empty for constant data.
    x_axis = np.linspace(np.min(x), np.max(x), 1000)

    fig, ax = plt.subplots()
    try:
        sns.histplot(x, stat='density', ax=ax)
        ax.plot(x_axis, norm.pdf(x_axis, loc=mean, scale=std), color='r')
        ax.set_title(DICT[val] + ', ' + group + ', days >= ' + str(min_duration) + ', N = ' + str(len(x)))
        print(val + ', ' + group + ', p: ' + str(p))
        # Negative y places the caption below the axes; bbox_inches='tight'
        # keeps it inside the saved image.
        fig.text(.5, -.05, s, ha='center')
        fig.savefig(fn, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

In [15]:
# BASE_DIR already ends with '/', so normalise instead of blindly appending
# another one (which produced '../results//').
save_dir = BASE_DIR.rstrip('/') + '/'

# Output names used to ignore the threshold, so a second entry in
# MIN_DURATIONS would overwrite the first entry's plots. The threshold is
# added to the name only when several are configured, which keeps the
# existing single-threshold filenames unchanged.
tag_with_duration = len(MIN_DURATIONS) > 1

for min_duration in MIN_DURATIONS:
    # NOTE: `df` and `min_duration` must keep exactly these global names;
    # create_plot reads both from the notebook namespace.
    data_fn = DATA_DIR + 'min_duration_' + str(min_duration) + '.csv'
    df = pd.read_csv(data_fn)
    print('duration >= ' + str(min_duration))
    duration_tag = '-min_duration_' + str(min_duration) if tag_with_duration else ''
    for group in ['combined', 'treatment', 'control']:
        for val in ['log_V_V0_cen']:
            # '/' in a column name would otherwise be read as a path separator.
            save_fn = save_dir + val.replace('/', '_') + '-' + group + duration_tag + '.png'
            create_plot(val, group, save_fn)

duration >= 21
log_V_V0_cen, combined, p: 7.136597414061487e-17
log_V_V0_cen, treatment, p: 4.897194537603441e-08
log_V_V0_cen, control, p: 0.3283496728958437
