# Anatomical Abnormalities Associated with Schizophrenia—An Observational Study

In [None]:
# standard library imports
import pathlib
import functools

# 3rd party library imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import scipy.stats
import seaborn as sns

# show three decimal places in pandas output
pd.options.display.float_format = "{:.3f}".format

# enable TeX on figure labels (requires a working LaTeX installation)
plt.rcParams['text.usetex'] = True

# apply seaborn's default theme; set_theme() is the preferred name for set()
sns.set_theme()

We begin by reading the data and summarizing the variables.

In [None]:
# Read the study data from a CSV file in the working directory.  Later cells
# use its 'Affected' and 'Unaffected' columns.
# NOTE(review): presumably one row per twin pair -- confirm against the data file.
df = pd.read_csv('case0202.csv')
# Show summary statistics for the columns.
df.describe()

In [None]:
# Reshape from wide (one column per group) to long form with one row per
# measurement.  Naming the column axis and the stacked values up front gives
# the 'Treatment' and 'Volume' columns directly; the original row label stays
# in the index.
dflong = (
    df.rename_axis(columns='Treatment')
      .stack()
      .rename('Volume')
      .reset_index(level=1)
)
dflong.head()

In [None]:
# Side-by-side boxplots of volume, one box per group, on an explicit Axes.
fig, ax = plt.subplots()
sns.boxplot(data=dflong, x='Treatment', y='Volume', ax=ax)
# Keep the unit outside math mode: "$cm^3$" typesets "cm" as the italic
# product of two variables rather than as an upright unit.
_ = ax.set_ylabel(r'Volume (cm$^3$)')
_ = ax.set_title('Hippocampus Volumes for Schizophrenia Study')

In [None]:
# Kernel density estimate of volume for each group, with a rug plot along the axis.
g = sns.displot(data=dflong, x='Volume', hue='Treatment', kind='kde', rug=True)
# `figure` is the preferred accessor; `fig` is kept by seaborn only as a
# deprecated alias.
_ = g.figure.suptitle('Distributions of Hippocampus Study Volumes')
# Use the grid's own tight_layout(): it reserves room for the legend that
# displot places outside the axes, which Figure.tight_layout() does not.
# It returns the grid, so assign to `_` to keep the cell output clean.
_ = g.tight_layout()

$H_0: \mu_{affected} = \mu_{unaffected}$
<br>
$H_a: \mu_{affected} \ne \mu_{unaffected}$

In [None]:
# Paired t-test of the 'Affected' column against the 'Unaffected' column
# (the difference is taken as Affected - Unaffected).
res = pg.ttest(df['Affected'], df['Unaffected'], paired=True)
# pingouin returns the results as a DataFrame; leave it as the last expression
# so the notebook renders it as a table instead of print()'s plain text.
res

There is strong evidence that the mean difference in left hippocampus volumes between schizophrenic individuals and their nonschizophrenic twins is non-zero (paired $t$-test, two-sided $p$-value = 0.006, $t_{14} = -3.229$). With 95% confidence, the mean left hippocampus volume is between 0.07 and 0.33 cm$^3$ smaller for the schizophrenic individuals than for their unaffected twins.