# Do Female Fish have Pre-Existing Preferences for a Male Trait?

In [None]:
# standard library imports
import warnings

# Ignore FutureWarning messages.  The filter is installed before the
# third-party imports below - presumably so that warnings raised while
# importing those libraries are suppressed as well; keep this ordering.
warnings.simplefilter('ignore', category=FutureWarning)

# 3rd party library imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns

# Switch on seaborn's default plot styling for every figure in the notebook.
sns.set()

In [None]:
# Load the data set from the CSV file in the working directory.  The cells
# below read its 'Pair' (grouping) and 'Proportion' (response) columns.
df = pd.read_csv('case0602.csv')

# Robustness of Assumptions

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of Proportion
# computed separately for each Pair group.
proportion_by_pair = df.groupby('Pair')['Proportion']
proportion_by_pair.describe()

In [None]:
# Two panels side by side: box plots of Proportion per Pair on the left,
# kernel density estimates of Proportion coloured by Pair on the right.
fig, axes = plt.subplots(figsize=[12, 6.4], ncols=2)
box_ax, kde_ax = axes
_ = sns.boxplot(x='Pair', y='Proportion', data=df, ax=box_ax)
_ = sns.kdeplot(x='Proportion', hue='Pair', data=df, ax=kde_ax)

There is strong evidence against the assumption of equal variance.

Are the mean proportions statistically different among the groups defined by `Pair`?  I.e., with $I$ groups, test

$\it{H_0}:  \mu_{1} = \mu_{2} = \cdots = \mu_{I}$

In [None]:
# One-way ANOVA table, built by hand, for Proportion across the Pair groups.
# Uses the 'Pair' / 'Proportion' columns of the frame loaded above and takes
# the grand mean directly from the data so the cell runs on a fresh kernel.
response = df['Proportion']
groups = df.groupby('Pair')['Proportion']

# Grand mean of the response over all observations.
mu = response.mean()

# Start from an all-NaN table; cells that have no meaning (e.g. the F
# statistic of the "Within Groups" and "Total" rows) simply stay NaN.
a = np.full((3, 5), np.nan)
index = ['Between Groups', 'Within Groups', 'Total']
columns = ['Sum of Squares', 'd.f.', 'Mean Square', 'F Statistic', 'p-value']
dfss = pd.DataFrame(a, columns=columns, index=index)

# Total: squared deviations of every observation from the grand mean.
dfss.at['Total', 'Sum of Squares'] = ((response - mu) ** 2).sum()

# Within groups: squared deviations of each observation from its group mean.
ssg = [((g - g.mean()) ** 2).sum() for _, g in groups]
dfss.at['Within Groups', 'Sum of Squares'] = sum(ssg)

# Between groups: squared deviation of each group mean from the grand mean,
# weighted by the group size.
ssb = [len(g) * (g.mean() - mu) ** 2 for _, g in groups]
dfss.at['Between Groups', 'Sum of Squares'] = sum(ssb)

# Degrees of freedom: I - 1 between, n - I within, n - 1 total.
n = len(df)
I = df['Pair'].nunique()
dfss['d.f.'] = [I - 1, n - I, n - 1]

dfss['Mean Square'] = dfss['Sum of Squares'] / dfss['d.f.']

# F = (between-group mean square) / (within-group mean square).
f_stat = (
    dfss.at['Between Groups', 'Mean Square']
    / dfss.at['Within Groups', 'Mean Square']
)
dfss.at['Between Groups', 'F Statistic'] = f_stat

# Upper-tail probability of the F distribution with (I - 1, n - I) d.f.
dfss.at['Between Groups', 'p-value'] = scipy.stats.f.sf(
    f_stat,
    dfss.at['Between Groups', 'd.f.'],
    dfss.at['Within Groups', 'd.f.'],
)

dfss

In [None]:
# One-way ANOVA F-test of Proportion across the Pair groups using SciPy:
# one sample of Proportion values per Pair group is passed to f_oneway.
args = [grp['Proportion'] for _, grp in df.groupby('Pair')]
scipy.stats.f_oneway(*args)

This is overwhelming evidence that the populations are different.

Does reducing from 85 kcal to 50 kcal per week reduce lifespan?

In [None]:
# Two-sample t-test comparing the 'Lifetime' values of the rows with
# Diet == 'N/N85' against those with Diet == 'N/R50'.
# NOTE(review): `df` is loaded from case0602.csv and is used with the
# 'Pair' / 'Proportion' columns elsewhere in this notebook; the 'Diet' and
# 'Lifetime' columns referenced here look like leftovers from a different
# data set, so this cell would presumably fail as written - confirm and
# replace with the comparison intended for this data.
scipy.stats.ttest_ind(df.loc[df.Diet == 'N/N85', 'Lifetime'], df.loc[df.Diet == 'N/R50', 'Lifetime'])
