## Parametric Tests: Tests Designed for Normally-Distributed Data
- Dependent (Paired) T-Tests
- Independent (Unpaired) T-Tests

### Import Required Libraries

In [1]:
import os
import numpy as np
import pandas as pd
from math import sqrt
from scipy import stats
from scipy.stats import t
from scipy.stats import ttest_ind, ttest_rel

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
%matplotlib inline

### Load the Data

In [2]:
# Locate the data file
data_dir = os.path.join(os.getcwd(), 'Data')
source_file = os.path.join(data_dir, 'Mice-Pheno.csv')

# Read the source from the Web into a Pandas DataFrame.
df = pd.read_csv(source_file, header=0)

# Inspect the file contents
df = df.dropna()
df.head()

Unnamed: 0,Sex,Diet,Bodyweight
0,F,hf,31.94
1,F,hf,32.48
2,F,hf,22.82
3,F,hf,19.92
4,F,hf,32.22


#### Separate the Two Different Treatments

In [3]:
high_fat = df[df.Diet == 'hf']
chow = df[df.Diet == 'chow']

#### Separate the Two Groups (Samples)

In [4]:
male = df[df.Sex == 'M']
female = df[df.Sex == 'F']

#### Define a Function to Calculate the Mean Difference and Confidence Interval

In [5]:
def diff_mean_and_conf_intvl(sample_1, sample_2):
    N1 = len(sample_1)
    N2 = len(sample_2)
    
    # Degrees of Freedom
    DoF = (N1 + N2 - 2) 

    std1 = sample_1.std()
    std2 = sample_2.std()
    std_N1N2 = sqrt( ((N1 - 1)*(std1)**2 + (N2 - 1)*(std2)**2) / DoF)

    # Mean Difference
    diff_mean = sample_1.mean() - sample_2.mean()
    
    # Margin of Error:
    MoE = t.ppf(0.975, DoF) * std_N1N2 * sqrt(1/N1 + 1/N2)
    
    return diff_mean, MoE

#### Independent (Unpaired) T-Test: Two Samples Exposed to the Same Treatment

In [6]:
hf_male = male[male.Diet == 'hf']
hf_female = female[female.Diet == 'hf']

t_stat, p_val = ttest_ind(hf_male.Bodyweight, hf_female.Bodyweight)

print('The results of the independent t-test are: \n\tt-value = {:4.3f}\n\tp-value = {}'.format(t_stat, p_val))

The results of the independent t-test are: 
	t-value = 15.930
	p-value = 2.1854663590369379e-44


In [7]:
# Calculate the Mean Difference and the 95% Confidence Interval
diff_mean, MoE = diff_mean_and_conf_intvl(hf_male.Bodyweight, hf_female.Bodyweight)

print('The difference between groups is %3.1f' % diff_mean)
print('The 95% Confidence Interval is [{:3.1f} to {:3.1f}]'.format(diff_mean - MoE, diff_mean + MoE))

The difference between groups is 8.6
The 95% Confidence Interval is [7.5 to 9.6]


#### Dependent (Paired) T-Test: Two Samples Exposed to Two Different Treatments

In [8]:
t_stat, p_val = ttest_rel(male[0:416].Bodyweight, female[0:416].Bodyweight)

print('The results of the dependent t-test are: \n - t-value = {:4.3f}\n - p-value = {}'.format(t_stat, p_val))

The results of the dependent t-test are: 
 - t-value = 23.355
 - p-value = 1.2509045732304141e-77


In [9]:
# Calculate the Mean Difference and the 95% Confidence Interval
diff_mean, MoE = diff_mean_and_conf_intvl(male[0:416].Bodyweight, female[0:416].Bodyweight)

print('The difference between groups is %3.1f' % diff_mean)
print('The 95% Confidence Interval is [{:3.1f} to {:3.1f}]'.format(diff_mean - MoE, diff_mean + MoE))

The difference between groups is 7.8
The 95% Confidence Interval is [7.1 to 8.4]
