# What is the True Normal Human Body Temperature? 

#### Background

The mean normal body temperature was held to be 37$^{\circ}$C or 98.6$^{\circ}$F for more than 120 years since it was first conceptualized and reported by Carl Wunderlich in a famous 1868 book. But, is this value statistically correct?

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

# Load the body-temperature dataset from a path relative to the working
# directory; later cells read its 'temperature' and 'gender' columns.
df = pd.read_csv('data/human_body_temperature.csv')

In [None]:
# First, a histogram
%matplotlib inline
plt.hist(df['temperature'])
plt.xlabel('Temperature')
plt.ylabel('Frequency')
plt.title('Histogram of Body Temperature')
plt.ylim(0, 40)  # Add some buffer space at the top so the bar doesn't get cut off.

In [None]:
# Next, a quantile plot of the standardized temperatures against the
# standard normal distribution.
import statsmodels.api as sm
mean = np.mean(df['temperature'])
# Use the sample standard deviation (ddof=1): np.std defaults to the
# population formula (ddof=0), which disagrees with the pandas .std() used
# for the margin of error later in the notebook. This `sd` also feeds the
# z and t statistics computed further down.
sd = np.std(df['temperature'], ddof=1)
# Standardize so the points can be compared against the 45-degree line.
z = (df['temperature'] - mean) / sd
sm.qqplot(z, line='45')

In [None]:
# Finally, a formal normality test (null hypothesis: the sample comes from a
# normal distribution). Not recommended!! Use only when you're not sure.
import scipy.stats as stats
stats.mstats.normaltest(df.temperature)

In [None]:
# Sample size: the number of temperature observations.
n = len(df.temperature)
n

In [None]:
# Calculates p value using 100,000 bootstrap replicates of the sample mean.

# Raw temperature values as a NumPy array; the resampling loop below draws
# from this array.
temperature = df['temperature'].values

bootstrap_replicates = np.empty(100000)

size = len(bootstrap_replicates)

for i in range(size):
    # Resample with replacement, keeping the original sample size.
    bootstrap_sample = np.random.choice(temperature, size=len(temperature))
    bootstrap_replicates[i] = np.mean(bootstrap_sample)

# Fraction of resampled means that reach the hypothesized value of 98.6.
p = np.sum(bootstrap_replicates >= 98.6) / len(bootstrap_replicates)
print('p =', p)

In [None]:
# z statistic for the sample mean against the hypothesized mean of 98.6:
# (sample mean - 98.6) divided by the standard error sd / sqrt(n).
z = (mean - 98.6)/(sd / np.sqrt(n))
z

In [None]:
# Two-tailed p-value for the z statistic. Taking abs(z) keeps the result
# valid for either sign of z; doubling the CDF is only correct when z is
# negative and yields a "probability" above 1 when z is positive.
2 * stats.norm.sf(abs(z))
# NOTE: The survival function gives us $P(Z \ge |z|)$ and this is a two-tailed test, so we multiply the result by 2

In [None]:
# t statistic for the same comparison against 98.6. The expression is the
# same as the z statistic computed earlier; it is meant to be judged
# against the t critical value computed in the next cell.
t = (mean - 98.6)/(sd / np.sqrt(n))

In [None]:
# Lower-tail critical t value for a two-sided test at the 5% level
# (2.5% in each tail) with n - 1 degrees of freedom.
t_critical = stats.t.ppf(0.05 / 2, df=n - 1)
t_critical

In [None]:
# Margin of error for a 95% confidence interval on the mean:
# 1.96 (the normal critical value) times the standard error sd / sqrt(n).
# pandas' .std() is the sample standard deviation.
sd = df.temperature.std()
n = len(df.temperature)
moe = 1.96 * sd / np.sqrt(n)
moe

In [None]:
# Confidence interval: the sample mean minus / plus the margin of error.
mean = df.temperature.mean()
ci = np.array([mean - moe, mean + moe])
ci

In [None]:
# Define bootstrap functions:

def replicate(data, function):
    """Compute one bootstrap replicate of a statistic.

    Draws ``len(data)`` values from ``data`` with replacement and applies
    ``function`` to the resampled array.

    Parameters
    ----------
    data : 1-D sequence or array to resample from.
    function : callable taking the resampled array and returning the
        statistic of interest.

    Returns
    -------
    Whatever ``function`` returns for the resampled array.
    """
    resampled = np.random.choice(data, size=len(data))
    return function(resampled)


def draw_replicates(data, function, size=1):
    """Draw ``size`` bootstrap replicates of a statistic.

    Parameters
    ----------
    data : 1-D sequence or array to resample from.
    function : callable applied to each resampled array.
    size : number of replicates to generate (default 1).

    Returns
    -------
    NumPy array of length ``size`` holding one replicate per entry.
    """
    results = np.empty(size)

    # Each call to replicate() resamples the data afresh.
    for k in range(size):
        results[k] = replicate(data, function)

    return results

In [None]:
# Fix the random seed so the resampling below is reproducible:
np.random.seed(15)

# Bootstrap the sample mean of the temperatures 10,000 times:
replicates = draw_replicates(df['temperature'], np.mean, size=10000)

# 99.9% confidence interval: cut 0.05% off each tail of the replicates.
CI = np.percentile(replicates, [0.05, 99.95])
print('99.9% Confidence Interval:', CI)

In [None]:
# Two-sample z statistic for the difference in mean temperature between the
# rows where gender is 'M' and all remaining rows.
males = df.gender == 'M'

# Group sizes:
n_male = np.sum(males)
n_female = len(df.temperature) - n_male

# Group standard deviations:
sd_male = df.temperature[males].std()
sd_female = df.temperature[~males].std()

# Observed difference in means (male minus the rest):
diff_means = df.temperature[males].mean() - df.temperature[~males].mean()

# Difference divided by its standard error.
z = diff_means / np.sqrt(sd_male ** 2 / n_male + sd_female ** 2 / n_female)
z

In [None]:
# Two-tailed p-value: twice the upper-tail probability beyond |z|.
# Taking abs(z) keeps the result valid whichever group has the larger mean;
# doubling the CDF is only correct for negative z and exceeds 1 otherwise.
pval = 2 * stats.norm.sf(abs(z))
pval

In [None]:
# 95% confidence interval for the difference in means:
# diff_means minus / plus 1.96 standard errors of the difference.
diff_means + np.array([-1, 1]) * 1.96 * np.sqrt(
    sd_male ** 2 / n_male + sd_female ** 2 / n_female
)

In [None]:
# Permutation test for the difference in mean temperature between genders.

# Per-group temperature arrays, split the same way as the two-sample z test
# ('M' versus every other row).
male_temperature = df.temperature[df.gender == 'M'].values
female_temperature = df.temperature[df.gender != 'M'].values

# Observed test statistic: absolute difference of the group means, matching
# the absolute value taken for each permutation replicate below.
male_and_female_diff = np.abs(np.mean(male_temperature) - np.mean(female_temperature))

# The pooled sample does not change between iterations, so build it once.
combined_temperatures = np.concatenate((male_temperature, female_temperature))

permutation_replicates = np.empty(100000)

size = len(permutation_replicates)

for i in range(size):
    # Shuffle the pooled data and re-split it into groups of the original sizes.
    combined_perm_temperatures = np.random.permutation(combined_temperatures)

    male_permutation = combined_perm_temperatures[:len(male_temperature)]
    female_permutation = combined_perm_temperatures[len(male_temperature):]

    permutation_replicates[i] = np.abs(np.mean(male_permutation) - np.mean(female_permutation))

# Fraction of shuffles whose difference is at least as large as the observed one.
p_val = np.sum(permutation_replicates >= male_and_female_diff) / len(permutation_replicates)

print('p =', p_val)