# The Normal Distribution

In [1]:
import pandas as pd
import numpy as np

In [2]:
from scipy.stats import norm

In [3]:
import matplotlib.pyplot as plt

# Probability Density Function (PDF)

$$ N(\mu,\sigma) = 
\frac{1}{\sqrt{2\pi\sigma^2}} \times e ^ {-\frac{(x-\mu)^2}{2\sigma^2}}
$$

# Calculate probability between -inf and +inf

$$
\int^{\infty}_{-\infty}N(\mu,\sigma)
$$

In [4]:
# Five observed values; the accompanying text treats them as fuse burn times in seconds.
x = [19, 22, 20, 19, 23]

In [5]:
# Arithmetic mean of the observations; later passed to norm.cdf as `loc`.
mean = np.mean(x)

In [6]:
# np.std defaults to ddof=0, i.e. the population standard deviation
# (divide by n, not n - 1); later passed to norm.cdf as `scale`.
sd = np.std(x)

Calculate the probability for the fuse to run for 18 seconds or less

Instead of -infinity, we will use 10 seconds as the lower bound, as in the text.

$$
\int^{18}_{10}N(\mu,\sigma)
$$

In [7]:
# Use 10 as a finite stand-in for -infinity:
# P(10 <= X <= 18) = F(18) - F(10), where F is the fitted normal CDF.
cdf_at_10, cdf_at_18 = norm.cdf([10, 18], loc=mean, scale=sd)
cdf_at_18 - cdf_at_10

0.05477828835707988

Now let's use -infinity as the lower bound to see the difference.

$$
\int^{18}_{-\infty}N(\mu,\sigma)
$$

In [8]:
# Same integral with the true lower limit: F(18) - F(-inf), where F(-inf) = 0.
cdf_lower, cdf_upper = norm.cdf([-np.inf, 18], loc=mean, scale=sd)
cdf_upper - cdf_lower

0.054778288391342723

# Area Under the Curve for different distances from the Mean

In [9]:
# Standard normal distribution: mean 0, standard deviation 1.
mu, sigma = 0, 1

In [10]:
# Probability mass within k standard deviations of the mean, for k = 1..10:
#   P(mu - k*sigma <= X <= mu + k*sigma) = F(mu + k*sigma) - F(mu - k*sigma)
# A comprehension keeps the loop variable local, so the notebook-level `x`
# (reused by other cells) is not overwritten as a side effect of this cell.
data_l = [
    {
        'distance_from_mean': '{} sigma'.format(k),
        'probability': (
            norm.cdf(mu + k * sigma, loc=mu, scale=sigma)
            - norm.cdf(mu - k * sigma, loc=mu, scale=sigma)
        ),
    }
    for k in range(1, 11)
]

# One row per k, with columns `distance_from_mean` and `probability`.
data_df = pd.DataFrame.from_records(data_l)

In [11]:
# Display the table of probabilities within 1..10 sigma of the mean.
data_df

Unnamed: 0,distance_from_mean,probability
0,1 sigma,0.682689
1,2 sigma,0.9545
2,3 sigma,0.9973
3,4 sigma,0.999937
4,5 sigma,0.999999
5,6 sigma,1.0
6,7 sigma,1.0
7,8 sigma,1.0
8,9 sigma,1.0
9,10 sigma,1.0


# Exercise 1

What is the probability of observing a value five sigma greater than the mean or more?

$$
\int^{\infty}_{5}N(\mu,\sigma)
$$

In [12]:
# Upper-tail probability P(X >= mu + 5*sigma).
# The threshold is derived from mu and sigma instead of being hard-coded, and
# norm.sf (the survival function, 1 - CDF) evaluates the tail directly:
# subtracting a CDF value that is almost 1 from 1 loses precision to
# floating-point cancellation this far from the mean.
x = mu + 5 * sigma
p = norm.sf(x, loc=mu, scale=sigma)
print(p)

2.866515719235352e-07


# Exercise 2

A fever is any temperature greater than 100.4 degrees Fahrenheit. Given the following measurements, what is the probability that the patient has a fever?

100.0, 99.8, 101.0, 100.5, 99.7

$$
\int^{\infty}_{100.4}N(\mu,\sigma)
$$

In [13]:
# Temperature measurements in degrees Fahrenheit, as listed in the exercise text.
recordings = [100.0, 99.8, 101.0, 100.5, 99.7]

In [14]:
# Fit a normal distribution to the measurements.
# np.std defaults to ddof=0 (population standard deviation).
mu = np.mean(recordings)
sigma = np.std(recordings)

# Fever threshold in degrees Fahrenheit.
x = 100.4

# P(temperature > x): norm.sf is the survival function (1 - CDF), which gives
# the upper tail directly instead of subtracting two CDF values.
p = norm.sf(x, loc=mu, scale=sigma)

print('probablity of fever is {:.2f} or {:.0%}'.format(p, p))

probablity of fever is 0.34 or 34%



# Exercise 3

Measuring the depth of a well by timing coin drops results in the following fall times:

2.5, 3.0, 3.5, 4, 2

The distance an object falls can be calculated, in meters, with the following formula:

$$
distance = 1/2 \times G \times time ^ 2
$$

Where G is 9.8 m/s$^2$ (the acceleration due to gravity) and time is in seconds. What is the probability that the well is over 500 meters deep?

$$
\int^{\infty}_{500}N(\mu,\sigma)
$$

First map recorded times to distances

In [15]:
# Recorded coin-drop fall times (presumably in seconds, given distances in meters -- TODO confirm).
times = [2.5, 3.0, 3.5, 4, 2]

In [16]:
def calc_distance_from_time(time, gravity=9.8):
    """Convert a fall time into a fall distance.

    Uses ``distance = 1/2 * gravity * time**2``.

    Parameters
    ----------
    time : float
        Fall time; in seconds when `gravity` is given in m/s^2.
    gravity : float, optional
        Gravitational acceleration. Defaults to 9.8 (m/s^2), the value the
        notebook uses.

    Returns
    -------
    float
        Distance fallen; in meters for the default units.
    """
    return 0.5 * gravity * (time ** 2)

In [17]:
# Convert every recorded fall time into the corresponding distance.
distances = list(map(calc_distance_from_time, times))

In [18]:
# Inspect the computed distances.
distances

[30.625000000000004, 44.1, 60.025000000000006, 78.4, 19.6]

Calculate mean and standard deviation for distances

In [19]:
# Fit the normal model to the distances; np.std uses its default ddof=0
# (population standard deviation).
mu, sigma = np.mean(distances), np.std(distances)


Calculate area under the curve

In [20]:
# P(depth > 500 m). The threshold lies so far above the fitted mean that, in
# double precision, the CDF there rounds to 1.0 and a difference of CDF values
# collapses to exactly 0.0. norm.sf (survival function, 1 - CDF) evaluates the
# upper tail directly and keeps the tiny but non-zero probability.
x = 500
p = norm.sf(x, loc=mu, scale=sigma)

In [21]:
# Report the tail probability both as a fraction and as a percentage.
print(
    'probablity of the well being over 500 meters deep is {:.5f} or {:.5%}'
    .format(p, p)
)

probablity of the well being over 500 meters deep is 0.00000 or 0.00000%
