# Chapter 7 Continuous Probability Distributions
## Eric Lambert

In [1]:
# Uniform distribution methods
def mean_uniform_dist(min, max):
    return (min + max) / 2

def variance_uniform_dist(min, max):
    return ((max - min)**2) / 12

def std_dev_uniform_dist(min, max):
    import math
    return math.sqrt(variance_uniform_dist(min, max))

def prob_x_uniform_dist(x, min, max):
    return (1 / (max - min)) * ((x - x) / 1)

def prob_lt_x_uniform_dist(x, min, max):
    if x < min:
        return 0
    elif x > max:
        return 1
    else: 
        return (x - min) / (max - min)

def prob_gt_x_uniform_dist(x, min, max):
    if x < min:
        return 1
    elif x > max:
        return 0
    else: 
        return 1 / (max - min) * ((max - x) / 1)

In [2]:
# Normal distribution methods
def std_norm_value(value, mean, std_dev):
    return (value - mean) / std_dev

def z_value(value, mean, std_dev):
    return std_norm_value(value, mean, std_dev)

def probability_norm_dist(value, mean, std_dev):
    import math
    a = 1 / (std_dev * math.sqrt((2 * math.pi)))
    b1 = (value - mean)**2
    b2 = 2 * std_dev**2
    b = math.e**(b1 / b2)
    return a * b

def area_under_normal_dist_in_range(mean, std_dev, lower_bound, upper_bound):
    import math
    double_prob = math.erf((lower_bound - mean) / (std_dev * math.sqrt(2)))
    prob_lower = double_prob / 2

    double_prob = math.erf((upper_bound - mean) / (std_dev * math.sqrt(2)))
    prob_upper = double_prob / 2
    return prob_upper - prob_lower

def area_under_normal_dist_out_range(mean, std_dev, lower_bound, upper_bound):
    return 1 - area_under_normal_dist_in_range(mean, std_dev, lower_bound, upper_bound)


## Homework Exercises

## Ex3. The closing price of Schnur Sporting Goods Inc. common stock is uniformly distributed between \$20 and \$30 per share. What is the probability that the stock price will be:

a. More than $27?

b. Less than or equal to $24?


In [3]:
min = 20
max = 30

# statistics of a uniform distribution
mean_uniform_dist = (min + max) / 2
variance_uniform_dist = ((max - min)**2) / 12
std_dev_uniform_dist = variance_uniform_dist**(1/2)
probability = 1 / (max - min) # if min <= x <= max and 0 elsewhere

print('part a')
x = 27
probability = (max - x) / (max - min) # if min <= x <= max and 0 elsewhere
print(f'The probability that the stock price will be more than $27 is {probability:.4f}.')

print('\npart b')
x = 24
probability = (x - min) / (max - min) # if min <= x <= max and 0 elsewhere
print(f'The probability that the stock price will be less than or equal to $24 is {probability:.4f}.')



part a
The probability that the stock price will be more than $27 is 0.3000.

part b
The probability that the stock price will be less than or equal to $24 is 0.4000.


## Ex5. The April rainfall in Flagstaff, Arizona, follows a uniform distribution between 0.5 and 3.00 inches.

a. What are the values for a and b?

b. What is the mean amount of rainfall for the month? What is the standard deviation?

c. What is the probability of less than an inch of rain for the month?

d. What is the probability of exactly 1.00 inch of rain?

e. What is the probability of more than 1.50 inches of rain for the month?

In [4]:
min = 0.5
max = 3.00

print('part a')
print(f'The values for a and b are {min:.2f} and {max:.2f}.')

print('\npart b')
mean = (min + max) / 2
variance = ((max - min)**2) / 12
std_dev = variance**(1/2)
print(f'The mean amount of rainfall for the month is {mean:.4f}.')
print(f'The standard deviation is {std_dev:.4f}.')

print('\npart c')
x = 1.00
probability = prob_lt_x_uniform_dist(x, min, max)
print(f'The probability of less than an inch of rain for the month is {probability:.4f}.')

print('\npart d')
x = 1.00
probability = prob_x_uniform_dist(x, min, max)
print(f'The probability of exactly 1.00 inch of rain is {probability:.4f}.')

print('\npart e')
x = 1.50
probability = prob_gt_x_uniform_dist(x, min, max)
print(f'The probability of more than 1.50 inches of rain for the month is {probability:.4f}.')

part a
The values for a and b are 0.50 and 3.00.

part b
The mean amount of rainfall for the month is 1.7500.
The standard deviation is 0.7217.

part c
The probability of less than an inch of rain for the month is 0.2000.

part d
The probability of exactly 1.00 inch of rain is 0.0000.

part e
The probability of more than 1.50 inches of rain for the month is 0.6000.


## Ex9. The mean of a normal probability distribution is 500; the standard deviation is 10.

a. About 68% of the observations lie between what two values?

b. About 95% of the observations lie between what two values?

c. Practically all of the observations lie between what two values?

In [5]:
# by Empirical Rule:
#   68% of observations are within one std_dev of the mean
#   95% of observations are within two std_devs of the mean
#   Practically all observations are within three std_devs of the mean

mean = 500
std_dev = 10

print('part a')
std_dev_count = 1
upper = mean + (std_dev_count * std_dev)
lower = mean - (std_dev_count * std_dev)
print(f'About 68% of the observations lie between {lower} and {upper}.')

print('\npart b')
std_dev_count = 2
upper = mean + (std_dev_count * std_dev)
lower = mean - (std_dev_count * std_dev)
print(f'About 95% of the observations lie between {lower} and {upper}.')

print('\npart c')
std_dev_count = 3
upper = mean + (std_dev_count * std_dev)
lower = mean - (std_dev_count * std_dev)
print(f'Practically all of the observations lie between {lower} and {upper}.')

part a
About 68% of the observations lie between 490 and 510.

part b
About 95% of the observations lie between 480 and 520.

part c
Practically all of the observations lie between 470 and 530.


## Ex15. The mean hourly pay of an American Airlines flight attendant is normally distributed with a mean of \$29.81 per hour and a standard deviation of \$9.31 per hour. What is the probability that the hourly pay of a randomly selected flight attendant:

a. Is between the mean and $35.00 per hour?

b. Is more than $35.00 per hour?

c. Is less than $20.00 per hour?

In [6]:
mean = 29.81
std_dev = 9.31

print('part a')
value = 35.00
z = z_value(value, mean, std_dev)
print(f'z-value: {z:.4f}')
table_val = 0.2131
probability = table_val
print(f'The probability that the hourly pay of a randomly selected flight attendant is between the mean and $35.00 per hour is {probability:.4f}')

print('\npart b')
value = 35.00
z = z_value(value, mean, std_dev)
print(f'z-value: {z:.4f}')
table_val = 0.2131
probability = 0.5000 - table_val
print(f'The probability that the hourly pay of a randomly selected flight attendant is more than $35.00 per hour is {probability:.4f}')

print('\npart c')
value = 20.00
z = z_value(value, mean, std_dev)
print(f'z-value: {z:.4f}')
table_val = .3531
probability = 0.5000 - table_val
print(f'The probability that the hourly pay of a randomly selected flight attendant is less than $20.00 per hour is {probability:.4f}')

part a
z-value: 0.5575
The probability that the hourly pay of a randomly selected flight attendant is between the mean and $35.00 per hour is 0.2131

part b
z-value: 0.5575
The probability that the hourly pay of a randomly selected flight attendant is more than $35.00 per hour is 0.2869

part c
z-value: -1.0537
The probability that the hourly pay of a randomly selected flight attendant is less than $20.00 per hour is 0.1469


## Ex19. The Internal Revenue Service repored the average refund in 2017 was \$2,878 with a standard deviation of \$520. Assume the amount refunded is normally distributed.

a. What percentage of the refunds are more than $3,500?

b. What percentage of the refunds are more than \$3,500 but less than \$4,000?

c. What percentage of the refunds are more than \$2,400 but less than \$4,000?

In [7]:
mean = 2878
std_dev = 520

print('part a')
value = 3500

z = z_value(value, mean, std_dev)
print(f'z-value_{value}: {z:.4f}')

table_val = 0.3849

# the table_val gives us the area from mean to value. we want the area from the value and above.
# the area above the mean is always .5. Therefore we subtract the area between the mean and the 
# value (table_val) to get the area above the value. 
probability = .5 - table_val
print(f'The percentage of the refunds more than $3,500 is {probability * 100:.2f}%')

print('\npart b')
value_1 = 3500
value_2 = 4000

z1 = z_value(value_1, mean, std_dev)
print(f'z-value_{value_1}: {z1:.4f}')

z2 = z_value(value_2, mean, std_dev)
print(f'z-value_{value_2}: {z2:.4f}')

table_val_1 = 0.3849
table_val_2 = 0.4846

# value_1 and value_2 are on the same side of the mean so subtracting the area of the smaller from that of the larger yields the area between them.
probability = table_val_2 - table_val_1
print(f'The percentage of the refunds more than $3,500 but less than $4,000 is {probability * 100:.2f}%')

print('\npart c')
value_1 = 2400
value_2 = 4000

z1 = z_value(value_1, mean, std_dev)
print(f'z-value_{value_1}: {z1:.4f}')

z2 = z_value(value_2, mean, std_dev)
print(f'z-value_{value_2}: {z2:.4f}')

table_val_1 = 0.3212
table_val_2 = 0.4846

# value_1 and value_2 are on different sides of the mean so adding their areas yields the area between them
probability = table_val_2 + table_val_1 
print(f'The percentage of the refunds more than $3,500 but less than $4,000 is {probability * 100:.2f}%')

part a
z-value_3500: 1.1962
The percentage of the refunds more than $3,500 is 11.51%

part b
z-value_3500: 1.1962
z-value_4000: 2.1577
The percentage of the refunds more than $3,500 but less than $4,000 is 9.97%

part c
z-value_2400: -0.9192
z-value_4000: 2.1577
The percentage of the refunds more than $3,500 but less than $4,000 is 80.58%


## Ex33. The Bureau of Labor Statistics’ American Time Use Survey, [www.bls.gov/data]("https://www.bls.gov/data"), showed that the amount of time spent using a computer for leisure varied greatly by age. Individuals age 75 and over averaged 0.3 hour (18 minutes) per day using a computer for leisure. Individuals ages 15 to 19 spend 1.0 hour per day using a computer for leisure. If these times follow an exponential distribution, find the proportion of each group that spends:

a. Less than 15 minutes per day using a computer for leisure.

b. More than 2 hours.

c. Between 30 minutes and 90 minutes using a computer for leisure.

d. Find the 20th percentile. Eighty percent spend more than what amount of time?

In [8]:
# exponential distributions
def prob_expon_dist(value, rate):
    import math
    return 1 - math.e**(-rate * value)

In [9]:
print('Ex33')
group_a = 'individuals age 75 and over'
group_b = 'individuals ages 15 to 19'
mean_a = 0.3 * 60
mean_b = 1.0 * 60

rate_a = 1 / mean_a
rate_b = 1 / mean_b

print('part a')
ask = 'less than 15 minutes per day using a computer for leisure'
value = 15

prob_a = prob_expon_dist(value, rate_a)
prob_b = prob_expon_dist(value, rate_b)

print(f'The proportion of {group_a} that spend {ask} is {prob_a:.4f}')
print(f'The proportion of {group_b} that spend {ask} is {prob_b:.4f}')

print('\npart b')
ask = 'more than 2 hours per day using a computer for leisure'

value = 2 * 60

prob_a = prob_expon_dist(value, rate_a)
prob_b = prob_expon_dist(value, rate_b)

print(f'The proportion of {group_a} that spend {ask} is {prob_a:.4f}')
print(f'The proportion of {group_b} that spend {ask} is {prob_b:.4f}')

print('\npart c')
ask = 'between 30 minutes and 90 minutes using a computer for leisure'

value_a = 30
value_b = 90

prob_a = prob_expon_dist(value_b, rate_a) - prob_expon_dist(value_a, rate_a) 
prob_b = prob_expon_dist(value_b, rate_b) - prob_expon_dist(value_a, rate_b)

print(f'The proportion of {group_a} that spend {ask} is {prob_a:.4f}')
print(f'The proportion of {group_b} that spend {ask} is {prob_b:.4f}')

print('\npart d')
ask = 'the 20th percentile'
perctile = 20 / 100
import math
x = math.log(1 - perctile) / (-1 / mean_a)
y = math.log(1 - perctile) / (-1 / mean_b)

print(f'For {group_a}, {ask} is {x:.1f}')
print(f'For {group_b}, {ask} is {y:.1f}')

Ex33
part a
The proportion of individuals age 75 and over that spend less than 15 minutes per day using a computer for leisure is 0.5654
The proportion of individuals ages 15 to 19 that spend less than 15 minutes per day using a computer for leisure is 0.2212

part b
The proportion of individuals age 75 and over that spend more than 2 hours per day using a computer for leisure is 0.9987
The proportion of individuals ages 15 to 19 that spend more than 2 hours per day using a computer for leisure is 0.8647

part c
The proportion of individuals age 75 and over that spend between 30 minutes and 90 minutes using a computer for leisure is 0.1821
The proportion of individuals ages 15 to 19 that spend between 30 minutes and 90 minutes using a computer for leisure is 0.3834

part d
For individuals age 75 and over, the 20th percentile is 4.0
For individuals ages 15 to 19, the 20th percentile is 13.4


## Ex35. If a continuous random variable, x, is uniformly distributed with a minimum value of 5 and a maximum value of 25:

a. What is the probability that x = 10? Why?

b. What is the probability that x = 13.4? Why?

In [10]:
min = 5
max = 25

print('part a')
x = 10
prob = prob_x_uniform_dist(x, min, max)
print(f'The probability that x = {x} is {prob:.4f}')
print('This is because a continuous probability distribution requires a lower and upper bound to identify \nthe area under the distribution that represents the probability of falling in that area. A single \npoint does not have a lower and upper bound.')

print('\npart b')
x = 13.4
prob = prob_x_uniform_dist(x, min, max)
print(f'The probability that x = {x} is {prob:.4f}')
print('This is because a continuous probability distribution requires a lower and upper bound to identify \nthe area under the distribution that represents the probability of falling in that area. A single \npoint does not have a lower and upper bound.')



part a
The probability that x = 10 is 0.0000
This is because a continuous probability distribution requires a lower and upper bound to identify 
the area under the distribution that represents the probability of falling in that area. A single 
point does not have a lower and upper bound.

part b
The probability that x = 13.4 is 0.0000
This is because a continuous probability distribution requires a lower and upper bound to identify 
the area under the distribution that represents the probability of falling in that area. A single 
point does not have a lower and upper bound.


## Ex37. The amount of cola in a 12-ounce can is uniformly distributed between 11.96 ounces and 12.05 ounces.

a. What is the mean amount per can?

b. What is the standard deviation amount per can?

c. What is the probability of selecting a can of cola and finding it has less than 12 ounces?

d. What is the probability of selecting a can of cola and finding it has more than 11.98 ounces?

e. What is the probability of selecting a can of cola and finding it has more than 11.00 ounces?

In [11]:
min = 11.96
max = 12.05
mean = mean_uniform_dist(min, max)
variance = variance_uniform_dist(min, max)
std_dev = std_dev_uniform_dist(min, max)

print('part a')
print(f'The mean amount per can is {mean:.4f}.')

print('\npart b')
print(f'The standard deviation amount per can is {std_dev:.4f}.')

print('\npart c')
x = 12
prob = prob_lt_x_uniform_dist(x, min, max)
print(f'The probability of selecting a can of cola and finding it has less than {x} ounces is {prob:.4f}')

print('\npart d')
x = 11.98
prob = prob_gt_x_uniform_dist(x, min, max)
print(f'The probability of selecting a can of cola and finding it has more than {x} ounces is {prob:.4f}')

print('\npart e')
x = 11.00
prob = prob_gt_x_uniform_dist(x, min, max)
print(f'The probability of selecting a can of cola and finding it has less than {x} ounces is {prob:.4f}')

TypeError: 'float' object is not callable

## Ex43. In 2018, the U.S. Department of Agriculture issued a [report](http://www.cnpp.usda.gov/sites/default/files/CostofFoodNov2018.pdf) indicating a family of four spent an average of \$1,054.50 per month on food. This is for a family of four (two parents aged 19 to 50) and two children (one whose age is between 6 and 8 years and one between 9 and 11 years). Assume the distribution of food expenditures for a family of four follows the normal distribution with a standard deviation of \$120 per month.

a. What percent of the families spend more than \$900 but less than \$1,054.50 per month on food?

b. What percent of the families spend less than \$900 per month on food?

c. What percent spend between \$900 and \$1,200 per month on food?

d. What percent spend between \$900 and \$1,000 per month on food?


In [124]:
mean = 1054.50
std_dev = 120.00

print('part a')
value = 900.00

z = z_value(value, mean, std_dev)
print(f'z-value_{value}: {z:.2f}')

table_val = 0.4015

# the table_val gives us the area from mean to value.
probability = table_val

print(f'{probability * 100:.2f} percent of the families spend more than ${value:.2f} but less than ${mean:.2f} per month on food.')

print('\npart b')
value = 900.00

z = z_value(value, mean, std_dev)
print(f'z-value_{value}: {z:.2f}')

table_val = 0.4015

# the table_val gives us the area from mean to value. we want the area from the value and above.
# the area above the mean is always .5. Therefore we subtract the area between the mean and the 
# value (table_val) to get the area above the value. 
probability = .5000 - table_val

print(f'{probability * 100:.2f} percent of the families spend more than ${value:.2f} but less than ${mean:.2f} per month on food.')

print('\npart c')
value1 = 900.00
value2 = 1200.00

z1 = z_value(value1, mean, std_dev)
z2 = z_value(value2, mean, std_dev)

print(f'z-value_{value1}: {z1:.2f}')
print(f'z-value_{value2}: {z2:.2f}')

table_val_1 = 0.4015
table_val_2 = 0.3869

# the table_val gives us the area from mean to value. table_val_1 and table_val_2 are on opposite sides of the mean so we add them to
# to get the area under the curve that we are looking for.
probability= table_val_1 + table_val_2

print(f'{probability * 100:.2f} percent of the families spend more than ${value1:.2f} but less than ${value2:.2f} per month on food.')

print('\npart d')
value1 = 900.00
value2 = 1000.00

z1 = z_value(value1, mean, std_dev)
z2 = z_value(value2, mean, std_dev)

print(f'z-value_{value1}: {z1:.2f}')
print(f'z-value_{value2}: {z2:.2f}')

table_val_1 = 0.4015
table_val_2 = 0.1736

# the table_val gives us the area from mean to value. table_val_1 and table_val_2 are on the same side of the mean so we subtract them to
# to get the area under the curve that we are looking for.
probability= table_val_1 - table_val_2

print(f'{probability * 100:.2f} percent of the families spend more than ${value1:.2f} but less than ${value2:.2f} per month on food.')

part a
z-value_900.0: -1.29
40.15 percent of the families spend more than $900.00 but less than $1054.50 per month on food.

part b
z-value_900.0: -1.29
9.85 percent of the families spend more than $900.00 but less than $1054.50 per month on food.

part c
z-value_900.0: -1.29
z-value_1200.0: 1.21
78.84 percent of the families spend more than $900.00 but less than $1200.00 per month on food.

part d
z-value_900.0: -1.29
z-value_1000.0: -0.45
22.79 percent of the families spend more than $900.00 but less than $1000.00 per month on food.


## Ex47. A recent study reported that Americans spend an average of 270 minutes per day watching TV. Assume the distribution of minutes per day watching TV follows a normal distribution with a standard deviation of 23 minutes.

a. What percent of the population watch more than 300 minutes per day?

b. What percent of the population watch more than 220 minutes per day?

c. What percent of the population watch between 220 minutes and 300 minutes?

d. Let’s define a “binge watcher” as someone in the upper 5% of the distribution of minutes watching TV. How many minutes does a “binge watcher” spend per day watching TV?

In [1]:
mean = 270
std_dev = 23

print('part a')
value = 300
z = z_value(value, mean, std_dev)
print(f'z-value_{value}: {z:.2f}')
table_val_a = 0.4032

# the table_val gives us the area from mean to value. We want to area above the value.
# Because the entire area above the mean totals .5 of the distribution, we want .5 - table_val.
probability = .5 - table_val_a
print(f'{probability:.4f} percent of the population watch more than 300 minutes per day.')

print('\npart b')
value = 220
z = z_value(value, mean, std_dev)
print(f'z-value_{value}: {z:.2f}')
table_val_b = 0.4850

# the table_val gives us the area from mean to value. We want to area above the value.
# Because the entire area above the mean totals .5 of the distribution, we want .5 - table_val.
probability = .5 + table_val_b
print(f'{probability:.4f} percent of the population watch more than 300 minutes per day.')

print('\npart c')
# parts a and b give us the table value for the values we are interested in.
# because they are on the opposite sides of the mean and we are looking for the area
# between them we can add the table values to get what we are looking for.
probability = table_val_a + table_val_b
print(f'{probability:.4f} percent of the population watch more than 300 minutes per day.')

print('\npart d')
table_val = .4500
nearest_table_val = .4495
z = 1.64

# z = (x - mean) / std_dev
x = z * std_dev + mean
print(f'A “binge watcher” spend {x:.1f} minutes (or {x / 60:.1f} hours) per day watching TV.')

part a


NameError: name 'z_value' is not defined

## Ex57. “Boot time” (the time between the appearance of the Bios screen to the first file that is loaded in Windows) on Eric Mouser’s personal computer follows an exponential distribution with a mean of 27 seconds. What is the probability his “boot” will require:

a. Less than 15 seconds?

b. More than 60 seconds?

c. Between 30 and 45 seconds?

d. What is the point below which only 10% of the boots occur?

In [150]:
mean = 27
rate = 1 / mean

print('part a')
value = 15
prob = prob_expon_dist(value, rate)
print(f'The probability his “boot” will require less than 15 seconds is {prob:.4f}.')

print('\npart b')
value = 60
prob = 1 - prob_expon_dist(value, rate)
print(f'The probability his “boot” will require more than 60 seconds is {prob:.4f}.')

print('\npart c')
value1 = 30
prob1 = prob_expon_dist(value1, rate)

value2 = 45
prob2 = prob_expon_dist(value2, rate)

prob = prob2 - prob1
print(f'The probability his “boot” will require between 30 and 45 seconds is {prob:.4f}.')

print('\npart d')
perctile = 10 / 100
import math
x = math.log(1 - perctile) / (-1 / mean)

print(f'The point below which only 10% of the boots occur is {x:.2f} seconds.')

part a
The probability his “boot” will require less than 15 seconds is 0.4262.

part b
The probability his “boot” will require more than 60 seconds is 0.1084.

part c
The probability his “boot” will require between 30 and 45 seconds is 0.1403.

part d
The point below which only 10% of the boots occur is 2.84 seconds.


## Ex71. Does not exist in the 18th Edition text.

## Ex73. Does not exist in the 18th Edition text.