In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

## P3
Auditors at a small community bank randomly sample 100 withdrawal transactions made during the week at an ATM located near the bank's main branch. Over the past 2 years, the average withdrawal amount has been 50 dollars with a standard deviation of 40 dollars. Since audit investigations are typically expensive, the auditors decide not to initiate further investigations if the mean transaction amount of the sample is between 45 dollars and 55 dollars. What is the probability that in any given week, there will be an investigation?

A.	1.25%

B.	2.5%

C.	10.55%

D.	21.1%

E.	50%


In [2]:
# Problem inputs taken from the P3 statement.
pop_mean = 50    # historical average withdrawal amount, in dollars
pop_stdev = 40   # historical standard deviation of withdrawals, in dollars
n_samples = 100  # number of withdrawal transactions sampled per week

In [3]:
# Bounds on the sample mean (in dollars) within which no investigation is started.
# These are fixed decision thresholds from the problem statement, not a confidence interval.
x_l, x_h = 45, 55

In [4]:
# The mean of n_samples withdrawals is modelled as a normal variable centred on
# pop_mean whose spread is the standard error, pop_stdev / sqrt(n_samples).
std_error = pop_stdev / np.sqrt(n_samples)
sample_mean_dist = stats.norm(loc=pop_mean, scale=std_error)

# Cumulative probabilities at the two thresholds.
p_l = sample_mean_dist.cdf(x_l)  # P(sample mean < x_l)
p_h = sample_mean_dist.cdf(x_h)  # P(sample mean < x_h)

# An investigation is triggered only when the sample mean falls outside
# [x_l, x_h], so take the complement of the probability mass between them.
p_no_investigation = p_h - p_l
p_investigation = 1 - p_no_investigation

print("probability that in any given week, there will be an investigation = {:.3f}".format(p_investigation))

probability that in any given week, there will be an investigation = 0.211


## P4
The auditors from the above example would like to keep the probability of an investigation at 5%. Which of the following represents the minimum number of transactions that they should sample if they do not want to change the thresholds of 45 and 55? Assume that the sample statistics remain unchanged.

A.	144

B.	150

C.	196

D.	250

E.	Not enough information


In [6]:
# Reuses pop_mean, pop_stdev and x_h defined in the P3 cells.
# Required P(investigation) = 5%, i.e. 0.05.
req_p_l = 0.05/2 # thresholds are symmetric about pop_mean, so each tail gets half of the 5%
req_p_h = 1 - req_p_l
Z_l = stats.norm.ppf(req_p_l)
Z_h = stats.norm.ppf(req_p_h) # Z_l mirrors Z_h; either works if paired with the matching threshold (x_l or x_h).
# Solve Z_h * pop_stdev / sqrt(n) = x_h - pop_mean for n.
req_n_samples = (Z_h*pop_stdev/(x_h-pop_mean))**2
# A sample count is a whole number, and any n below req_n_samples leaves
# P(investigation) above 5%. Round UP (ceil) rather than to the nearest integer,
# which could round down and miss the target.
print("The minimum number transactions that they should sample = {}".format(np.ceil(req_n_samples)))

The minimum number transactions that they should sample = 246.0
