In [108]:
import numpy as np
import math
import scipy.stats as sp

s = '---1.1--- \nA z-test can be used.  \nNull Hypothesis: The mean is 0.75\n'
s+= 'Alternative Hypothesis: The mean is not 0.75\n'
print(s)

hypothesis = 0.75

with open('eng1.txt', "r") as f:
    fin = f.readlines()
data = [float(x) for x in fin]

N = len(data)
avg = np.mean(data)
sd = np.std(data, ddof=1)

se = (sd / math.sqrt(N))
z = (avg - hypothesis) / se

print('---1.2---')
p = 2 * sp.norm.cdf(z)
print(f'Sample Size: {N}\nSample Mean: {avg}\nStandard Error: {se}\n', end='')
print(f'Standard Score: {z}\nP-value: {p}\n', end='')

---1.1--- 
A z-test can be used.  
Null Hypothesis: The mean is 0.75
Alternative Hypothesis: The mean is not 0.75

---1.2---
Sample Size: 937
Sample Mean: 0.7430304110448239
Standard Error: 0.004153027288269652
Standard Score: -1.6781948375012814
P-value: 0.09330906925243751


In [111]:
print('--1.2 cont.---')
print('The results are significant at a level of 0.1, \nbut not at levels of 0.05 or 0.01.\n')
print('We can conclude that the null hypothesis is reasonably accurate, \nbut we cannot conclude if it is accurate enough.')

--1.2 cont.---
The results are significant at a level of 0.1, 
but not at levels of 0.05 or 0.01.

We can conclude that the null hypothesis is reasonably accurate, 
but we cannot conclude if it is accurate enough.


In [115]:
import numpy as np
import math
import scipy.stats as sp
with open('eng1.txt', "r") as f:
    fin = f.readlines()
ata = [float(x) for x in fin]
N = len(data)
avg = np.mean(data)
sd = np.std(data, ddof=1)
hypothesis = 0.75


c = 0.95
z_c = sp.norm.ppf(1-(1-c)/2)

se_new = (avg - hypothesis) / z_c
se_new = abs(se_new)
N_new = (sd / se_new) ** 2

z = (avg - hypothesis) / se_new
p = 2 * sp.norm.cdf(z)
N_new = (sd / se_new) ** 2

print("---1.3---")
print(f'Minimum SE: {se_new}\nMinimum Sample Size: {N_new}\n\nP-value using MinSE: {p}')
print('The "P-value using MinSE" demonstrates the minimum SE is accurate.')

---1.3---
Minimum SE: 0.003555978074164273
Minimum Sample Size: 1278.0593191131147

P-value using MinSE: 0.05
The "P-value using MinSE" demonstrates the minimum SE is accurate.


In [5]:
import numpy as np
import math
import scipy.stats as sp

print('---1.4---')
print('A two-sample z=test.')
print('Null Hypothesis: Means of the two sets of students are the same.')
print('Alternative Hypothesis: Means of the two sets of students are not the same.\n\n')

with open('eng0.txt', "r") as f:
    fin = f.readlines()
data0 = [float(x) for x in fin]
with open('eng1.txt', "r") as f:
    fin = f.readlines()
data1 = [float(x) for x in fin]
N0 = len(data0)
N1 = len(data1)

avg0 = np.mean(data0)
avg1 = np.mean(data1)
sd0 = np.std(data0, ddof=1)
sd1 = np.std(data1, ddof=1)
sd = (sd0**2)/N0
sd += (sd1**2)/N1
sd = math.sqrt(sd)

se = sd
u = avg1 - avg0
z = (0 - u) / se
p = 2 * (sp.norm.cdf(z))


print('---1.5---')
print(f'Sample Size 0: {N0}\nSample Size 1: {N1}\nSample Mean 0: {avg0}\nSample Mean 1: {avg1}')
print(f'Standard Error: {se}\nZ Score: {z}\nP-value: {p}\n', end='')

---1.4---
A two-sample z=test.
Null Hypothesis: Means of the two sets of students are the same.
Alternative Hypothesis: Means of the two sets of students are not the same.


---1.5---
Sample Size 0: 1977
Sample Size 1: 937
Sample Mean 0: 0.6399545077035914
Sample Mean 1: 0.7430304110448239
Standard Error: 0.007065420910043284
Z Score: -14.588784540028351
P-value: 3.3104307168195455e-48


In [86]:
# --- PROBLEM 2 ---

In [7]:
import numpy as np
import math
import scipy.stats as sp

data = [3, -3, 3, 12, 15, -16, 17, 19, 23, -24, 32]
N = len(data)
avg = np.mean(data)
sd = np.std(data, ddof=1)

c = 0.95
t_c = sp.t.ppf(1 - (1 - c)/2, df=N-1)
se = sd / math.sqrt(N)
diff = t_c * se
u = (avg - diff, avg + diff)

print('---2.1---')
print(f'Sample Mean: {avg}\nStandard Error: {se}\n', end='')
print(f'Standard Statistic: {t_c}\nInterval: {u}')

---2.1---
Sample Mean: 7.363636363636363
Standard Error: 5.0762776757504415
Standard Statistic: 2.2281388519649385
Interval: (-3.9470151490654715, 18.674287876338198)


In [13]:
import numpy as np
import math
import scipy.stats as sp

data = [3, -3, 3, 12, 15, -16, 17, 19, 23, -24, 32]
N = len(data)
avg = np.mean(data)
sd = np.std(data, ddof=1)

c = 0.90
t_c = sp.t.ppf(1 - (1 - c)/2, df=N-1)
se = sd / math.sqrt(N)
diff = t_c * se
u = (avg - diff, avg + diff)
print('---2.2---')
print(f'Sample Mean: {avg}\nStandard Error: {se}\n', end='')
print(f'Standard Statistic: {t_c}\nInterval: {u}\n')

print('The 90% confidence interval is slightly narrower than the 95% interval, and this makes sense.')
print('As c% decreases, we can be less and less sure that our results are accurate, approacing a range of 0.')
print('Therefore, a decrease in range by 4 is reasonable (range ~= 22.5 --> range ~= 18.5)')

---2.2---
Sample Mean: 7.363636363636363
Standard Error: 5.0762776757504415
Standard Statistic: 1.8124611228107335
Interval: (-1.8369195722533416, 16.56419229952607)

The 90% confidence interval is slightly narrower than the 95% interval, and this makes sense.
As c% decreases, we can be less and less sure that our results are accurate, approacing a range of 0.
Therefore, a decrease in range by 4 is reasonable (range ~= 22.5 --> range ~= 18.5)


In [15]:
import numpy as np
import math
import scipy.stats as sp

data = [3, -3, 3, 12, 15, -16, 17, 19, 23, -24, 32]
N = len(data)
avg = np.mean(data)
sd = 16.836

c = 0.95
z_c = sp.norm.ppf(1-(1-c)/2)

se = sd / math.sqrt(N)
diff =z_c * se
u = (avg - diff, avg + diff)
print('---2.3---')
print(f'Sample Mean: {avg}\nStandard Error: {se}\n', end='')
print(f'Standard Statistic: {t_c}\nInterval: {u}\n')

print('The given standard deviation resulted in a lower standard error calculation than previous analyses.')
print('This in turn led to a confidence interval narrower than the one constructed in Problem 2 Part 1.')

---2.3---
Sample Mean: 7.363636363636363
Standard Error: 5.076244997311228
Standard Statistic: 1.8124611228107335
Interval: (-2.585621007795268, 17.312893735067995)

The given standard deviation resulted in a lower standard error calculation than previous analyses.
This in turn led to a confidence interval narrower than the one constructed in Problem 2 Part 1.


In [None]:
print('---2.4---')
