# Importing packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import seaborn as sns

# Generating normal data

In [None]:
# Draw 100,000 samples from the standard normal distribution
# (mean 0, standard deviation 1).
normal_data = np.random.randn(100_000)

In [None]:
# Show how the generated samples are distributed: a 30-bin histogram
# drawn on a single 10x5 inch axes.
fig, axs = plt.subplots(figsize=(10, 5))
axs.hist(normal_data, bins=30)
axs.set_xlabel("Bins")
axs.set_title("Histogram of generated data")
axs.set_ylabel("Count")

In [None]:
# Draw 100,000 samples from a normal distribution with mean 65 and
# standard deviation 3.
normal_data = np.random.normal(65, 3, 100_000)

In [None]:
# Show how the generated samples are distributed: a 30-bin histogram
# drawn on a single 10x5 inch axes.
fig, axs = plt.subplots(figsize=(10, 5))
axs.hist(normal_data, bins=30)
axs.set_xlabel("Bins")
axs.set_title("Histogram of generated data")
axs.set_ylabel("Count")

In [None]:
# Simulate 1000 exam scores drawn from a normal distribution with
# mean 65 and standard deviation 9.
normal_data = np.random.normal(65, 9, 1000)

In [None]:
# Density-normalised histogram of the exam scores with the fitted normal
# curve drawn on top of it.
plt.hist(normal_data, bins=20, rwidth=0.8, density=True)
plt.title("Histogram of generated data")
plt.xlabel("Bins")
# density=True makes the bar heights probability densities rather than raw
# counts, so the axis is labelled accordingly (this also matches the scale
# of the pdf curve plotted below).
plt.ylabel("Density")

# Evaluate the normal pdf, parameterised by the sample mean and standard
# deviation, on a 0.1-spaced grid across the observed range of the data.
rng = np.arange(np.min(normal_data), np.max(normal_data), 0.1)
plt.plot(rng, norm.pdf(rng, np.mean(normal_data), np.std(normal_data)))

In [None]:
# Report the sample mean and standard deviation, each rounded to the
# nearest whole number.
print("The mean of the scores is ", np.round(normal_data.mean()))
print("The standard deviation of the scores is", np.round(normal_data.std()))

In [None]:
# z-score of a raw score of 55, standardised with the sample mean and
# standard deviation after rounding both to whole numbers.
avg = np.round(normal_data.mean())
std = np.round(normal_data.std())

z_score = np.round((55 - avg) / std, 2)
print("The z-score for give x value is :", z_score)

In [None]:
# P(score < 55): the standard normal CDF evaluated at the z-score,
# rounded to two decimals.
prob = np.round(norm.cdf(z_score), decimals=2)

print("The probability of getting score less than 55 is ", prob)

In [None]:
# Areas under the standard normal curve to the left and to the right of
# z = -1.11 (the two areas sum to 1).
prob_under = norm.cdf(-1.11)
prob_above = 1 - prob_under

print(prob_under, prob_above, sep="\n")

In [None]:
# Shade the standard normal curve on either side of z = -1.11 and annotate
# each region with its probability.
plt.rcParams['figure.figsize'] = (5,5)

# Both regions meet at the same cut-off (-1.11) so that no unshaded gap is
# left between them.
left_tail = np.arange(-3, -1.11, 0.01)
right_region = np.arange(-1.11, 3, 0.01)

plt.fill_between(x=left_tail,
                y1=norm.pdf(left_tail),
                facecolor='red',
                alpha=0.35)
plt.fill_between(x=right_region,
                y1=norm.pdf(right_region),
                facecolor="blue",
                alpha=0.35)

# Probability labels, rounded to two decimals, placed inside each region.
plt.text(x=-1.8, y=0.05, s=round(prob_under,2))
plt.text(x=0.5, y=0.05, s=round(prob_above,2))

plt.title("Standard normal distribution")
plt.xlabel("Z-Score")

In [None]:
# z-score of a raw score of 85, standardised with the sample mean and
# standard deviation after rounding both to whole numbers.
avg = np.round(normal_data.mean())
std = np.round(normal_data.std())

z_score = np.round((85 - avg) / std, 2)
print("The z-score for give x value is :", z_score)

In [None]:
# P(score > 85) is the complement of the standard normal CDF at the
# z-score; both the CDF value and the complement are rounded to 2 decimals.
prob = np.round(norm.cdf(z_score), decimals=2)
prob_above_85 = round(1 - prob, 2)

print("The probability of getting score greater than 85 is ", prob_above_85)

In [None]:
# Areas under the standard normal curve to the left and to the right of
# z = 2.22 (the two areas sum to 1).
prob_under = norm.cdf(2.22)
prob_above = 1 - prob_under

print(prob_under, prob_above, sep="\n")

In [None]:
# Shade the standard normal curve on either side of z = 2.22 and annotate
# each region with its probability.
plt.rcParams['figure.figsize'] = (7,5)

# Build each x-grid once and reuse it for both the x values and the pdf.
body_region = np.arange(-3, 2.22, 0.01)
right_tail = np.arange(2.22, 3, 0.01)

plt.fill_between(x=body_region,
                y1=norm.pdf(body_region),
                facecolor='blue',
                alpha=0.35)
plt.fill_between(x=right_tail,
                y1=norm.pdf(right_tail),
                facecolor="red",
                alpha=0.35)

# Probability labels, rounded to two decimals, placed inside each region.
plt.text(x=0, y=0.05, s=round(prob_under,2))
plt.text(x=2.5, y=0.01, s=round(prob_above,2))

plt.title("Standard normal distribution")
plt.xlabel("Z-Score")

In [None]:
# z-scores of the raw scores 70 and 80, standardised with the sample mean
# and standard deviation after rounding both to whole numbers.
avg = np.round(normal_data.mean())
std = np.round(normal_data.std())

z_score_70 = np.round((70 - avg) / std, 2)
print("The z-score value for x = 70 is :", z_score_70)

z_score_80 = np.round((80 - avg) / std, 2)
print("The z-score value for x = 80 is :", z_score_80)

In [None]:
# Probability of a score between 70 and 80: the difference between the
# standard normal CDF at z = 1.67 and at z = 0.56.
# NOTE(review): 0.56 and 1.67 are hard-coded here, presumably the z-scores
# of 70 and 80 -- confirm they match the values computed from the data.
prob_70 = norm.cdf(0.56)
prob_80 = norm.cdf(1.67)

prob_between_70_80 = round(prob_80 - prob_70, 2)

print("The probability of getting score between 70 and 80 is ", prob_between_70_80)

In [None]:
# Split the curve into three areas: below the lower cut-off, above the
# upper cut-off, and the band in between.
prob_below, prob_above = prob_70, 1 - prob_80
prob_between = prob_between_70_80

print(prob_below, prob_above, prob_between, sep="\n")

In [None]:
# Shade the standard normal curve in three regions split at z = 0.56 and
# z = 1.67, and annotate each region with its probability.
plt.rcParams['figure.figsize'] = (7,5)

# Build each x-grid once and reuse it for both the x values and the pdf.
below_region = np.arange(-3, 0.56, 0.01)
between_region = np.arange(0.56, 1.67, 0.01)
above_region = np.arange(1.67, 3, 0.01)

plt.fill_between(x=below_region,
                y1=norm.pdf(below_region),
                facecolor='red',
                alpha=0.35)
plt.fill_between(x=between_region,
                y1=norm.pdf(between_region),
                facecolor="blue",
                alpha=0.35)
plt.fill_between(x=above_region,
                y1=norm.pdf(above_region),
                facecolor="red",
                alpha=0.35)

# Probability labels, rounded to two decimals, placed inside each region.
plt.text(x=0, y=0.05, s=round(prob_below,2))
plt.text(x=2.0, y=0.01, s=round(prob_above,2))
plt.text(x=1.12, y=0.05, s=round(prob_between,2))


plt.title("Standard normal distribution")
plt.xlabel("Z-Score")

# Confidence Intervals

In [None]:
# Inputs used by the confidence-interval computation:
# n (sample size), xbar (sample mean) and sigma (standard deviation).
n, xbar, sigma = 44, 10.455, 7.7

In [None]:
# 90% confidence interval for the population mean: xbar +/- z * sigma/sqrt(n)

# Standard error of the mean. Kept at full precision so that rounding error
# does not propagate into the margin of error and the interval; values are
# rounded only when displayed.
SE = sigma / np.sqrt(n)
print("Standard Error: ", round(SE, 2))

# Margin of error at the 90% confidence level. A two-sided 90% interval
# leaves 5% in each tail, so the critical value is the 95th percentile of
# the standard normal distribution (about 1.645).
z_critical = norm.ppf(0.95)
ME = z_critical * SE
print("Margin Error: ", round(ME, 2))

# Confidence interval limits, rounded once at the end.
lower_limit = round(xbar - ME, 2)
upper_limit = round(xbar + ME, 2)

print("Confidence Interval: ", (lower_limit, upper_limit))

# Confidence Interval using t-distribution

In [None]:
# Sample readings and the summary statistics needed for a t-based
# confidence interval.
data = [98.6, 102.1, 100.7, 102.0, 97.0,
        103.4, 98.9, 101.6, 102.9, 105.2]

xbar = np.mean(data)
# Sample size is the number of observations; len() counts every reading,
# including any that happen to be exactly zero.
n = len(data)
# ddof=1 gives the sample standard deviation (divides by n - 1), which is
# the estimate a t-interval is built on.
sigma = np.std(data, ddof=1)
degree_freedom = n - 1
# Note: this holds the per-tail area (0.05 / 2) of a two-sided 95% interval.
alpha = 0.05/2


print("mean: ", xbar)
print("sample size: ", n)
print("standard deviation: ", sigma)
print("degrees of freedom: ", degree_freedom)
print("alpha :", alpha)

In [None]:
# Critical t value taken from a t table (df = 9, alpha/2 = 0.025).
t_table = 2.262

# Standard error of the mean. Kept at full precision so that rounding error
# does not propagate into the margin of error; rounded only for display.
SE = sigma / np.sqrt(n)
print("Standard Error: ", round(SE, 2))

# Margin of error = critical value * standard error.
ME = t_table * SE
print("Margin Error: ", round(ME, 2))

In [None]:
# The interval is the sample mean minus/plus the margin of error.
lower_limit, upper_limit = xbar - ME, xbar + ME
CI = (lower_limit, upper_limit)

print("Confidence interval for population mean is :", CI)

# Confidence interval for population proportion

In [None]:
# Sample size and sample proportion (450 out of 1500).
n = 1500
p = 450 / n
print("sample proportion: ", p)

In [None]:
# Critical z value from the z-table.
# NOTE(review): 1.65 is the (rounded) two-sided 90% value, i.e. alpha/2 = 0.05.
# The original comment cited alpha/2 = 0.025, which would correspond to 1.96
# -- confirm which confidence level is intended.
z_score = 1.65

# Standard error of a sample proportion: sqrt(p * (1 - p) / n).
SE = round(np.sqrt((p*(1-p))/n), 4)
print("Standard Error: ", SE)

# Margin of error, computed from z_score rather than a repeated literal.
# Four decimals are kept because the margin is of the same order as SE;
# rounding it to two decimals would distort the interval.
ME = round(z_score*SE, 4)
print("Margin Error: ", ME)

In [None]:
# Interval for the population proportion: sample proportion minus/plus the
# margin of error, each limit rounded to four decimals.
lower_limit, upper_limit = round(p - ME, 4), round(p + ME, 4)
CI = (lower_limit, upper_limit)

print("Confidence Interval for population proportion is :", CI)

# Confidence interval for variance

In [None]:
# Inputs for the variance confidence interval.

# Sample size and the degrees of freedom derived from it.
n = 30
df = n - 1

# Significance level. As written this is 0.01, i.e. 1%.
# NOTE(review): the original comment said 5% -- confirm the intended level.
alpha = 0.01

# Sample variance: the square of 1.20 (1.20 being the standard deviation).
var = (1.20 * 1.20)

In [None]:
# scipy.stats supplies the chi-square quantile function used below.
from scipy import stats

# Each limit is (n - 1) * var divided by a chi-square quantile with df
# degrees of freedom: the upper-tail quantile (1 - alpha/2) gives the lower
# limit and the lower-tail quantile (alpha/2) gives the upper limit.
# Both limits are rounded to four decimals.
lower, upper = (
    round(((n - 1) * var / stats.chi2.ppf(tail, df)), 4)
    for tail in (1 - alpha/2, alpha/2)
)

CI = (lower, upper)
print("Confidence interval for variance is :", CI)

# Hypothesis Testing

### One sample t-Test

In [None]:
# Sample used for the one sample t-test: twelve plant heights.
data = [
    14, 14, 16, 13, 12, 17,
    15, 14, 15, 13, 15, 14,
]

print("Height of 12 randomly sampled plants: ", data)

In [None]:
# scipy.stats provides the t-test routines.
import scipy.stats as stats

# One sample t-test of the sample mean against a hypothesised
# population mean of 15.
stats.ttest_1samp(data, popmean=15)

### Two sample t-Test

In [None]:
import numpy as np

# The two independent samples (20 observations each) compared by the
# two sample t-test.
group1 = np.array([
    14, 15, 15, 16, 13, 8, 14, 17, 16, 14,
    19, 20, 21, 15, 15, 16, 16, 13, 14, 12,
])
group2 = np.array([
    15, 17, 14, 17, 14, 8, 12, 19, 19, 14,
    17, 22, 24, 16, 13, 16, 13, 18, 15, 13,
])

In [None]:
# Variance of each group (NumPy's default, i.e. ddof=0).
print(group1.var(), group2.var())

In [None]:
# Ratio of the larger group variance to the smaller one, computed from the
# data itself so it stays correct if the groups change (instead of relying
# on hand-copied, rounded variances).
group1_var, group2_var = np.var(group1), np.var(group2)
print(max(group1_var, group2_var) / min(group1_var, group2_var))

In [None]:
import scipy.stats as stats

# Independent two sample t-test on group1 vs group2, with the two
# population variances assumed equal (equal_var=True).
stats.ttest_ind(group1, group2, equal_var=True)

### Paired sample t-Test

In [None]:
# Paired measurements: 15 "pre" values and the 15 matching "post" values.
pre = [
    88, 82, 84, 93, 75,
    78, 84, 87, 95, 91,
    83, 89, 77, 68, 91,
]
post = [
    91, 84, 88, 90, 79,
    80, 88, 90, 90, 96,
    88, 89, 81, 74, 92,
]

In [None]:
import scipy.stats as stats

# Paired samples t-test on the matched pre/post measurements.
stats.ttest_rel(a=pre, b=post)