In [1]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as scp
import seaborn as sns
import scipy.stats as stats
import statsmodels.stats.power as smp
import statsmodels.api as sm 
import statsmodels.formula.api as smf 

sns.set(rc={'figure.figsize':(12, 12)})

## Q1

In [2]:
n = 236 # Change
barX = 26.2 # Change
s = 3.33 # Change

# Short version of the critical point computation:
cl = 0.90 # Change
alpha = 1 - cl
crit_point = stats.norm.isf(alpha/2, loc = 0, scale = 1)

conf_int = barX + np.array([-1, 1]) * crit_point * s / np.sqrt(n)
print("The confidence interval is ", conf_int)
print("Rounded to 4 digits: [{:.4}, {:.4}]".format(conf_int[0], conf_int[1]))

The confidence interval is  [25.84345407 26.55654593]
Rounded to 4 digits: [25.84, 26.56]


## Q2

In [3]:
n = 209 # Change
barX = -39.4 # Change
s = 1.72 # Change

# Short version of the critical point computation:
cl = 0.99 # Change
alpha = 1 - cl
crit_point = stats.norm.isf(alpha/2, loc = 0, scale = 1)

conf_int = barX + np.array([-1, 1]) * crit_point * s / np.sqrt(n)
print("The confidence interval is ", conf_int)
print("Rounded to 4 digits: [{:.4}, {:.4}]".format(conf_int[0], conf_int[1]))

The confidence interval is  [-39.706459 -39.093541]
Rounded to 4 digits: [-39.71, -39.09]


## Q3

In [20]:
n = 19 # Change
mu0 = 32.53 # Change
barX = 36.29 # Change
s = 8.182 # Change

Zscore = (barX - mu0)/(s/np.sqrt(n))
print("The Z score for this sample is {:.4}".format(Zscore))


sl = 0.95
alpha = 1 - sl
criticalValue = stats.norm.isf(1 - alpha)
print("The critical value (defining the rejection region) is: {:.4}".format(criticalValue))

# pValue = stats.norm.cdf(Zscore) # Change
pValue = stats.norm.sf(Zscore)
# pValue = 2 * stats.norm.sf(Zscore) 

print("And the resulting p-value is: {:.4}".format(pValue))

The Z score for this sample is 2.003
The critical value (defining the rejection region) is: 2.003
And the resulting p-value is: 0.02258


In [36]:
n = 7 # Change
mu0 = -49.96 # Change
barX = -46.5 # Change
s = 4.545 # Change

Zscore = (barX - mu0)/(s/np.sqrt(n))
print("The Z score for this sample is {:.4}".format(Zscore))


sl = 0.95
alpha = 1 - sl
criticalValue = stats.norm.isf(1 - alpha)
print("The critical value (defining the rejection region) is: {:.4}".format(criticalValue))

pValue = stats.t.cdf(Zscore, df = n-1) # Change
pValue = stats.t.sf(Zscore, df = n-1)
# pValue = 2 * stats.norm.sf(Zscore) 

print("And the resulting p-value is: {:.4}".format(pValue))

The Z score for this sample is 2.014
The critical value (defining the rejection region) is: -1.645
And the resulting p-value is: 0.04532


## Q4

In [8]:
n = 16 # Change
mu0 = 37.54 # Change
barX = 32.68 # Change
s = 7.98 # Change

Zscore = (barX - mu0)/(s/np.sqrt(n))
print("The Z score for this sample is {:.4}".format(Zscore))


sl = 0.95
alpha = 1 - sl
criticalValue = stats.norm.isf(1 - alpha)
print("The critical value (defining the rejection region) is: {:.4}".format(criticalValue))

# pValue = stats.norm.cdf(Zscore) # Change
pValue = stats.norm.sf(Zscore)
# pValue = 2 * stats.norm.sf(Zscore) 

print("And the resulting p-value is: {:.4}".format(pValue))

The Z score for this sample is -2.436
The critical value (defining the rejection region) is: -1.645
And the resulting p-value is: 0.9926


## Q5

In [14]:
n = 12 # Change
mu0 = -30.74 # Change
barX = -32.98 # Change
s = 4.949 # Change

Zscore = (barX - mu0)/(s/np.sqrt(n))
print("The Z score for this sample is {:.4}".format(Zscore))


sl = 0.95
alpha = 1 - sl
criticalValue = stats.norm.isf(1 - alpha)
print("The critical value (defining the rejection region) is: {:.4}".format(criticalValue))

# pValue = stats.norm.cdf(Zscore) # Change
# pValue = stats.norm.sf(Zscore)
pValue = 2 * stats.norm.cdf(Zscore)

print("And the resulting p-value is: {:.4}".format(pValue))

The Z score for this sample is -1.568
The critical value (defining the rejection region) is: -1.645
And the resulting p-value is: 0.1169


## Q6

In [12]:
link = 'data/regresionLinealSimple_01.csv'
data = pd.read_csv(link)
# The linear regression model in Scikit requires that
# the X array be two-dimensional. But in our case a 
# single column gives a 1D array. That is why, after
# extracting the values from the columns we reshape it.
X = data.x.values
X = X[:, np.newaxis]

# For Y it is enough to extract the values,
# because Scikit already expects it to be 1D
Y = data.y.values

# Now we load the required function from the scikit module
from sklearn.linear_model import LinearRegression
# Create the model
modelXY = LinearRegression(fit_intercept=True)
# Fit the model to 
XY_fit = modelXY.fit(X, Y)

# And extract the coefficients. 
# Note that b1 is returned as an array 
b1 = XY_fit.coef_[0]
b0 = XY_fit.intercept_

print("The regression line is y = {:.6} + {:.6} x".format(b0, b1))

The regression line is y = 0.807531 + 0.76039 x


In [13]:
link = 'data/regresionLinealSimple_02.csv'
data = pd.read_csv(link)
# The linear regression model in Scikit requires that
# the X array be two-dimensional. But in our case a 
# single column gives a 1D array. That is why, after
# extracting the values from the columns we reshape it.
X = data.x.values
X = X[:, np.newaxis]

# For Y it is enough to extract the values,
# because Scikit already expects it to be 1D
Y = data.y.values

# Now we load the required function from the scikit module
from sklearn.linear_model import LinearRegression
# Create the model
modelXY = LinearRegression(fit_intercept=True)
# Fit the model to 
XY_fit = modelXY.fit(X, Y)

# And extract the coefficients. 
# Note that b1 is returned as an array 
b1 = XY_fit.coef_[0]
b0 = XY_fit.intercept_

print("The regression line is y = {:.6} + {:.6} x".format(b0, b1))

The regression line is y = 3.01396 + 0.699292 x
