In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

# Seed NumPy's global random generator so everyone gets identical results.
np.random.seed(1)

# Synthetic data: points on the line y = 3x, perturbed by Gaussian noise
# with mean 0 and standard deviation 3.
x = np.arange(0, 10, 0.1)
y = 3 * x + np.random.normal(0, 3, x.size)

# Keep x and y side by side in a DataFrame for convenient resampling later.
df = pd.DataFrame({"x": x, "y": y})

# Add a constant column so the OLS model can fit an intercept term,
# then fit the model and print the full regression summary.
x_sm = sm.add_constant(x)
linreg = sm.OLS(y, x_sm).fit()
print(linreg.summary())

# Bootstrap the regression: refit the line on many resampled copies of the
# data to approximate the sampling distribution of the slope and intercept.
N_TRIALS = 1000  # number of bootstrap resamples

coeffs = np.zeros(N_TRIALS)
intercepts = np.zeros(N_TRIALS)

# Use a dedicated name for the scikit-learn estimator so the statsmodels
# results object stored in `linreg` is not overwritten. The estimator is
# created once; each call to fit() replaces the previously fitted parameters.
boot_model = LinearRegression()
for trial in range(N_TRIALS):
    # Draw rows with replacement; the resample has as many rows as the data.
    sample = df.sample(n=len(df), replace=True)
    # scikit-learn expects a 2-D feature matrix, hence the reshape to (n, 1).
    boot_model.fit(sample["x"].to_numpy().reshape(-1, 1), sample["y"].to_numpy())
    # With a single feature coef_ is a length-1 array; index it explicitly
    # rather than relying on NumPy's deprecated array-to-scalar conversion.
    coeffs[trial] = boot_model.coef_[0]
    intercepts[trial] = boot_model.intercept_
# Plot the bootstrap distributions side by side, one panel per parameter,
# and mark the 2.5th and 97.5th percentiles (the bounds of a 95% interval).
fig, ax = plt.subplots(1, 2, figsize=(17, 8))

coef_lo, coef_hi = (np.percentile(coeffs, q) for q in (2.5, 97.5))
icpt_lo, icpt_hi = (np.percentile(intercepts, q) for q in (2.5, 97.5))

panels = (
    (ax[0], coeffs, 'Coefficient', (coef_lo, coef_hi)),
    (ax[1], intercepts, 'Intercept', (icpt_lo, icpt_hi)),
)
for panel, values, title, bounds in panels:
    panel.hist(values, ec='black')
    panel.set_title(title)
    for bound in bounds:
        panel.axvline(bound, linestyle='dashed', color='red')

plt.show()

# Report the percentile-based intervals, rounded to three decimals.
print('Estimated 95% confidence interval for coefficient: ({},{})'.format(
    round(coef_lo, 3), round(coef_hi, 3)))
print('Estimated 95% confidence interval for intercept : ({},{})'.format(
    round(icpt_lo, 3), round(icpt_hi, 3)))


KeyboardInterrupt: 