import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from itertools import combinations
import plotnine as p
# read data
import ssl
# WARNING: swaps ssl's default HTTPS context factory for the unverified one,
# so code that builds its context through this factory (e.g. urllib) stops
# checking server certificates for the remainder of the process.
# NOTE(review): presumably a workaround for certificate errors when fetching
# remote datasets -- confirm it is still required, and prefer fixing the
# certificate store over disabling verification globally.
ssl._create_default_https_context = ssl._create_unverified_context
def read_data(file) -> pd.DataFrame:
    """Load a Stata ``.dta`` dataset into a DataFrame.

    Parameters
    ----------
    file : str, path-like or file-like
        Location of the dataset; handed to ``pandas.read_stata`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the dataset.
    """
    # The former ``"" + file`` concatenation contributed nothing for strings
    # and raised TypeError for pathlib.Path inputs, so the argument is now
    # passed straight through.
    return pd.read_stata(file)
# Monte Carlo experiment: 1000 times over, simulate y = 3 + 2*x + u on 10000
# observations, regress y on x, and keep the estimated coefficient on x.
coefs = np.zeros(1000)
for i in range(coefs.size):
    # The regressor is drawn before the error term, so the global NumPy
    # random stream is consumed in a fixed order within each replication.
    tb = pd.DataFrame({
        'x': 9 * np.random.normal(size=10000),
        'u': 36 * np.random.normal(size=10000),
    }).assign(y=lambda d: 3 + 2 * d['x'] + d['u'])

    reg_tb = sm.OLS.from_formula(formula='y ~ x', data=tb).fit()
    coefs[i] = reg_tb.params['x']

# Histogram of the 1000 estimated coefficients, using bins of width 0.01.
(
    p.ggplot()
    + p.geom_histogram(p.aes(x=coefs), binwidth=0.01)
)