In [None]:
# Mock data for linear regression. Use 10 predictive vars and an intercept and generate 1000 samples.
import numpy as np
import pandas as pd

# Seed the global NumPy generator so every draw below is repeatable.
np.random.seed(0)
n, p = 1000, 10  # number of samples, number of predictors

# Draw order is part of the result: design matrix first, then the true
# slopes, then (inside the expression for y) the unit-variance noise.
X = np.random.randn(n, p)
beta = np.random.randn(p)
y = X.dot(beta) + 3 + np.random.randn(n)  # 3 is the true intercept

# Tabular view of the same data: predictors x0..x{p-1} plus the response y.
df = pd.DataFrame(data=X, columns=['x%d' % j for j in range(p)])
df['y'] = y

In [None]:
# OLS model on the full data set
import statsmodels.api as sm

# Build the design matrix from the DataFrame instead of the raw array:
#  * the summary then labels the slopes x0..x9, the same names the sampled
#    fit further down reports (a bare ndarray gets auto-named x1..x10, which
#    is off by one relative to df's columns);
#  * a separate name leaves the original `X` untouched, so this cell does not
#    silently change what `X` means for any cell that runs after it.
X_full = sm.add_constant(df.drop(columns='y'))
model = sm.OLS(df['y'], X_full).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.885
Model:                            OLS   Adj. R-squared:                  0.883
Method:                 Least Squares   F-statistic:                     758.1
Date:                Mon, 18 Nov 2024   Prob (F-statistic):               0.00
Time:                        15:01:05   Log-Likelihood:                -1414.6
No. Observations:                1000   AIC:                             2851.
Df Residuals:                     989   BIC:                             2905.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.0536      0.032     95.727      0.0

In [None]:
# Keep only a small random subset: 40 rows of df, drawn without replacement
sampled_df = df.sample(n=40)

In [None]:
# Same regression, but estimated from the 40 sampled rows only:
# response is the 'y' column, predictors are every other column plus a constant.
X_sampled = sm.add_constant(sampled_df.drop(columns=['y']))
model_sampled = sm.OLS(endog=sampled_df['y'], exog=X_sampled).fit()
print(model_sampled.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.913
Model:                            OLS   Adj. R-squared:                  0.882
Method:                 Least Squares   F-statistic:                     30.28
Date:                Mon, 18 Nov 2024   Prob (F-statistic):           1.10e-12
Time:                        15:03:39   Log-Likelihood:                -43.938
No. Observations:                  40   AIC:                             109.9
Df Residuals:                      29   BIC:                             128.5
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          2.9145      0.160     18.164      0.0

In [None]:
# Function to generate bootstrapped dfs from a DataFrame (e.g. sampled_df)
def bootstrap(df, n, random_state=None):
    """Return ``n`` bootstrap resamples of ``df``.

    Each resample has the same number of rows as ``df`` and is drawn row-wise
    with replacement.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to resample.
    n : int
        Number of resamples to generate; values <= 0 yield an empty list.
    random_state : int, numpy.random.RandomState or None, optional
        ``None`` (default) keeps the previous behaviour and draws from
        NumPy's global random state. An int seeds a private generator so the
        whole list of resamples is reproducible regardless of global state.
        An existing generator object is used as-is.

    Returns
    -------
    list of pandas.DataFrame
        The ``n`` resampled frames, in the order they were drawn.
    """
    if isinstance(random_state, (int, np.integer)):
        # Convert the seed ONCE into a stateful generator. Handing the bare
        # int to every .sample() call would reseed each time and make all
        # resamples identical.
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state
    return [df.sample(n=len(df), replace=True, random_state=rng) for _ in range(n)]

In [None]:
# Names of the predictor columns (every column of sampled_df except the response 'y')
sampled_df.columns.drop('y').tolist()

Index(['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9'], dtype='object')

In [None]:
# Fit OLS model to each bootstrapped df. Store the fitted models.
# (Comprehension variables are named so they do not shadow the notebook-level
# `df` and `model`.)
bootstraped_models = [
    sm.OLS(boot_df['y'], sm.add_constant(boot_df.drop(columns='y'))).fit()
    for boot_df in bootstrap(sampled_df, 1000)
]

# Get the coefficients from each model: one row per resample, one column per
# coefficient (constant first, then the predictors in sampled_df's column order).
coefs = np.array([fitted.params for fitted in bootstraped_models])

# Get the 95% (percentile) confidence interval for each coefficient
ci = np.percentile(coefs, [2.5, 97.5], axis=0)

# Estimate p-values for each coefficient from coefs.
# Two-sided percentile bootstrap p-value for H0 "coefficient == 0": twice the
# smaller share of resampled estimates lying on either side of zero, capped
# at 1. A result of 0.0 means one side of zero was never reached, i.e. the
# p-value is below the resolution of the bootstrap (about 2 / len(coefs)).
share_at_or_below_zero = np.mean(coefs <= 0, axis=0)
share_at_or_above_zero = np.mean(coefs >= 0, axis=0)
p_values = np.minimum(1.0, 2 * np.minimum(share_at_or_below_zero, share_at_or_above_zero))

# Row labels shared by both printouts below
coef_labels = ['intercept'] + list(sampled_df.drop(columns=['y']).columns)

# Print the bootstrap p-values
for i, col in enumerate(coef_labels):
    print(f'{col}: p-value {p_values[i]}')

# Print the median coefs with confidence intervals
for i, col in enumerate(coef_labels):
    print(f'{col}: {np.median(coefs[:, i])} ({ci[0, i]}, {ci[1, i]})')

    

intercept: 1.0
x0: 0.57
x1: 0.0
x2: 1.0
x3: 0.41
x4: 0.088
x5: 0.0
x6: 1.0
x7: 1.0
x8: 0.757
x9: 0.001
intercept: 2.9323013077675477 (2.657652450786758, 3.339612961264398)
x0: 0.029837785189922805 (-0.34567738184780283, 0.3849875216132401)
x1: -1.0126836020022 (-1.4703911474138034, -0.6372224540563698)
x2: 1.6675476280457224 (1.1928583585549248, 2.197013883420873)
x3: -0.04406087102014956 (-0.4367923834545272, 0.38846355643524944)
x4: -0.2587412925295042 (-0.6050112093904666, 0.10453497286298062)
x5: -1.3308070905781617 (-1.662953786488907, -0.7658811774182236)
x6: 0.953306102410945 (0.6460692373990284, 1.2274016805276264)
x7: 1.247946960232062 (0.8954760847084943, 1.6458546141307107)
x8: 0.10637797530746744 (-0.20647670940587556, 0.4521429898547107)
x9: -0.6548315485608427 (-1.0568321071845825, -0.389625484674027)
