In [7]:
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Seed NumPy's legacy global random state so the noise drawn later with
# np.random.normal is the same on every fresh Restart & Run All.
np.random.seed(43425)

In [8]:
# Build a synthetic data set for a quadratic regression.
nsample = 100
# Size the grid with nsample (instead of a second hard-coded 100) so that
# x, X and e always have the same number of rows when nsample is changed.
x = np.linspace(0, 10, nsample)
# Regressors: x and x**2. The intercept column is added in a later cell.
X = np.column_stack((x, x**2))
# True coefficients in the order (intercept, x, x**2); this matches the
# column order of X once the constant has been prepended.
beta = np.array([1, 0.1, 10])
# One standard-normal noise draw per observation.
e = np.random.normal(size=nsample)

In [9]:
# Sanity check: smallest and largest value of the simulated noise.
print(np.min(e), np.max(e))

-2.69563099162214 2.4982415199273027


### OLS Estimation

Add the intercept (a constant column of ones) that our model needs, then build the response `y`.

In [10]:
# Prepend the constant column to the design matrix so the first entry of
# beta plays the role of the intercept.
X = sm.add_constant(X)
print(X)

# Response = linear predictor (X times beta) plus the noise vector e.
y = X.dot(beta) + e
print(y.shape)

[[1.00000000e+00 0.00000000e+00 0.00000000e+00]
 [1.00000000e+00 1.01010101e-01 1.02030405e-02]
 [1.00000000e+00 2.02020202e-01 4.08121620e-02]
 [1.00000000e+00 3.03030303e-01 9.18273646e-02]
 [1.00000000e+00 4.04040404e-01 1.63248648e-01]
 [1.00000000e+00 5.05050505e-01 2.55076013e-01]
 [1.00000000e+00 6.06060606e-01 3.67309458e-01]
 [1.00000000e+00 7.07070707e-01 4.99948985e-01]
 [1.00000000e+00 8.08080808e-01 6.52994592e-01]
 [1.00000000e+00 9.09090909e-01 8.26446281e-01]
 [1.00000000e+00 1.01010101e+00 1.02030405e+00]
 [1.00000000e+00 1.11111111e+00 1.23456790e+00]
 [1.00000000e+00 1.21212121e+00 1.46923783e+00]
 [1.00000000e+00 1.31313131e+00 1.72431385e+00]
 [1.00000000e+00 1.41414141e+00 1.99979594e+00]
 [1.00000000e+00 1.51515152e+00 2.29568411e+00]
 [1.00000000e+00 1.61616162e+00 2.61197837e+00]
 [1.00000000e+00 1.71717172e+00 2.94867871e+00]
 [1.00000000e+00 1.81818182e+00 3.30578512e+00]
 [1.00000000e+00 1.91919192e+00 3.68329762e+00]
 [1.00000000e+00 2.02020202e+00 4.081216

In [11]:
# Display the simulated response vector built in the previous cell.
y

array([3.33301382e+00, 1.33540599e+00, 7.84630331e-01, 1.36285173e+00,
       4.60313639e+00, 3.76494311e+00, 3.28472321e+00, 6.57211041e+00,
       6.76063851e+00, 9.05050657e+00, 1.17175022e+01, 1.29288521e+01,
       1.83118320e+01, 1.91782866e+01, 2.22991953e+01, 2.57606277e+01,
       2.71349829e+01, 3.27431041e+01, 3.51698812e+01, 3.78562633e+01,
       4.27185846e+01, 4.48680012e+01, 4.98806588e+01, 5.29056001e+01,
       6.04973456e+01, 6.49759355e+01, 6.99632892e+01, 7.58637492e+01,
       7.99411002e+01, 8.92856537e+01, 9.29626630e+01, 1.01803842e+02,
       1.05622343e+02, 1.12631904e+02, 1.18082844e+02, 1.26867677e+02,
       1.32667518e+02, 1.41204823e+02, 1.50256359e+02, 1.55470276e+02,
       1.64810668e+02, 1.72593568e+02, 1.82823750e+02, 1.90658146e+02,
       2.00303343e+02, 2.06654805e+02, 2.17935143e+02, 2.27470117e+02,
       2.36836777e+02, 2.46577516e+02, 2.57859795e+02, 2.65855109e+02,
       2.74719837e+02, 2.88771879e+02, 2.98555173e+02, 3.11057117e+02,
      

In [12]:
#Training the model
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 3.754e+06
Date:                Sun, 17 Nov 2019   Prob (F-statistic):          7.89e-238
Time:                        09:09:36   Log-Likelihood:                -149.94
No. Observations:                 100   AIC:                             305.9
Df Residuals:                      97   BIC:                             313.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.4742      0.324      4.555      0.0

In [13]:
#Extracting the quantities
print('Parameters: ', results.params)
print('R2: ', results.rsquared)
print()

Parameters:  [ 1.47415561 -0.03470121 10.0090018 ]
R2:  0.9999870796298561

