https://www.datascience.com/blog/7-methods-to-fit-linear-model-python

In [31]:
import pandas as pd
import numpy as np
# ordinary least squares linear regression
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
%matplotlib inline

## read in data

In [32]:
# read data into a DataFrame
data = pd.read_csv('http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv', index_col=0)
data.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [33]:
?smf.ols

In [34]:
result = smf.ols(formula='sales ~ TV', data=data).fit()

In [35]:
result.params

Intercept    7.032594
TV           0.047537
dtype: float64

### Variance-Covariance Matrix

In [36]:
result.cov_params()

Unnamed: 0,Intercept,TV
Intercept,0.20962,-0.001064
TV,-0.001064,7e-06


### Residual Sum of Squares:
RSS = sum(residuals**2)

In [37]:
result.ssr  # RSS

2102.5305831313517

### Residuals

In [38]:
result.resid.tolist()

[4.129225487234464,
 1.2520259516029295,
 1.449776235424368,
 4.265605425269815,
 -2.727218139417664,
 -0.24616232089496481,
 2.0340496259736724,
 0.45350227082333205,
 -2.641408656851663,
 -5.930414307645039,
 -1.5747654817502994,
 0.1612897499029664,
 1.0360344085664366,
 -1.967415991347119,
 2.2651781384929777,
 6.0787469102602465,
 2.2444222295135674,
 3.9905958330205493,
 0.9778709329073401,
 0.5652593150884968,
 0.5854041803007917,
 -5.817791987926583,
 -2.060077202843554,
 -2.385208559986104,
 -0.29412624810482413,
 -7.529976318968586,
 1.1744205329937856,
 -2.546140917095732,
 0.04029031113699233,
 0.11131963630111308,
 0.4439244680408194,
 -0.4994802540156229,
 -2.053154999217213,
 -2.2583252481377407,
 -2.0818500385676835,
 -8.051494923006533,
 5.679877119299334,
 4.116419410525731,
 1.018577248209155,
 3.629052432143805,
 -0.05876323681419038,
 1.653421094227813,
 -0.2893511802622939,
 -3.967924454719478,
 0.2742367760035105,
 -0.45625928894945034,
 -0.6966301959695649,
 4.7

In [39]:
result.rsquared

0.611875050850071

In [40]:
result.rsquared_adj

0.6099148238341623

In [42]:
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.612
Model:                            OLS   Adj. R-squared:                  0.610
Method:                 Least Squares   F-statistic:                     312.1
Date:                Mon, 11 Feb 2019   Prob (F-statistic):           1.47e-42
Time:                        16:43:14   Log-Likelihood:                -519.05
No. Observations:                 200   AIC:                             1042.
Df Residuals:                     198   BIC:                             1049.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.0326      0.458     15.360      0.0

In [45]:
result.cov_params()

Unnamed: 0,Intercept,TV
Intercept,0.20962,-0.001064
TV,-0.001064,7e-06


In [47]:
result.bse

Intercept    0.457843
TV           0.002691
dtype: float64

In [48]:
result.aic

1042.0913275363478

In [49]:
result.bic

1048.687962269444

In [43]:
print(result.summary2())

                 Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     0.610    
Dependent Variable: sales            AIC:                1042.0913
Date:               2019-02-11 16:43 BIC:                1048.6880
No. Observations:   200              Log-Likelihood:     -519.05  
Df Model:           1                F-statistic:        312.1    
Df Residuals:       198              Prob (F-statistic): 1.47e-42 
R-squared:          0.612            Scale:              10.619   
--------------------------------------------------------------------
              Coef.    Std.Err.      t      P>|t|    [0.025   0.975]
--------------------------------------------------------------------
Intercept     7.0326     0.4578   15.3603   0.0000   6.1297   7.9355
TV            0.0475     0.0027   17.6676   0.0000   0.0422   0.0528
------------------------------------------------------------------
Omnibus:              0.531         Durbin-Watson:           1.935
Pro

In [53]:
result.fvalue

312.1449943727128

## Scikit-learn

In [285]:
from sklearn.linear_model import LinearRegression

In [286]:
linreg = LinearRegression( )

In [287]:
ones = np.ones(shape=len(data.TV))   # works
zeros = np.zeros(shape=len(data.TV)) # works
empty = np.empty(shape=len(data.TV)) # works

xdata = np.asarray(data.TV)

# ALL 3 OF THESE WORK THE SAME. THE SECOND COEFF WILL BE 0 SINCE ITS A DUMMY ARRAY IN X. 

X = np.asarray(list(zip(xdata, ones)))  # works
#X = np.asarray(list(zip(xdata, zeros))) # works
#X = np.asarray(list(zip(xdata, empty))) # works

y = np.asarray(data.sales)

In [288]:
result = linreg.fit(X, y) 

# print the intercept and coefficients
print(result.intercept_)
print(result.coef_)

7.0325935491276965
[0.04753664 0.        ]


In [293]:
result.singular_

array([1211.12304443,    0.        ])

## Using Matrices/Vector Algebra

See [Wikipedia page](https://en.wikipedia.org/wiki/Ordinary_least_squares) under the Linear Model section and Matrix/vector formulation subsection. 

${\displaystyle {\hat {\boldsymbol {\beta }}}=(\mathbf {X} ^{\rm {T}}\mathbf {X} )^{-1}\mathbf {X} ^{\rm {T}}\mathbf {y} .}$

${\displaystyle \mathbf {X} ={\begin{bmatrix}X_{11}&X_{12}&\cdots &X_{1p}\\X_{21}&X_{22}&\cdots &X_{2p}\\\vdots &\vdots &\ddots &\vdots \\X_{n1}&X_{n2}&\cdots &X_{np}\end{bmatrix}},\qquad {\boldsymbol {\beta }}={\begin{bmatrix}\beta _{1}\\\beta _{2}\\\vdots \\\beta _{p}\end{bmatrix}},\qquad \mathbf {y} ={\begin{bmatrix}y_{1}\\y_{2}\\\vdots \\y_{n}\end{bmatrix}}.}$

xdata = np.asarray(data.TV)

# X = np.vstack([xdata, np.ones(len(xdata))]).T
# X = np.asmatrix(X)

# OR, A BETTER SOLUTION:
X = np.matrix([xdata, np.ones(len(xdata))]).T  # shape(200, 2)
y = np.asarray(data.sales)

print(X.shape)            # (200, 2)
print(y.shape)            # (200, )
print(np.vstack(y).shape) # (200, 1)

beta = ((X.T * X)**-1) * X.T * np.vstack(y)
beta

### USING NUMPY ARRAY DOENS'T WORK FOR X, EVENTHOUGH IT HAS THE SAME SHAPE AND LOOKS THE SAME. 

In [275]:
X.shape

(200, 2)

In [276]:
X

matrix([[230.1,   1. ],
        [ 44.5,   1. ],
        [ 17.2,   1. ],
        [151.5,   1. ],
        [180.8,   1. ],
        [  8.7,   1. ],
        [ 57.5,   1. ],
        [120.2,   1. ],
        [  8.6,   1. ],
        [199.8,   1. ],
        [ 66.1,   1. ],
        [214.7,   1. ],
        [ 23.8,   1. ],
        [ 97.5,   1. ],
        [204.1,   1. ],
        [195.4,   1. ],
        [ 67.8,   1. ],
        [281.4,   1. ],
        [ 69.2,   1. ],
        [147.3,   1. ],
        [218.4,   1. ],
        [237.4,   1. ],
        [ 13.2,   1. ],
        [228.3,   1. ],
        [ 62.3,   1. ],
        [262.9,   1. ],
        [142.9,   1. ],
        [240.1,   1. ],
        [248.8,   1. ],
        [ 70.6,   1. ],
        [292.9,   1. ],
        [112.9,   1. ],
        [ 97.2,   1. ],
        [265.6,   1. ],
        [ 95.7,   1. ],
        [290.7,   1. ],
        [266.9,   1. ],
        [ 74.7,   1. ],
        [ 43.1,   1. ],
        [228. ,   1. ],
        [202.5,   1. ],
        [177. , 

In [277]:
# THIS HAS SHAPE (200, 2). 
X = np.asarray( [xdata, np.ones(len(xdata))] ).T

In [278]:
X.shape

(200, 2)

In [279]:
X

array([[230.1,   1. ],
       [ 44.5,   1. ],
       [ 17.2,   1. ],
       [151.5,   1. ],
       [180.8,   1. ],
       [  8.7,   1. ],
       [ 57.5,   1. ],
       [120.2,   1. ],
       [  8.6,   1. ],
       [199.8,   1. ],
       [ 66.1,   1. ],
       [214.7,   1. ],
       [ 23.8,   1. ],
       [ 97.5,   1. ],
       [204.1,   1. ],
       [195.4,   1. ],
       [ 67.8,   1. ],
       [281.4,   1. ],
       [ 69.2,   1. ],
       [147.3,   1. ],
       [218.4,   1. ],
       [237.4,   1. ],
       [ 13.2,   1. ],
       [228.3,   1. ],
       [ 62.3,   1. ],
       [262.9,   1. ],
       [142.9,   1. ],
       [240.1,   1. ],
       [248.8,   1. ],
       [ 70.6,   1. ],
       [292.9,   1. ],
       [112.9,   1. ],
       [ 97.2,   1. ],
       [265.6,   1. ],
       [ 95.7,   1. ],
       [290.7,   1. ],
       [266.9,   1. ],
       [ 74.7,   1. ],
       [ 43.1,   1. ],
       [228. ,   1. ],
       [202.5,   1. ],
       [177. ,   1. ],
       [293.6,   1. ],
       [206

Computing beta will throw the following error: 

    ValueError: operands could not be broadcast together with shapes (2,200) (200,2) 

In [281]:
beta = ((X.T * X)**-1) * X.T * np.vstack(y)
beta

ValueError: operands could not be broadcast together with shapes (2,200) (200,2) 

#### The matrix:

In [283]:
X = np.matrix([xdata, np.ones(len(xdata))]).T  # shape(200, 2)
X.T * np.vstack(y)

matrix([[482108.34],
        [  2804.5 ]])

#### The array:

In [284]:
X = np.asarray( [xdata, np.ones(len(xdata))] ).T
X.T * np.vstack(y)

ValueError: operands could not be broadcast together with shapes (2,200) (200,1) 