https://www.datascience.com/blog/7-methods-to-fit-linear-model-python

In [1]:
import pandas as pd
import numpy as np
# ordinary least squares linear regression
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
%matplotlib inline

## read in data

In [2]:
# Load the Advertising dataset from its URL; the first CSV column becomes the index.
ADVERTISING_CSV_URL = 'http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv'
data = pd.read_csv(ADVERTISING_CSV_URL, index_col=0)
data.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [4]:
# Ordinary least squares: sales explained by the three advertising channels.
ols_model = smf.ols(formula='sales ~ TV + radio + newspaper', data=data)
r = ols_model.fit()

In [5]:
# Fitted coefficients, one per model term (Intercept, TV, radio, newspaper).
r.params

Intercept    2.938889
TV           0.045765
radio        0.188530
newspaper   -0.001037
dtype: float64

### Variance-Covariance Matrix

In [6]:
# Variance-covariance matrix of the coefficient estimates (one row/column per term);
# the square roots of its diagonal are the standard errors shown by r.bse.
r.cov_params()

Unnamed: 0,Intercept,TV,radio,newspaper
Intercept,0.097287,-0.0002657273,-0.001115489,-0.0005910212
TV,-0.000266,1.945737e-06,-4.470395e-07,-3.26595e-07
radio,-0.001115,-4.470395e-07,7.415335e-05,-1.780062e-05
newspaper,-0.000591,-3.26595e-07,-1.780062e-05,3.446875e-05


### Residual Sum of Squares:
RSS = sum(residuals**2)

In [7]:
r.ssr  # RSS: residual sum of squares, i.e. sum(residuals**2)

556.8252629021871

### R-squared and adjusted R-squared

In [8]:
# R-squared of the fit (shown rounded as 0.897 in the summary table).
r.rsquared

0.8972106381789522

In [9]:
# Adjusted R-squared (shown rounded as 0.896 in the summary table).
r.rsquared_adj

0.8956373316204668

In [10]:
# t-statistic of each coefficient (the `t` column of the summary tables).
r.tvalues

Intercept     9.422288
TV           32.808624
radio        21.893496
newspaper    -0.176715
dtype: float64

In [11]:
# t_test() requires an r_matrix describing the linear restrictions to test;
# calling it with no argument raised
#   TypeError: t_test() missing 1 required positional argument: 'r_matrix'
# An identity matrix gives one restriction per coefficient, testing each one
# against zero (this should agree with r.tvalues / r.pvalues).
r.t_test(np.eye(len(r.params)))

In [12]:
# Raises TypeError as written: t_test_pairwise() requires a term_name argument
# (see the traceback below).
# NOTE(review): presumably intended for pairwise comparisons between levels of a
# categorical term; every term in this model looks numeric, so confirm whether
# this cell should be removed.
r.t_test_pairwise()

TypeError: t_test_pairwise() missing 1 required positional argument: 'term_name'

In [13]:
# Plain-text regression report: fit statistics followed by the coefficient table.
print(r.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.897
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     570.3
Date:                Mon, 11 Feb 2019   Prob (F-statistic):           1.58e-96
Time:                        19:08:54   Log-Likelihood:                -386.18
No. Observations:                 200   AIC:                             780.4
Df Residuals:                     196   BIC:                             793.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.9389      0.312      9.422      0.0

In [28]:
# Residual degrees of freedom ("Df Residuals" in the summary):
# 200 observations minus the 4 fitted coefficients.
r.df_resid

196.0

In [29]:
# Dict of diagnostic statistics for the fit (keys: jb, jbpv, skew, kurtosis,
# omni, omnipv, condno, mineigval).
# NOTE(review): presumably Jarque-Bera / omnibus normality tests on the residuals
# plus the condition number and smallest eigenvalue of the design — confirm
# against the statsmodels documentation.
r.diagn

{'jb': 151.2414204760385,
 'jbpv': 1.4399347942390942e-33,
 'skew': -1.3273536543621256,
 'kurtosis': 6.331891289062514,
 'omni': 60.413959455255494,
 'omnipv': 7.608085583221822e-14,
 'condno': 454.37490123426034,
 'mineigval': 29.196634362492418}

In [21]:
# Repeat of the earlier cov_params() call; the output is identical.
r.cov_params()

Unnamed: 0,Intercept,TV,radio,newspaper
Intercept,0.097287,-0.0002657273,-0.001115489,-0.0005910212
TV,-0.000266,1.945737e-06,-4.470395e-07,-3.26595e-07
radio,-0.001115,-4.470395e-07,7.415335e-05,-1.780062e-05
newspaper,-0.000591,-3.26595e-07,-1.780062e-05,3.446875e-05


In [37]:
# Extra information attached to the covariance estimate; here only a description string.
r.cov_kwds

{'description': 'Standard Errors assume that the covariance matrix of the errors is correctly specified.'}

In [38]:
# Covariance estimator used for the standard errors ("Covariance Type" in the summary).
r.cov_type

'nonrobust'

In [31]:
# Eigenvalues in descending order; the smallest one equals 'mineigval' in r.diagn.
# NOTE(review): presumably the eigenvalues of X'X for the design matrix — confirm.
r.eigenvals

array([6.02783643e+06, 1.58827294e+05, 3.78300718e+04, 2.91966344e+01])

In [34]:
# Log-likelihood of the fitted model ("Log-Likelihood" in the summary).
r.llf

-386.1811179443831

In [35]:
# p-value of each coefficient's t-statistic (the `P>|t|` column of the summary tables).
r.pvalues

Intercept    1.267295e-17
TV           1.509960e-81
radio        1.505339e-54
newspaper    8.599151e-01
dtype: float64

In [15]:
# Standard error of each coefficient (the `Std.Err.` column of the summary tables).
r.bse

Intercept    0.311908
TV           0.001395
radio        0.008611
newspaper    0.005871
dtype: float64

In [16]:
# Akaike information criterion ("AIC" in the summary).
r.aic

780.3622358887662

In [17]:
# Bayesian information criterion ("BIC" in the summary).
r.bic

793.5555053549584

In [18]:
# Alternative plain-text report layout; also lists the Scale value.
print(r.summary2())

                 Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     0.896   
Dependent Variable: sales            AIC:                780.3622
Date:               2019-02-11 19:09 BIC:                793.5555
No. Observations:   200              Log-Likelihood:     -386.18 
Df Model:           3                F-statistic:        570.3   
Df Residuals:       196              Prob (F-statistic): 1.58e-96
R-squared:          0.897            Scale:              2.8409  
------------------------------------------------------------------
                Coef.   Std.Err.     t     P>|t|    [0.025  0.975]
------------------------------------------------------------------
Intercept       2.9389    0.3119   9.4223  0.0000   2.3238  3.5540
TV              0.0458    0.0014  32.8086  0.0000   0.0430  0.0485
radio           0.1885    0.0086  21.8935  0.0000   0.1715  0.2055
newspaper      -0.0010    0.0059  -0.1767  0.8599  -0.0126  0.0105
--------------------

In [19]:
# F-statistic of the overall regression ("F-statistic" in the summary).
r.fvalue

570.2707036590944

## Scikit-learn

In [285]:
from sklearn.linear_model import LinearRegression

In [286]:
# Estimator created with all default settings.
linreg = LinearRegression()

In [287]:
xdata = np.asarray(data.TV)
y = np.asarray(data.sales)

# LinearRegression is used with its defaults and reports its own intercept_, so
# the second column of X is only a dummy regressor: a constant column adds
# nothing beyond the intercept, and its coefficient comes out as 0.
ones = np.ones(shape=len(data.TV))
zeros = np.zeros(shape=len(data.TV))

# np.empty() was dropped as a third option: it returns uninitialized memory, so
# the column is not guaranteed to be constant and the fit would not be
# reproducible.
X = np.column_stack((xdata, ones))
#X = np.column_stack((xdata, zeros))  # any constant column gives the same fit

In [288]:
# Fit on the two-column X; fit() returns the fitted estimator, kept as `result`.
result = linreg.fit(X, y)

# Intercept first, then the per-column coefficients, one per line.
print(result.intercept_, result.coef_, sep='\n')

7.0325935491276965
[0.04753664 0.        ]


In [293]:
# Singular values stored by the fitted estimator; the second one is 0 here.
# NOTE(review): presumably computed on the centered X, where the constant
# column vanishes — confirm against the scikit-learn documentation.
result.singular_

array([1211.12304443,    0.        ])

## Using Matrices/Vector Algebra

See [Wikipedia page](https://en.wikipedia.org/wiki/Ordinary_least_squares) under the Linear Model section and Matrix/vector formulation subsection. 

${\displaystyle {\hat {\boldsymbol {\beta }}}=(\mathbf {X} ^{\rm {T}}\mathbf {X} )^{-1}\mathbf {X} ^{\rm {T}}\mathbf {y} .}$

${\displaystyle \mathbf {X} ={\begin{bmatrix}X_{11}&X_{12}&\cdots &X_{1p}\\X_{21}&X_{22}&\cdots &X_{2p}\\\vdots &\vdots &\ddots &\vdots \\X_{n1}&X_{n2}&\cdots &X_{np}\end{bmatrix}},\qquad {\boldsymbol {\beta }}={\begin{bmatrix}\beta _{1}\\\beta _{2}\\\vdots \\\beta _{p}\end{bmatrix}},\qquad \mathbf {y} ={\begin{bmatrix}y_{1}\\y_{2}\\\vdots \\y_{n}\end{bmatrix}}.}$

xdata = np.asarray(data.TV)
y = np.asarray(data.sales)

# Design matrix: TV spend in column 0, constant intercept term in column 1.
# Built directly as np.matrix; an equivalent route is
# np.asmatrix(np.vstack([xdata, np.ones(len(xdata))]).T).
X = np.matrix([xdata, np.ones(len(xdata))]).T  # shape(200, 2)
y_col = np.vstack(y)  # y as a column vector

print(X.shape)      # (200, 2)
print(y.shape)      # (200, )
print(y_col.shape)  # (200, 1)

# Normal equations, beta = (X'X)^-1 X'y. On np.matrix, `*` is the matrix
# product and `.I` is the matrix inverse.
gram = X.T * X
beta = gram.I * X.T * y_col
beta

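### Using a NumPy array for X doesn't work with this formula, even though it has the same shape and looks the same

On an `ndarray`, `*` is element-wise multiplication (with broadcasting) and `**-1` is the element-wise reciprocal, whereas on `np.matrix` they are the matrix product and the matrix inverse. With plain arrays, use `@` for the matrix product and `np.linalg.inv` for the inverse.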

In [275]:
# Shape of the np.matrix design matrix: 200 rows, 2 columns.
X.shape

(200, 2)

In [276]:
# Show the np.matrix design matrix: TV values in column 0, ones in column 1.
# NOTE(review): this dumps every row; X[:5] would be enough to show the layout.
X

matrix([[230.1,   1. ],
        [ 44.5,   1. ],
        [ 17.2,   1. ],
        [151.5,   1. ],
        [180.8,   1. ],
        [  8.7,   1. ],
        [ 57.5,   1. ],
        [120.2,   1. ],
        [  8.6,   1. ],
        [199.8,   1. ],
        [ 66.1,   1. ],
        [214.7,   1. ],
        [ 23.8,   1. ],
        [ 97.5,   1. ],
        [204.1,   1. ],
        [195.4,   1. ],
        [ 67.8,   1. ],
        [281.4,   1. ],
        [ 69.2,   1. ],
        [147.3,   1. ],
        [218.4,   1. ],
        [237.4,   1. ],
        [ 13.2,   1. ],
        [228.3,   1. ],
        [ 62.3,   1. ],
        [262.9,   1. ],
        [142.9,   1. ],
        [240.1,   1. ],
        [248.8,   1. ],
        [ 70.6,   1. ],
        [292.9,   1. ],
        [112.9,   1. ],
        [ 97.2,   1. ],
        [265.6,   1. ],
        [ 95.7,   1. ],
        [290.7,   1. ],
        [266.9,   1. ],
        [ 74.7,   1. ],
        [ 43.1,   1. ],
        [228. ,   1. ],
        [202.5,   1. ],
        [177. , 

In [277]:
# Same design matrix as a plain ndarray: still shape (200, 2).
intercept_col = np.ones(len(xdata))
X = np.vstack([xdata, intercept_col]).T

In [278]:
# Shape of the ndarray design matrix: 200 rows, 2 columns.
X.shape

(200, 2)

In [279]:
# Show the ndarray design matrix: TV values in column 0, ones in column 1.
# NOTE(review): this dumps every row; X[:5] would be enough to show the layout.
X

array([[230.1,   1. ],
       [ 44.5,   1. ],
       [ 17.2,   1. ],
       [151.5,   1. ],
       [180.8,   1. ],
       [  8.7,   1. ],
       [ 57.5,   1. ],
       [120.2,   1. ],
       [  8.6,   1. ],
       [199.8,   1. ],
       [ 66.1,   1. ],
       [214.7,   1. ],
       [ 23.8,   1. ],
       [ 97.5,   1. ],
       [204.1,   1. ],
       [195.4,   1. ],
       [ 67.8,   1. ],
       [281.4,   1. ],
       [ 69.2,   1. ],
       [147.3,   1. ],
       [218.4,   1. ],
       [237.4,   1. ],
       [ 13.2,   1. ],
       [228.3,   1. ],
       [ 62.3,   1. ],
       [262.9,   1. ],
       [142.9,   1. ],
       [240.1,   1. ],
       [248.8,   1. ],
       [ 70.6,   1. ],
       [292.9,   1. ],
       [112.9,   1. ],
       [ 97.2,   1. ],
       [265.6,   1. ],
       [ 95.7,   1. ],
       [290.7,   1. ],
       [266.9,   1. ],
       [ 74.7,   1. ],
       [ 43.1,   1. ],
       [228. ,   1. ],
       [202.5,   1. ],
       [177. ,   1. ],
       [293.6,   1. ],
       [206

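Computing beta with the ndarray `X` throws the following error, because `*` on ndarrays is element-wise multiplication with broadcasting rather than a matrix product, and shapes (2, 200) and (200, 2) cannot be broadcast together: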

    ValueError: operands could not be broadcast together with shapes (2,200) (200,2) 

In [281]:
# Fails on purpose: X is an ndarray here, where `*` multiplies element-wise with
# broadcasting, so X.T (2, 200) and X (200, 2) are incompatible (see the error below).
# NOTE(review): the ndarray spelling of this formula would be
# np.linalg.inv(X.T @ X) @ X.T @ np.vstack(y).
beta = ((X.T * X)**-1) * X.T * np.vstack(y)
beta

ValueError: operands could not be broadcast together with shapes (2,200) (200,2) 

#### The matrix:

In [283]:
# With np.matrix, `*` is the matrix product: (2, 200) times (200, 1) gives a (2, 1) result.
X = np.matrix([xdata, np.ones(len(xdata))]).T  # shape(200, 2)
X.T * np.vstack(y)

matrix([[482108.34],
        [  2804.5 ]])

#### The array:

In [284]:
# With an ndarray, `*` is element-wise with broadcasting, so (2, 200) * (200, 1)
# raises the ValueError shown below.
# NOTE(review): X.T @ np.vstack(y) is the ndarray matrix product.
X = np.asarray( [xdata, np.ones(len(xdata))] ).T
X.T * np.vstack(y)

ValueError: operands could not be broadcast together with shapes (2,200) (200,1) 