https://www.datascience.com/blog/7-methods-to-fit-linear-model-python

# Several Methods for Linear Model Fitting in Python

Reorganize:

Start with truly linear least squares, then the polyfits, then scipy.optimize.curve_fit, then scipy.optimize for any function.

OR BY package

In [4]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
# allow plots to appear directly in the notebook
%matplotlib inline

#### read in data

In [5]:
# Load the Advertising dataset; the first CSV column is used as the row index.
advertising_csv = 'http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv'
data = pd.read_csv(advertising_csv, index_col=0)
data.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


## 1. Scipy's polyfit, 1st order polynomial is a line.

In [296]:
import scipy

In [297]:
# Degree-1 polynomial fit of sales vs. TV. With cov=True the result is
# (coefficients, covariance matrix); coefficients are [slope, intercept].
# NOTE(review): scipy.polyfit appears to be a re-export of numpy.polyfit (the
# output is identical to section 2) and is deprecated in newer SciPy
# releases -- confirm, and prefer np.polyfit.
scipy.polyfit(x=data.TV, y=data.sales, deg=1, cov=True)

(array([0.04753664, 7.03259355]), array([[ 7.31323813e-06, -1.07535682e-03],
        [-1.07535682e-03,  2.11759139e-01]]))

## 2. Numpy's polyfit

In [298]:
np.polyfit?

In [299]:
np.polyfit(x=data.TV, y=data.sales, deg=1,  cov=True)

(array([0.04753664, 7.03259355]), array([[ 7.31323813e-06, -1.07535682e-03],
        [-1.07535682e-03,  2.11759139e-01]]))

## 3. Scipy's linregress

In [300]:
from scipy import stats

In [301]:
stats.linregress?

In [302]:
stats.linregress(x=data.TV, y=data.sales)

LinregressResult(slope=0.047536640433019736, intercept=7.0325935491276965, rvalue=0.7822244248616064, pvalue=1.4673897001947178e-42, stderr=0.0026906071877968716)

## 4. Scipy's optimize.curve_fit

In [22]:
scipy.optimize.curve_fit?

In [303]:
def line(x, m, b):
    """Evaluate the straight line ``m*x + b`` at ``x``.

    ``x`` is the independent variable (scalar or array), ``m`` the slope
    and ``b`` the intercept.
    """
    scaled = m * x
    return scaled + b

In [321]:
?scipy.optimize.curve_fit

In [304]:
# No p0 given: curve_fit starts every parameter of `line` at 1.
scipy.optimize.curve_fit(f=line, xdata=data.TV, ydata=data.sales)

(array([0.04753664, 7.03259358]), array([[ 7.23936702e-06, -1.06449463e-03],
        [-1.06449463e-03,  2.09620159e-01]]))

In [305]:
# Same fit, this time with an explicit initial guess for (m, b).
initial_guess = np.asarray([1, 1])
scipy.optimize.curve_fit(f=line, xdata=data.TV, ydata=data.sales, p0=initial_guess)

(array([0.04753664, 7.03259358]), array([[ 7.23936702e-06, -1.06449463e-03],
        [-1.06449463e-03,  2.09620159e-01]]))

## scipy.optimize.leastsq  and scipy.optimize.least_squares
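Both minimize the sum of squares of a *residual* function whose first argument is the parameter vector (`fun(params, *args)`). The calls below don't work because `line(x, m, b)` is passed in directly instead of a residual such as `lambda p, x, y: line(x, *p) - y` with `args=(xdata, ydata)`.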

In [339]:
def line(x, m, b):
    """Return the value(s) at ``x`` of the line with slope ``m`` and intercept ``b``."""
    slope_term = m * x
    return slope_term + b

In [340]:
# Fails with the TypeError below: least_squares calls fun(x0) with the
# parameter vector as its only argument, so `line` never receives m and b.
# It expects a residual function instead, e.g.
#   lambda p: line(np.asarray(data.TV), *p) - np.asarray(data.sales)
scipy.optimize.least_squares(fun=line, x0=np.asarray([1,1]))#, args=(data.sales))

TypeError: line() missing 2 required positional arguments: 'm' and 'b'

In [341]:
# Fails with the TypeError below: `(np.asarray(data.sales))` is a plain array,
# not a 1-tuple (no trailing comma), so func ends up called as
# line(x0, sales) and `b` is missing. leastsq also expects func to return
# residuals (model - data), with the parameter vector as its first argument.
scipy.optimize.leastsq(func=line, x0=np.asarray([1,1]), args=(np.asarray(data.sales)))

TypeError: line() missing 1 required positional argument: 'b'

## 5. Numpy's linalg.lstsq

https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.linalg.lstsq.html

In [342]:
np.linalg.lstsq?

In [343]:
# a = x values, b = y values. The x values need a column of 1s appended to them (for the intercept).

In [344]:
ones = np.ones(shape=len(data.TV))

In [345]:
xdata = np.asarray(data.TV)

In [346]:
A2 = np.vstack([xdata, np.ones(len(xdata))]).T  # same as A below. 

In [347]:
# Design matrix for lstsq: column 0 holds the TV values, column 1 the ones.
A = np.column_stack((xdata, ones))

In [348]:
B = np.asarray(data.sales)

In [349]:
A.shape, B.shape

((200, 2), (200,))

In [350]:
# Least-squares solution of A @ [m, b] ~= B. Returns (solution, sum of squared
# residuals, rank of A, singular values of A).
# rcond=None opts in to NumPy's current default cutoff for small singular
# values and avoids the FutureWarning raised when rcond is left unspecified.
np.linalg.lstsq(a=A, b=B, rcond=None)

  """Entry point for launching an IPython kernel.


(array([0.04753664, 7.03259355]),
 array([2102.53058313]),
 2,
 array([2406.50529477,    7.1173192 ]))

### Scipy's linalg.lstsq takes the same arguments and gives the same fit

In [352]:
scipy.linalg.lstsq(a=A, b=B)

(array([0.04753664, 7.03259355]),
 2102.530583131351,
 2,
 array([2406.50529477,    7.1173192 ]))

## Statsmodels.ols

In [67]:
import statsmodels.formula.api as smf

In [69]:
?smf.ols

In [71]:
# Ordinary least squares via the formula interface: regress `sales` on `TV`.
# The formula adds the intercept term itself (see `Intercept` in the fitted
# params below), so no column of ones is needed.
result = smf.ols(formula='sales ~ TV', data=data).fit()

In [72]:
result.params

Intercept    7.032594
TV           0.047537
dtype: float64

In [73]:
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.612
Model:                            OLS   Adj. R-squared:                  0.610
Method:                 Least Squares   F-statistic:                     312.1
Date:                Fri, 11 Jan 2019   Prob (F-statistic):           1.47e-42
Time:                        09:25:15   Log-Likelihood:                -519.05
No. Observations:                 200   AIC:                             1042.
Df Residuals:                     198   BIC:                             1049.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.0326      0.458     15.360      0.0

In [74]:
print(result.summary2())

                 Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     0.610    
Dependent Variable: sales            AIC:                1042.0913
Date:               2019-01-11 09:25 BIC:                1048.6880
No. Observations:   200              Log-Likelihood:     -519.05  
Df Model:           1                F-statistic:        312.1    
Df Residuals:       198              Prob (F-statistic): 1.47e-42 
R-squared:          0.612            Scale:              10.619   
--------------------------------------------------------------------
              Coef.    Std.Err.      t      P>|t|    [0.025   0.975]
--------------------------------------------------------------------
Intercept     7.0326     0.4578   15.3603   0.0000   6.1297   7.9355
TV            0.0475     0.0027   17.6676   0.0000   0.0422   0.0528
------------------------------------------------------------------
Omnibus:              0.531         Durbin-Watson:           1.935
Pro

## Scikit-learn

In [1]:
from sklearn.linear_model import LinearRegression

In [2]:
linreg = LinearRegression( )

In [34]:
xdata = np.asarray(data.TV)
y = np.asarray(data.sales)

# Not used for the scikit-learn fit: LinearRegression fits the intercept
# itself (fit_intercept=True by default), so no column of ones goes into X.
ones = np.ones(shape=len(data.TV))

# scikit-learn expects X as a 2-D array of shape (n_samples, n_features).
# Build a plain ndarray column of shape (200, 1) instead of an np.matrix:
# np.matrix is discouraged by NumPy and rejected by recent scikit-learn
# releases, while the fitted intercept and coefficient are unchanged.
X = xdata.reshape(-1, 1)

In [36]:
# Fit sales on the single TV feature.
# NOTE(review): fit() presumably returns the estimator itself, so `result`
# and `linreg` would be the same fitted object -- confirm. The name `result`
# is also used for the statsmodels fit elsewhere in this notebook.
result = linreg.fit(X, y) 

# print the intercept and coefficients
print(result.intercept_)
print(result.coef_)

7.0325935491276965
[0.04753664]


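Just like numpy.linalg.lstsq, the X needs to be 2D, with shape (n_samples, n_features). Unlike lstsq, no column of ones is attached here: LinearRegression fits the intercept itself, so X is just the single TV column with shape (200, 1).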

## Using Matrices/Vector Algebra

See [Wikipedia page](https://en.wikipedia.org/wiki/Ordinary_least_squares) under the Linear Model section and Matrix/vector formulation subsection. 

${\displaystyle {\hat {\boldsymbol {\beta }}}=(\mathbf {X} ^{\rm {T}}\mathbf {X} )^{-1}\mathbf {X} ^{\rm {T}}\mathbf {y} .}$

${\displaystyle \mathbf {X} ={\begin{bmatrix}X_{11}&X_{12}&\cdots &X_{1p}\\X_{21}&X_{22}&\cdots &X_{2p}\\\vdots &\vdots &\ddots &\vdots \\X_{n1}&X_{n2}&\cdots &X_{np}\end{bmatrix}},\qquad {\boldsymbol {\beta }}={\begin{bmatrix}\beta _{1}\\\beta _{2}\\\vdots \\\beta _{p}\end{bmatrix}},\qquad \mathbf {y} ={\begin{bmatrix}y_{1}\\y_{2}\\\vdots \\y_{n}\end{bmatrix}}.}$


__Linear Modeling Function:__

<br/>

${\displaystyle y_{i}=\beta _{1}x_{i1}+\beta _{2}x_{i2}+\cdots +\beta _{p}x_{ip}+\varepsilon _{i}}$

<br/>

As a rule, the constant term is always included in the set of regressors $X$, say, by taking $x_{i1} = 1$ for all observations $i = 1, \ldots, n$. The coefficient $\beta_1$ corresponding to this regressor is called the intercept.

In [37]:
xdata = np.asarray(data.TV)
ones  = np.ones(shape=len(data.TV))  

# Alternative ways to build the same (200, 2) design matrix [1, x]:
# X = np.asarray(list(zip(ones, xdata)))   # ndarray
# X = np.vstack([ones, xdata]).T           # ndarray
# X = np.asmatrix([ones, xdata]).T         # np.matrix
# The values are identical, but only the np.matrix form works with the
# `*` / `**-1` formula used for beta in the next cell.

# Used here: np.matrix, so that `*` is the matrix product and `**-1` the inverse.
X = np.matrix([ones, xdata]).T  # shape(200, 2)
y = np.asarray(data.sales)

print(X.shape)            # (200, 2)
print(y.shape)            # (200, )
print(np.vstack(y).shape) # (200, 1)

(200, 2)
(200,)
(200, 1)


In [38]:
# Normal equations: beta = (X^T X)^-1 X^T y.
# This relies on X being an np.matrix, for which `*` is the matrix product and
# `**-1` the matrix inverse. np.vstack(y) turns y into a (200, 1) column, so
# the result is (2, 1): row 0 is the intercept, row 1 the slope.
beta = ((X.T * X)**-1) * X.T * np.vstack(y)
beta

matrix([[7.03259355],
        [0.04753664]])

__Although X represented as a numpy array looks the same as the matrix of X and has the same shape, it will not work in the calculation of the coefficients, because `*` is element-wise for arrays. Let's take a look.__


In [47]:
X.shape

(200, 2)

In [48]:
X

array([[  1. , 230.1],
       [  1. ,  44.5],
       [  1. ,  17.2],
       [  1. , 151.5],
       [  1. , 180.8],
       [  1. ,   8.7],
       [  1. ,  57.5],
       [  1. , 120.2],
       [  1. ,   8.6],
       [  1. , 199.8],
       [  1. ,  66.1],
       [  1. , 214.7],
       [  1. ,  23.8],
       [  1. ,  97.5],
       [  1. , 204.1],
       [  1. , 195.4],
       [  1. ,  67.8],
       [  1. , 281.4],
       [  1. ,  69.2],
       [  1. , 147.3],
       [  1. , 218.4],
       [  1. , 237.4],
       [  1. ,  13.2],
       [  1. , 228.3],
       [  1. ,  62.3],
       [  1. , 262.9],
       [  1. , 142.9],
       [  1. , 240.1],
       [  1. , 248.8],
       [  1. ,  70.6],
       [  1. , 292.9],
       [  1. , 112.9],
       [  1. ,  97.2],
       [  1. , 265.6],
       [  1. ,  95.7],
       [  1. , 290.7],
       [  1. , 266.9],
       [  1. ,  74.7],
       [  1. ,  43.1],
       [  1. , 228. ],
       [  1. , 202.5],
       [  1. , 177. ],
       [  1. , 293.6],
       [  1

In [49]:
# THIS HAS SHAPE (200, 2). 
X = np.asarray( [ones, xdata] ).T

In [50]:
X.shape

(200, 2)

In [51]:
X

array([[  1. , 230.1],
       [  1. ,  44.5],
       [  1. ,  17.2],
       [  1. , 151.5],
       [  1. , 180.8],
       [  1. ,   8.7],
       [  1. ,  57.5],
       [  1. , 120.2],
       [  1. ,   8.6],
       [  1. , 199.8],
       [  1. ,  66.1],
       [  1. , 214.7],
       [  1. ,  23.8],
       [  1. ,  97.5],
       [  1. , 204.1],
       [  1. , 195.4],
       [  1. ,  67.8],
       [  1. , 281.4],
       [  1. ,  69.2],
       [  1. , 147.3],
       [  1. , 218.4],
       [  1. , 237.4],
       [  1. ,  13.2],
       [  1. , 228.3],
       [  1. ,  62.3],
       [  1. , 262.9],
       [  1. , 142.9],
       [  1. , 240.1],
       [  1. , 248.8],
       [  1. ,  70.6],
       [  1. , 292.9],
       [  1. , 112.9],
       [  1. ,  97.2],
       [  1. , 265.6],
       [  1. ,  95.7],
       [  1. , 290.7],
       [  1. , 266.9],
       [  1. ,  74.7],
       [  1. ,  43.1],
       [  1. , 228. ],
       [  1. , 202.5],
       [  1. , 177. ],
       [  1. , 293.6],
       [  1


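Computing beta with this ndarray `X` will throw the following error, because `*` on ndarrays is element-wise multiplication (with broadcasting), not the matrix product: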

    ValueError: operands could not be broadcast together with shapes (2,200) (200,2) 

In [52]:
beta = ((X.T * X)**-1) * X.T * np.vstack(y)
beta

ValueError: operands could not be broadcast together with shapes (2,200) (200,2) 

#### The matrix:

In [53]:
# np.matrix: `*` is the matrix product, so (2, 200) x (200, 1) -> (2, 1).
X = np.matrix([ones, xdata]).T  # shape(200, 2)
X.T * np.vstack(y)

matrix([[  2804.5 ],
        [482108.34]])

#### The array:

In [54]:
# ndarray: `*` is element-wise, and shapes (2, 200) and (200, 1) cannot be
# broadcast together, hence the ValueError below. The matrix product for
# ndarrays is written X.T @ np.vstack(y).
X = np.asarray( [ones, xdata] ).T
X.T * np.vstack(y)

ValueError: operands could not be broadcast together with shapes (2,200) (200,1) 