Simple Linear Regression

In [21]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [58]:
# Toy data set for simple linear regression.
# The predictor is stored as a column (one row per observation, one feature);
# the response stays one-dimensional.
# (A perfectly linear alternative response would be 21, 41, 61, 81, 101, 121.)
x = np.array([5, 15, 25, 35, 45, 55])[:, np.newaxis]
y = np.array([5, 20, 14, 32, 22, 38])

In [59]:
# Echo the predictor to confirm each observation sits in its own row.
x

array([[ 5],
       [15],
       [25],
       [35],
       [45],
       [55]])

In [60]:
# Echo the response values.
y

array([ 5, 20, 14, 32, 22, 38])

In [61]:
# x is two-dimensional: six rows (observations) by one column (feature).
x.shape

(6, 1)

In [62]:
# y is one-dimensional, with one entry per row of x.
y.shape

(6,)

In [63]:
# Unfitted least-squares estimator; the intercept is estimated (this is the
# default, spelled out here for the reader).
model = LinearRegression(fit_intercept=True)

In [64]:
# Estimate the intercept and slope from the data. fit() hands back the
# estimator itself, which is why this cell echoes its repr.
model.fit(X=x, y=y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [65]:
 # Coefficient of determination (R²) of the fitted model, evaluated on the
 # same data it was trained on; the bare name below echoes the value.
 r_sq = model.score(x, y)
r_sq

0.715875613747954

In [66]:
# Print the R² value with a readable label.
print('coefficient of determination:', r_sq)

coefficient of determination: 0.715875613747954


When you apply `.score()`, the arguments are again the predictor `x` and the response `y`, and the return value is the coefficient of determination, $R^2$.

In [67]:
# Fitted intercept: the value the model predicts at x = 0.
print('intercept:', model.intercept_)

intercept: 5.633333333333329


In [68]:
# coef_ is an array with one entry per feature; with a single feature it
# holds just the slope.
print('slope:', model.coef_)

slope: [0.54]


So the fitted line is $y = 0.54x + 5.6333$ (slope 0.54, intercept ≈ 5.6333).

In [69]:
 # In-sample predictions: the fitted line evaluated at every training input.
 y_pred = model.predict(X=x)

In [70]:
 # sep='\n' puts the array on its own line beneath the label.
 print('predicted response:', y_pred, sep='\n')

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [71]:
# Observed responses, for side-by-side comparison with y_pred.
print(y)

[ 5 20 14 32 22 38]


In [72]:
 # Unseen inputs 0..4, laid out as a single-feature column like the
 # training predictor.
 x_new = np.arange(5)[:, np.newaxis]

In [73]:
# Show the new inputs (one value per row).
print(x_new)

[[0]
 [1]
 [2]
 [3]
 [4]]


In [74]:
# Apply the fitted model to the unseen inputs.
y_new = model.predict(X=x_new)

In [75]:
 # Predicted responses for the new inputs.
 print(y_new)

[5.63333333 6.17333333 6.71333333 7.25333333 7.79333333]


Multiple Linear Regression

In [76]:
# Data for multiple regression: each row of x carries two predictor values,
# and y holds one response per row.
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [77]:
# Eight observations, two predictor columns.
x.shape

(8, 2)

In [78]:
# One response per observation.
y.shape


(8,)

In [79]:
# Build and fit in one step; fit() returns the estimator, so `model` is the
# fitted two-predictor regression.
model = LinearRegression().fit(X=x, y=y)

In [81]:
 # R² of the two-predictor model on its own training data.
 r_sq = model.score(X=x, y=y)

In [82]:
# Report R² for the multiple-regression fit.
print('coefficient of determination:', r_sq)

coefficient of determination: 0.8615939258756776


In [83]:
# Fitted intercept of the two-predictor model.
print('intercept:', model.intercept_)

intercept: 5.52257927519819


In [84]:
# coef_ now holds two values, one per predictor column of x (in column order).
print('slope:', model.coef_)

slope: [0.44706965 0.25502548]


In [85]:
# In-sample predictions from the two-predictor model.
y_pred = model.predict(X=x)

In [86]:
# sep='\n' puts the array on its own line beneath the label.
print('predicted response:', y_pred, sep='\n')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [90]:
# Observed responses, for comparison with y_pred.
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [88]:
# Five unseen observations with two predictors each:
# rows [0, 1], [2, 3], [4, 5], [6, 7], [8, 9].
x_new = np.arange(10).reshape(5, 2)

In [89]:
# Predict responses for the unseen two-predictor observations.
y_new = model.predict(X=x_new)

Advanced Linear Regression With statsmodels

In [91]:
import statsmodels.api as sm

In [101]:
# Same two-predictor data set as in the scikit-learn section, rebuilt here so
# the statsmodels section starts from the raw (constant-free) predictors.
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])
x

array([[ 0,  1],
       [ 5,  1],
       [15,  2],
       [25,  5],
       [35, 11],
       [45, 15],
       [55, 34],
       [60, 35]])

In [102]:
# Shape before the constant column is added: eight rows, two predictors.
x.shape

(8, 2)

In [103]:
# Prepend a column of ones to the predictors; the coefficient estimated for
# that column is the model's intercept (reported as `const` in the summary).
x = sm.add_constant(data=x)

In [104]:
# The design matrix now starts with the column of ones, and the values have
# become floats.
x

array([[ 1.,  0.,  1.],
       [ 1.,  5.,  1.],
       [ 1., 15.,  2.],
       [ 1., 25.,  5.],
       [ 1., 35., 11.],
       [ 1., 45., 15.],
       [ 1., 55., 34.],
       [ 1., 60., 35.]])

In [105]:
# Three columns now: the constant plus the two predictors.
x.shape

(8, 3)

In [106]:
# Same shape check as the previous cell (repeated).
x.shape

(8, 3)

In [108]:
# Note the argument order for statsmodels: the response (endog) comes first,
# the design matrix (exog) second. Fitting returns a separate results object.
model = sm.OLS(endog=y, exog=x)
results = model.fit()

In [109]:
# Full regression report: fit statistics, the coefficient table with standard
# errors and confidence intervals, and residual diagnostics.
results.summary()

UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=8
  "anyway, n=%i" % int(n))


0,1,2,3
Dep. Variable:,y,R-squared:,0.862
Model:,OLS,Adj. R-squared:,0.806
Method:,Least Squares,F-statistic:,15.56
Date:,"Sun, 03 Nov 2019",Prob (F-statistic):,0.00713
Time:,10:45:34,Log-Likelihood:,-24.316
No. Observations:,8,AIC:,54.63
Df Residuals:,5,BIC:,54.87
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.5226,4.431,1.246,0.268,-5.867,16.912
x1,0.4471,0.285,1.567,0.178,-0.286,1.180
x2,0.2550,0.453,0.563,0.598,-0.910,1.420

0,1,2,3
Omnibus:,0.561,Durbin-Watson:,3.268
Prob(Omnibus):,0.755,Jarque-Bera (JB):,0.534
Skew:,0.38,Prob(JB):,0.766
Kurtosis:,1.987,Cond. No.,80.1


In [110]:
 # R² taken from the results object.
 print('coefficient of determination:', results.rsquared)

coefficient of determination: 0.8615939258756777


In [111]:
# Adjusted R², as also reported in the summary table.
print('adjusted coefficient of determination:', results.rsquared_adj)

adjusted coefficient of determination: 0.8062314962259488


In [112]:
# params follows the column order of the design matrix: the constant
# (intercept) first, then one coefficient per predictor.
print('regression coefficients:', results.params)

regression coefficients: [5.52257928 0.44706965 0.25502548]


In [113]:
 # Fitted values for the training rows, read straight off the results object.
 print('predicted response:', results.fittedvalues, sep='\n')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [114]:
# Calling predict() on the training design matrix gives the same numbers as
# results.fittedvalues.
print('predicted response:', results.predict(exog=x), sep='\n')

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [115]:
# Five unseen two-predictor observations ([0, 1] ... [8, 9]), given the same
# leading column of ones as the training design matrix.
x_new = sm.add_constant(np.arange(10).reshape(5, 2))

In [116]:
# Predict responses for the unseen design-matrix rows.
y_new = results.predict(exog=x_new)

In [117]:
# One predicted response per row of x_new.
y_new

array([ 5.77760476,  7.18179502,  8.58598528,  9.99017554, 11.3943658 ])