# Basic Linear Regression

In [3]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [48]:
x = np.array([1,2,3,4,5,6,7,8,9,10])
y = np.array([3,6,9,12,15,18,21,24,27,30])

### You should call .reshape() on x because this array must be two-dimensional, or more precisely, it must have one column and as many rows as necessary. That’s exactly what the argument (-1, 1) of .reshape() specifies.

In [49]:
x = x.reshape(-1,1)
print(x)
print("\n",y)

[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]]

 [ 3  6  9 12 15 18 21 24 27 30]


### Creating model

In [50]:
model = LinearRegression()

In [51]:
model.fit(x,y)

LinearRegression()

### Accuray i.e R-Score (Ranges between 0 and 1, higher the R-Score, higher the accuracy in prediction)

In [52]:
r_sq = model.score(x, y)
print(r_sq)

1.0


In [53]:
y_pred = model.predict(x)
print(f"predicted response:\n\n{y_pred}")

predicted response:

[ 3.  6.  9. 12. 15. 18. 21. 24. 27. 30.]


In [54]:
model.predict([[11]])

array([33.])

### Testing

In [62]:
x_test = np.array([30,31,32,33,34]).reshape((-1, 1))
x_test

array([[30],
       [31],
       [32],
       [33],
       [34]])

In [64]:
y_pred = model.predict(x_test)
y_pred

array([ 90.,  93.,  96.,  99., 102.])

### You can notice that .intercept_ is a scalar, while .coef_ is an array.

In [65]:
print(f"Intercept: {model.intercept_}\n")

print(f"Slope: {model.coef_}")

Intercept: 3.552713678800501e-15

Slope: [3.]


# Basic Multiple Linear Regression

In [67]:
from sklearn.linear_model import LinearRegression

x = [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]

y = [4, 5, 20, 14, 32, 22, 38, 43]

In [69]:
x, y = np.array(x), np.array(y)

In [81]:
x

array([[ 0,  1],
       [ 5,  1],
       [15,  2],
       [25,  5],
       [35, 11],
       [45, 15],
       [55, 34],
       [60, 35]])

In [83]:
mul_model = LinearRegression().fit(x, y)

In [88]:
r_sq = mul_model.score(x, y)

print(f"coefficient of determination: {r_sq}\n")

print(f"intercept: {model.intercept_}\n")

print(f"coefficients: {model.coef_}")

coefficient of determination: 0.8615939258756776

intercept: 3.552713678800501e-15

coefficients: [3.]


In [93]:
 y_pred = mul_model.predict(x)
print(f"Predicted response:\n\n{y_pred}")

Predicted response:

[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


# Polynomial Regression

In [124]:
from sklearn.preprocessing import PolynomialFeatures

In [125]:
x = np.array([5, 15, 25, 35, 45, 55]).reshape((-1, 1))
y = np.array([15, 11, 2, 8, 25, 32])

In [136]:
transformer = PolynomialFeatures(degree=2, include_bias=False)

In [137]:
transformer.fit(x)

PolynomialFeatures(include_bias=False)

In [138]:
x_ = transformer.transform(x)

In [139]:
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

In [141]:
x_

array([[   5.,   25.],
       [  15.,  225.],
       [  25.,  625.],
       [  35., 1225.],
       [  45., 2025.],
       [  55., 3025.]])

In [142]:
model = LinearRegression().fit(x_, y)

In [143]:
r_sq = model.score(x_, y)

print(f"coefficient of determination: {r_sq}\n")

print(f"intercept: {model.intercept_}\n")

print(f"coefficients: {model.coef_}")

coefficient of determination: 0.8908516262498563

intercept: 21.372321428571418

coefficients: [-1.32357143  0.02839286]


In [145]:
y_pred = model.predict(x_)
print(f"predicted response:\n{y_pred}")

predicted response:
[15.46428571  7.90714286  6.02857143  9.82857143 19.30714286 34.46428571]


# Using Statsmodel

In [160]:
import statsmodels.api as sm #For detailed info

In [161]:
x = [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
y = [4, 5, 20, 14, 32, 22, 38, 43]
x, y = np.array(x), np.array(y)

In [162]:
model = sm.OLS(y, x)

In [163]:
results = model.fit()

In [164]:
print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.951
Model:                            OLS   Adj. R-squared (uncentered):              0.934
Method:                 Least Squares   F-statistic:                              57.86
Date:                Thu, 10 Nov 2022   Prob (F-statistic):                    0.000120
Time:                        10:58:18   Log-Likelihood:                         -25.398
No. Observations:                   8   AIC:                                      54.80
Df Residuals:                       6   BIC:                                      54.96
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

