**Non-Linear Regression**
  * yh = w0 + w1x1 + w2x2 +....+ wnxn => linear regression
  * yh = w0 + w1x1 + w2x2 + w3x1x2 => non-linear
  * w3x1x2 -> feature interaction

In [1]:
import pandas as pd 

df = pd.read_csv('https://raw.githubusercontent.com/bipulshahi/Dataset/main/Advertising.csv', index_col = 0)

df.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [2]:
X = df[['TV','radio']]
X['TVR'] = X.TV * X.radio

Y = df['sales']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['TVR'] = X.TV * X.radio


In [3]:
X.head()

Unnamed: 0,TV,radio,TVR
1,230.1,37.8,8697.78
2,44.5,39.3,1748.85
3,17.2,45.9,789.48
4,151.5,41.3,6256.95
5,180.8,10.8,1952.64


In [6]:


#split into train & test 
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(X,Y,train_size=0.75)


#import algorithm from scikit-learn and train the model 
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()

model1.fit(xtrain,ytrain)

print("Trained coeffecients =",model1.coef_, "intercept =" , model1.intercept_)


#Evaluate model performance
ytrainPred = model1.predict(xtrain)
ytestPred = model1.predict(xtest)

print("Train mean absolute error" , abs(ytrain - ytrainPred).mean())
print("Test mean absolute error" , abs(ytest - ytestPred).mean())

Trained coeffecients = [0.01852514 0.02517899 0.00110001] intercept = 6.861838720433315
Train mean absolute error 0.6541893414230887
Test mean absolute error 0.6699459803408618


**Non-Linear Regression of degree 2**

In [7]:
import numpy as np

n1 = np.random.randint(1,9,(5,1))
n1

array([[2],
       [4],
       [4],
       [6],
       [8]])

In [8]:
from sklearn.preprocessing import PolynomialFeatures
pol1 = PolynomialFeatures(degree=2,include_bias=False)

pol1.fit_transform(n1)

array([[ 2.,  4.],
       [ 4., 16.],
       [ 4., 16.],
       [ 6., 36.],
       [ 8., 64.]])

In [9]:
pol2 = PolynomialFeatures(degree=3,include_bias=False)

pol2.fit_transform(n1)

array([[  2.,   4.,   8.],
       [  4.,  16.,  64.],
       [  4.,  16.,  64.],
       [  6.,  36., 216.],
       [  8.,  64., 512.]])

In [10]:
n2 = np.random.randint(1,9,(5,2))
n2

array([[8, 5],
       [2, 8],
       [2, 6],
       [1, 8],
       [4, 1]])

In [11]:
pol3 = PolynomialFeatures(degree=2,include_bias=False)

pol3.fit_transform(n2)

array([[ 8.,  5., 64., 40., 25.],
       [ 2.,  8.,  4., 16., 64.],
       [ 2.,  6.,  4., 12., 36.],
       [ 1.,  8.,  1.,  8., 64.],
       [ 4.,  1., 16.,  4.,  1.]])

In [12]:
pol4 = PolynomialFeatures(degree=3,include_bias=False)

pol4.fit_transform(n2)

array([[  8.,   5.,  64.,  40.,  25., 512., 320., 200., 125.],
       [  2.,   8.,   4.,  16.,  64.,   8.,  32., 128., 512.],
       [  2.,   6.,   4.,  12.,  36.,   8.,  24.,  72., 216.],
       [  1.,   8.,   1.,   8.,  64.,   1.,   8.,  64., 512.],
       [  4.,   1.,  16.,   4.,   1.,  64.,  16.,   4.,   1.]])

# Non-Linear Regression on advertising 

In [13]:
df.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [14]:
X = df[['TV','radio']]
Y = df['sales']

In [15]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(X,Y,train_size=0.75)

In [16]:
from sklearn.preprocessing import PolynomialFeatures
pol = PolynomialFeatures(degree = 2, include_bias=False)

pol.fit(xtrain)

xtrainPol = pol.transform(xtrain)
xtestPol = pol.transform(xtest)

In [17]:
print(xtrain.shape)
print(xtrainPol.shape)

(150, 2)
(150, 5)


In [18]:
#Train the model using xtrainPol & ytrain

from sklearn.linear_model import LinearRegression
model2 = LinearRegression()
model2.fit(xtrainPol,ytrain)

print(model2.coef_)
#Evaluate model performance

ytrainPred = model2.predict(xtrainPol)
ytestPred = model2.predict(xtestPol)

[ 4.76413878e-02  4.27323112e-02 -9.77353532e-05  1.04810876e-03
 -2.97069855e-05]


In [19]:
#Evaluate performance
print("Train mean absolute error" , abs(ytrain - ytrainPred).mean())
print("Test mean absolute error" , abs(ytest - ytestPred).mean())

Train mean absolute error 0.3572030002983661
Test mean absolute error 0.5261942007897548
