# Linear Regression

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

sns.set_style("whitegrid")

In [None]:
# Install sklearn library:
!pip install -U scikit-learn

In [None]:
# Helper functions

def draw_grid(x_lim=np.array([-4, 4]), y_lim=np.array([-4, 4])):
    """Draw an empty coordinate grid on the current matplotlib axes.

    Dashed guide lines and short tick marks are drawn at every integer from
    the minimum of each limit up to (but not including) its maximum, followed
    by solid x/y axes through zero and a marker at the origin.

    Parameters
    ----------
    x_lim, y_lim : array-like of two numbers
        Axis limits; also applied to the axes via set_xlim / set_ylim.
    """
    axes = plt.gca()

    grid_color = '#ecf0f1'
    axis_color = '#2c3e50'
    origin_color = '#c0392b'
    tick_span = [0.05, -0.05]  # extent of a tick mark across its axis

    # Vertical guide lines + tick marks along the x axis
    x_first, x_last = int(x_lim.min()), int(x_lim.max())
    for x_pos in range(x_first, x_last):
        axes.axvline(x_pos, linestyle='--', color=grid_color, zorder=0)
        axes.plot([x_pos, x_pos], tick_span, color=axis_color)

    # Horizontal guide lines + tick marks along the y axis
    y_first, y_last = int(y_lim.min()), int(y_lim.max())
    for y_pos in range(y_first, y_last):
        axes.axhline(y_pos, linestyle='--', color=grid_color, zorder=0)
        axes.plot(tick_span, [y_pos, y_pos], color=axis_color)

    # The two axes through zero and the origin marker
    axes.axhline(0, color=axis_color, zorder=0)
    axes.axvline(0, color=axis_color, zorder=0)
    axes.scatter([0], [0], color=origin_color, zorder=0)

    # Switch off the style's own grid, then apply the requested limits
    axes.grid(False)
    axes.set_xlim(x_lim)
    axes.set_ylim(y_lim)

## Definition

Linear Regression 
- supervised machine learning algorithm
- solves a **regression** problem. 
- **Input**: **vector** $x \in R^n$ 
- **Output**: **scalar** $y \in R$.
- The value that our model predicts y is called $\hat{y}$, which is defined as:

$$
\hat{y} = w_1x_1 + w_2x_2 + \dots + w_nx_n + b = b + \sum_{i=1}^{n}{w_ix_i} = w^Tx + b
$$

where

<div align="center">

$w \in R^n$, and $b \in R$ are parameters

$w$ is the vector of **coefficients**, also known as set of **weights**

$b$ is the **intercept**, also known as the **bias**

</div>

<div align="center">
    <img src="https://i.imgur.com/b7zoo7n.png" width="600" />
</div>

Our goal is to find the best fitting line (or hyperplane) that **minimizes mean squared error (MSE)** between our target variable (y) and our predicted output $\hat y$ over all samples in our dataset

$$SSE = \sum_{i=1}^{n}(y_i - \hat y_i)^2$$
$$MSE = \frac{1}{n}SSE$$

This is also known as **Ordinary Least Squares (OLS) Linear Regression**



**That means we need to estimate parameters w and b**

We will use an **optimization algorithm** known as **Gradient Descent** (and later Stochastic Gradient Descent or SGD) to solve this problem

In [None]:
x = np.array([0, 1, 2, 3, 4, 5], dtype=int) # years of experience
y = np.array([2.75, 7.2, 9.4, 16.9, 17.24, 24.32], dtype=float) # salary

def line_lr(x):
    """Evaluate the line y^ = w*x + b at x.

    `w` (slope) and `b` (intercept) are not parameters: they are read from
    the notebook's global namespace at call time, so they must be assigned
    before this function is called.
    """
    weighted = w * x
    return weighted + b

def mse(y, y_hat):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_hat : array-like
        Predicted values, same shape as (or broadcastable to) `y`.

    Returns
    -------
    float
        Mean of the squared differences (y_hat - y)**2.
    """
    # Coerce to arrays so plain lists/tuples work as well as numpy arrays
    y = np.asarray(y, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    return ((y_hat - y) ** 2).mean()

👉 Scenario 1: w = 3.2, b = 5

In [None]:
w = 3.2
b = 5

# -------------------PLOT CHART-------------------------
y_hat = line_lr(x) # prediction for every observed x
line_x = np.arange(-1, 7)

plt.figure(figsize=(10, 5))

draw_grid(x_lim=np.array([-1, 6]), y_lim=np.array([-1, 25]))

plt.scatter(x, y, label='y')
plt.scatter(x, y_hat, c='r', label='y^')
plt.plot(line_x, line_lr(line_x), c='r')
# Residuals: connect each observation to its prediction at the SAME x value.
# Pair the values explicitly instead of using the positional index as x, which
# is only correct when x happens to be 0, 1, 2, ...
for x_i, y_i, y_hat_i in zip(x, y, y_hat):
    plt.plot([x_i, x_i], [y_i, y_hat_i], linestyle='--', c='g')
plt.title(f'Loss function L(w,b) = {mse(y, y_hat)} \n for w={w} and b={b}, y^={w}x + {b}', fontsize=18)

plt.xlabel('Years of experience')
plt.ylabel('Salary')
plt.legend()

plt.show()

👉 Scenario 2: w = 4, b = 3

In [None]:
w = 4
b = 3

# -------------------PLOT CHART-------------------------
y_hat = line_lr(x) # prediction for every observed x
line_x = np.arange(-1, 7)

plt.figure(figsize=(10, 5))

draw_grid(x_lim=np.array([-1, 6]), y_lim=np.array([-1, 25]))

plt.scatter(x, y, label='y')
plt.scatter(x, y_hat, c='r', label='y^')
plt.plot(line_x, line_lr(line_x), c='r')
# Residuals: connect each observation to its prediction at the SAME x value.
# Pair the values explicitly instead of using the positional index as x, which
# is only correct when x happens to be 0, 1, 2, ...
for x_i, y_i, y_hat_i in zip(x, y, y_hat):
    plt.plot([x_i, x_i], [y_i, y_hat_i], linestyle='--', c='g')
plt.title(f'Loss function L(w,b) = {mse(y, y_hat)} \n for w={w} and b={b}, y^={w}x + {b}', fontsize=18)

plt.xlabel('Years of experience')
plt.ylabel('Salary')
plt.legend()

plt.show()

# Get data

We are given the dataset with the following columns (features): how much a company spends on Radio, TV and Newspaper advertising each year and its annual Sales in terms of units sold. 

We are trying to develop an equation that will let us **predict units sold in thousands (Sales column) based on how much a company spends on advertising (in thousands of US dollars)**. Each row represents one company.

In [None]:
# Forward slashes work on Windows, macOS and Linux alike; the original
# backslash-separated path only resolved on Windows.
df = pd.read_csv('data/advertising.csv')

In [None]:
df.head()

Using the real dataset above, we will estimate sales given a company's TV advertising spend.


In [None]:
X = df[['TV']]
y = df[['Sales']]

In [None]:
plt.figure(figsize = (8,5))
sns.scatterplot(x = X['TV'], y = y['Sales'])
plt.title('TV vs SALES')
plt.show()

$$Sales = TV*Weight + Bias$$
$$y = Xw + b$$
- **TV is our independent variable**. In machine learning we also call these variables **features**.
- **Sales is dependent variable**. This is what we have to predict
- **Weight is the coefficient** for the TV independent variable. In machine learning we call coefficients weights.
- **Bias is the intercept where our line intercepts the y-axis**. In machine learning we can call intercepts bias. Bias offsets all predictions that we make.


We will try to **learn the correct values for Weight and Bias**. By the end of our training, our equation will approximate the line of best fit.

# Single Feature Linear Regression

## Train Test Split:

<img src="https://cdn-coiao.nitrocdn.com/CYHudqJZsSxQpAPzLkHFOkuzFKDpEHGF/assets/static/optimized/rev-85bf93c/wp-content/uploads/2022/05/sklearn-train-test-split_syntax-explanation_v2.png" width="600">

In [None]:
df.head()

In [None]:
# Import train test split function:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

print('X train shape', X_train.shape)
print('y train shape', y_train.shape)
print('X test shape', X_test.shape)
print('y test shape', y_test.shape)

In [None]:
plt.figure(figsize = (8,5))
sns.scatterplot(x = X_train['TV'], y = y_train['Sales'], label = 'Train data')
sns.scatterplot(x = X_test['TV'], y = y_test['Sales'], color = 'red', label = 'Test data')
plt.title('TV vs SALES ')
plt.show()

## Import and Train model:

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Call model:
lr1 = LinearRegression()
lr1

In [None]:
# Fit Train set (Train model)
lr1.fit(X_train, y_train)
lr1 #<= trained model

In [None]:
# Get the weight and bias learned by the fitted model:
w = lr1.coef_
b = lr1.intercept_

print('weight', w)
print('bias', b)


In [None]:
w = lr1.coef_
b = lr1.intercept_

# Compute the training predictions here: this cell previously read
# `y_train_pred` before any cell had defined it, which raises NameError
# when the notebook is run top to bottom on a fresh kernel.
y_train_pred = lr1.predict(X_train)

print(f'Weight: {w}')
print(f'Bias: {b}')
print(f'MSE: {mean_squared_error(y_train, y_train_pred)}')

plt.figure(figsize = (8,5))
sns.scatterplot(x = X_train['TV'], y = y_train['Sales'])
sns.scatterplot(x = X_test['TV'], y = y_test['Sales'], color = 'red')

# Fitted regression line over the whole TV range
plt.plot(np.array(X), lr1.predict(X))
# y was a one-column DataFrame when fitting, hence the nested indexing
plt.title(f'y = {w[0][0]}x + {b[0]}', fontsize = 15)

plt.show()

## Loss function

As mentioned above, we will use Mean Square Error as our loss function

A loss function is a wrapper around our model function that tells us "how good" our model is at making predictions for a given set of parameters. The loss function has its own curve and its own derivatives. The slope of this curve tells us the direction we should update our weights to make the model more accurate!

For our simple linear equation:

y = wx + b

MSE can be calculated with the formula:

$$MSE =  \frac{1}{N} \sum_{i=1}^{N} (y_i - (wx_i + b))^2$$
- $N$ is the total number of observations (data points)
- $\frac{1}{N} \sum_{i=1}^{N}$ is the mean
- $y_i$ is the actual value of an observation and $(wx_i + b)$ is our prediction

👉 Here are three common evaluation metrics for regression problems:

**Mean Absolute Error** (MAE) is the mean of the absolute value of the errors:

$$\frac 1n\sum_{i=1}^n|y_i-\hat{y}_i|$$

**Mean Squared Error** (MSE) is the mean of the squared errors:

$$\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2$$

**Root Mean Squared Error** (RMSE) is the square root of the mean of the squared errors:

$$\sqrt{\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2}$$

Comparing these metrics:

- **MAE** is the easiest to understand, because it's the average error.
- **MSE** is more popular than MAE, because MSE "punishes" larger errors, which tends to be useful in the real world.
- **RMSE** is even more popular than MSE, because RMSE is interpretable in the "y" units.

All of these are **loss functions**, because we want to minimize them.

In [None]:
# Predictions of the fitted model on the data it was trained on
y_train_pred = lr1.predict(X_train)

# Error metrics on the training data (RMSE is the square root of MSE)
train_mse = mean_squared_error(y_train, y_train_pred)
train_mae = mean_absolute_error(y_train, y_train_pred)

print('>>> On Training data: <<<')
print('MSE:', train_mse)
print('MAE:', train_mae)
print('RMSE:', train_mse**0.5)


In [None]:
# Predictions of the fitted model on the held-out test data
y_test_prd = lr1.predict(X_test)

# Error metrics on the test data (RMSE is the square root of MSE)
test_mse = mean_squared_error(y_test, y_test_prd)
test_mae = mean_absolute_error(y_test, y_test_prd)

print('>>> On Testing data <<<')
print('MSE:', test_mse)
print('MAE:', test_mae)
print('RMSE:', test_mse**0.5)


In [None]:
# MSE collection: maps a label for the feature set used to
# [train MSE, test MSE], so that later experiments can be compared.
mse_dct = {}
mse_dct['TV'] = [mean_squared_error(y_train, y_train_pred), mean_squared_error(y_test, y_test_prd)]
mse_dct

##### 🤔 Predict Sales when the company spends 200 (thousand USD) on TV advertising!

In [None]:
# First let's see the shape of X:
X.shape

In [None]:
# Your input must be a 2-d array:
ip = np.array([[200]])
ip.shape

In [None]:
lr1.predict(ip)

👉 When the company spends 200 thousand USD on TV advertising, the model predicts sales of about 16.45 thousand units.

In [None]:
# Your turn: predict company sales when they pay 300 USD on TV adv.
ip2 = np.array([[300]])
lr1.predict(ip2)

# Multiple linear regression

Now we will consider all the features for our sale prediction

In [None]:
df

In [None]:
# Features: TV, Radio, Newspaper
X = df[['TV', 'Radio', 'Newspaper']]
y = df[['Sales']]
X.shape, y.shape

In [None]:
# Train test split:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

print('X train shape', X_train.shape)
print('y train shape', y_train.shape)
print('X test shape', X_test.shape)
print('y test shape', y_test.shape)

In [None]:
# Train model (fit model):
lr2 = LinearRegression()
lr2.fit(X_train, y_train)


In [None]:
# Weights and bias:
print(f'Weights: {lr2.coef_}')
print(f'Bias: {lr2.intercept_}')

In [None]:
# Prediction on Trainset:
y_train_pred_2 = lr2.predict(X_train)
y_train_pred_2[:5]

In [None]:
# predict on testset:
y_test_prd_2 = lr2.predict(X_test)
y_test_prd_2[:5]

### Evaluation:

In [None]:
# Loss functions on train data:
print('>>> On Training data: <<<')
print('MSE:', mean_squared_error(y_train, y_train_pred_2))
print('MAE:', mean_absolute_error(y_train, y_train_pred_2))
print('RMSE:', mean_squared_error(y_train, y_train_pred_2)**0.5)

In [None]:
# Predict on Test-set:
y_test_pred_2 = lr2.predict(X_test)
# Loss functions on test data:
print('>>> On Testing data <<<')
print('MSE:', mean_squared_error(y_test, y_test_pred_2))
print('MAE:', mean_absolute_error(y_test, y_test_pred_2))
print('RMSE:', mean_squared_error(y_test, y_test_pred_2)**0.5)


In [None]:
# Record the train/test MSE of the three-feature model next to earlier results
mse_dct['TV, Radio, Newspaper'] = [
    mean_squared_error(y_train, y_train_pred_2),
    mean_squared_error(y_test, y_test_pred_2),
]

# Long-format table for plotting: one row per (case, metric) pair
metric_cols = ['Train - MSE', 'Test - MSE']
dfplot = pd.DataFrame(mse_dct).T.reset_index()
dfplot.columns = ['case'] + metric_cols
dfplot = dfplot.melt(id_vars='case', value_vars=metric_cols)

# Grouped bar chart, one bar group per case
plt.figure(figsize=(10, 5))
ax = sns.barplot(data=dfplot, x='case', y='value', hue='variable')
plt.title('MSE COMPARISON')
# Write the value on top of every bar, one container per metric
for bars in ax.containers[:dfplot['variable'].nunique()]:
    plt.bar_label(bars)
plt.show()

# Transformer - MinMaxScaler:

Documentation: https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

Transform features by scaling each feature to a given range.

#### 🤔 Example:

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Example data:
data = np.array([[-1, 2], [-0.8, 6], [0, 10], [1, 18]])
print(data)


In [None]:
# Scale example data to 0-1:
scaler = MinMaxScaler((0, 1))
scaler.fit(data)

In [None]:
data

In [None]:
print(scaler.transform(data))

#### 👉 Apply to the Advertising data:

In [None]:
# Create the scaler that will be fitted on the advertising training features:
scaler = MinMaxScaler()
scaler

In [None]:
# Fit on trainset:
scaler.fit(X_train)

In [None]:
# Transform both the train set and the test set with the scaler that was
# fitted on the train set only:
X_train_scaler = scaler.transform(X_train)
X_test_scaler = scaler.transform(X_test)

In [None]:
X_train.head()

In [None]:
X_train_scaler[:5]

In [None]:
plt.figure(figsize=(12, 4))

plt.subplot(121)
sns.histplot(X_train['TV'])
plt.title('Before using MinMaxScaler')

plt.subplot(122)
sns.histplot(X_train_scaler[:, 0])
plt.title('After using MinMaxScaler')
plt.xlabel('TV')

plt.show()

**Train model:**

In [None]:
# Train model (fit model):
lr3 = LinearRegression()
lr3.fit(X_train_scaler, y_train)


### Evaluation

In [None]:
# Prediction on Trainset:
y_train_pred_3 = lr3.predict(X_train_scaler)
print(f'Train-set MSE: {mean_squared_error(y_train, y_train_pred_3)}')

In [None]:
# Predict on Test-set:
y_test_pred_3 = lr3.predict(X_test_scaler)
print(f'Test-set MSE: {mean_squared_error(y_test, y_test_pred_3)}')


In [None]:
# Record the train/test MSE of the scaled three-feature model
mse_dct['TV, Radio, Newspaper \n+ MinMaxScaler'] = [
    mean_squared_error(y_train, y_train_pred_3),
    mean_squared_error(y_test, y_test_pred_3),
]

# Long-format table for plotting: one row per (case, metric) pair
metric_cols = ['Train - MSE', 'Test - MSE']
dfplot = pd.DataFrame(mse_dct).T.reset_index()
dfplot.columns = ['case'] + metric_cols
dfplot = dfplot.melt(id_vars='case', value_vars=metric_cols)

# Grouped bar chart, one bar group per case
plt.figure(figsize=(10, 5))
ax = sns.barplot(data=dfplot, x='case', y='value', hue='variable')
plt.title('MSE COMPARISON')
# Write the value on top of every bar, one container per metric
for bars in ax.containers[:dfplot['variable'].nunique()]:
    plt.bar_label(bars)
plt.show()

## Polynomial Linear Regression & Overfitting/Underfitting

<img src="https://static.javatpoint.com/tutorial/machine-learning/images/machine-learning-polynomial-regression.png" width="600">

A polynomial degree 1 gives us the simple linear regression model:

$$
\hat{y} = wx + b,\ with\ x, \hat{y} \in R
$$

By adding $x^2$ as another feature, the model becomes a quadratic function of $x$:

$$
\hat{y} = w_1x + w_2x^2 + b
$$

We can continue to add more powers of $x$ as additional features to obtain a polynomial of degree $n$

$$
\hat{y} = b + \sum_{i=1}^n{w_ix^i}
$$

In [None]:
# Forward slashes work on Windows, macOS and Linux alike; the original
# backslash-separated path only resolved on Windows.
# NOTE: this rebinds `df`, replacing the advertising DataFrame loaded earlier.
df = pd.read_csv('data/poly_example_1.csv')
x = df[['X']].values
y = df[['Y']].values

In [None]:
df.head()

In [None]:
# import PolynomialFeatures
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Define an instance of PolynomialFeatures with degree=3 called 'poly_reg'
# (include_bias=False leaves out the constant/bias column)
poly_reg = PolynomialFeatures(degree = 3, include_bias= False)

# Fit and transform 'x' with the PolynomialFeatures and save the result in 'X_poly'
X_poly = poly_reg.fit_transform(x) 



In [None]:
x[:5], X_poly[:5]

#### **Example**

In [None]:
df.head()

In [None]:
x = df[['X']].values
y = df[['Y']].values

# Let's plot the dataset using plt.scatter()
plt.figure(figsize=(10, 5))

# Your code here
plt.scatter(x, y)

plt.show()

In [None]:
# Split the data in 80% training and 20% validation
from sklearn.model_selection import train_test_split

X_train1, X_test1, y_train1, y_test1 = train_test_split(x, y, test_size=0.2, random_state=102)

print('X train shape', X_train1.shape)
print('y train shape', y_train1.shape)
print('X test shape', X_test1.shape)
print('y test shape', y_test1.shape)

In [None]:
plt.figure(figsize=(10, 5))
plt.scatter(X_train1, y_train1)
plt.scatter(X_test1, y_test1)
plt.show()

In [None]:

degree = 20 # hyperparameter

# Define an instance of LinearRegression called 'lr'
lr = LinearRegression()

# Define an instance of PolynomialFeatures with the chosen degree called 'poly_reg'
poly_reg = PolynomialFeatures(degree = degree)

# Fit the transformer on the training data only, then reuse the fitted
# transformer for the test data (never re-fit a preprocessing step on test data).
X_train1_poly = poly_reg.fit_transform(X_train1)
X_test1_poly = poly_reg.transform(X_test1)

# Fit the Linear Regression model to the expanded training set
lr.fit(X_train1_poly, y_train1)

# Predict on both splits
y_test1_predict = lr.predict(X_test1_poly)
y_train1_predict = lr.predict(X_train1_poly)

print('degree:', degree)
print(f'MSE on train set: {mean_squared_error(y_train1,y_train1_predict)}')
print(f'MSE on validation set: {mean_squared_error(y_test1,y_test1_predict)}')


# Plotting purposes: evaluate the fitted curve on a dense grid over the x range,
# again using the already-fitted transformer
x_plot = np.linspace(x.min(), x.max(), 1000).reshape(-1, 1)
y_plot = lr.predict(poly_reg.transform(x_plot))
plt.figure(figsize=(10, 5))
plt.plot(x_plot, y_plot)
plt.scatter(X_train1, y_train1)
plt.scatter(X_test1, y_test1)
plt.xlim(-6, 6)
plt.ylim(-40, 40)
plt.show()

In [None]:
# Loop over polynomial degrees and compute the loss on the train set and test set:
results = {'degree': [], 'train_mse': [], 'test_mse': []}

for d in range(1, 22):

    degree = d

    # Fresh model and feature expansion for this degree
    lr = LinearRegression()
    poly_reg = PolynomialFeatures(degree = degree)

    # Fit the transformer on the training data only, then reuse it for the test data
    X_train1_poly = poly_reg.fit_transform(X_train1)
    X_test1_poly = poly_reg.transform(X_test1)

    # Fit the Linear Regression model to the expanded training set
    lr.fit(X_train1_poly, y_train1)

    # Predict on both splits
    y_test1_predict = lr.predict(X_test1_poly)
    y_train1_predict = lr.predict(X_train1_poly)

    # Save to result dict:
    results['degree'].append(d)
    results['train_mse'].append(mean_squared_error(y_train1,y_train1_predict))
    results['test_mse'].append(mean_squared_error(y_test1,y_test1_predict))

result_df = pd.DataFrame(results)
result_df

In [None]:
# plt.subplots returns a (figure, axes) pair; unpack it instead of binding the
# whole tuple to `fig`, and draw on the axes explicitly.
fig, ax = plt.subplots(figsize=(12, 6))

# Train vs. test loss for every polynomial degree
ax.plot(result_df['degree'], result_df['train_mse'], label='Train MSE')
ax.plot(result_df['degree'], result_df['test_mse'], label='Test MSE')

ax.set_xlabel('Polynomial Degree')
ax.set_ylabel('Loss')

# One tick per evaluated degree
ax.set_xticks(result_df['degree'])

ax.legend()
plt.show()

![](https://www.researchgate.net/profile/Hayder_Al-Behadili/publication/325999203/figure/fig4/AS:641844216074241@1530038994324/Overfitting-and-underfitting-effect-on-error.png)

#### 👉 Apply to the Advertising data

In [None]:
X_train.head()

In [None]:
# Loop over polynomial degrees and compute the loss on the train set and test set
# when PolynomialFeatures is applied to the scaled advertising features
results = {'degree': [], 'train_mse': [], 'test_mse': []}

for d in range(1, 7):

    degree = d

    # Fresh model and feature expansion for this degree
    lr = LinearRegression()
    poly_reg = PolynomialFeatures(degree = degree)

    # Fit the transformer on the training data only, then reuse it for the test data
    X_train_poly = poly_reg.fit_transform(X_train_scaler)
    X_test_poly = poly_reg.transform(X_test_scaler)

    # Fit the Linear Regression model to the expanded training set
    lr.fit(X_train_poly, y_train)

    # Predict on both splits
    y_test_predict = lr.predict(X_test_poly)
    y_train_predict = lr.predict(X_train_poly)

    # Save to result dict:
    results['degree'].append(d)
    results['train_mse'].append(mean_squared_error(y_train,y_train_predict))
    results['test_mse'].append(mean_squared_error(y_test,y_test_predict))

result_df = pd.DataFrame(results)
# A bare expression in the middle of a cell is not rendered, so show the
# table explicitly (display is provided by the IPython kernel).
display(result_df)

# Plot MSE: plt.subplots returns a (figure, axes) pair, so unpack it
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(result_df['degree'], result_df['train_mse'], label='Train MSE')
ax.plot(result_df['degree'], result_df['test_mse'], label='Test MSE')

ax.set_xlabel('Polynomial Degree')
ax.set_ylabel('Loss')

# One tick per evaluated degree
ax.set_xticks(result_df['degree'])

ax.legend()
plt.show()

✳️ Degree = 5 gives us the best model

In [None]:
# Create the Linear Regression model:
lr4 = LinearRegression()

# Create the PolynomialFeatures transformer with the selected degree:
poly_reg = PolynomialFeatures(degree = 5)

# Fit the transformer on the scaled training data only, then reuse the fitted
# transformer for the scaled test data (never re-fit a preprocessing step on test data).
X_train_poly = poly_reg.fit_transform(X_train_scaler)
X_test_poly = poly_reg.transform(X_test_scaler)

# Train the Linear Regression model on the expanded training set (X_train_poly)
lr4.fit(X_train_poly, y_train)


### Evaluation

In [None]:
# Prediction on Trainset:
y_train_pred_4 = lr4.predict(X_train_poly)
print(f'Train-set MSE: {mean_squared_error(y_train, y_train_pred_4)}')

In [None]:
# Predict on Test-set:
y_test_pred_4 = lr4.predict(X_test_poly)
print(f'Test-set MSE: {mean_squared_error(y_test, y_test_pred_4)}')

In [None]:
# Record the train/test MSE of the scaled + polynomial model
mse_dct['TV, Radio, Newspaper \n+ MinMaxScaler \n+ PolynomialFeatures'] = [
    mean_squared_error(y_train, y_train_pred_4),
    mean_squared_error(y_test, y_test_pred_4),
]

# Long-format table for plotting: one row per (case, metric) pair
metric_cols = ['Train - MSE', 'Test - MSE']
dfplot = pd.DataFrame(mse_dct).T.reset_index()
dfplot.columns = ['case'] + metric_cols
dfplot = dfplot.melt(id_vars='case', value_vars=metric_cols)

# Grouped bar chart, one bar group per case
plt.figure(figsize=(10, 5))
ax = sns.barplot(data=dfplot, x='case', y='value', hue='variable')
plt.title('MSE COMPARISON')
# Write the value on top of every bar, one container per metric
for bars in ax.containers[:dfplot['variable'].nunique()]:
    plt.bar_label(bars)
plt.show()

In [None]:
X_train_scaler.shape

🤔 **Predict sales when the advertising spend is TV: 200; Radio: 20; Newspaper: 30**

In [None]:
# Predict Sales for a new company. The input must go through the same
# preprocessing steps, in the same order, as the data lr4 was trained on.
# Step 1: create the input as a 2-D array (one row; columns in the order TV, Radio, Newspaper):
ip_arr = np.array([[200, 20, 30]])

# Step 2: scale with the MinMaxScaler that was fitted on the training set:
ip_scl = scaler.transform(ip_arr)

# Step 3: expand with the fitted PolynomialFeatures transformer (transform only, no re-fit):
ip_scl_poly = poly_reg.transform(ip_scl)

# Step 4: predict!
lr4.predict(ip_scl_poly)

# GREAT JOB! 😉