# Gradient Descent

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

<h3>Import Data Set <code>Advertising.csv</code></h3>

In [None]:
# Load the advertising data set and preview its first five rows.
df_Ad = pd.read_csv(filepath_or_buffer='Advertising.csv')
df_Ad.head(n=5)

In [None]:
# Summarize column dtypes and non-null counts of the loaded frame.
df_Ad.info()

In [None]:
# Count missing values per column (isna is the same method as isnull).
df_Ad.isna().sum()

In [None]:
# Remove the leftover index column written into the CSV.
# Re-assigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: running it a second time no longer raises KeyError.
df_Ad = df_Ad.drop(columns='Unnamed: 0', errors='ignore')

<h4>Calculate the <code>Correlation Coefficient (Pearson's)</code> to show the relationship between the variables</h4>
$$r = \frac{\sum_{i=1}^{n}(x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_{i=1}^{n}(x_i - \bar{x})^2 \sum_{i=1}^{n}(y_i - \bar{y})^2}}$$
<h4>or use the function <code>dataframe.corr()</code></h4>

In [None]:
# Pairwise Pearson correlations of all columns, drawn as an annotated heatmap.
corr_matrix = df_Ad.corr()
sns.heatmap(data=corr_matrix, annot=True)

<h3>Scatter Plot</h3>

In [None]:
# Raw relationship between TV ad spending and sales.
tv_vs_sales = go.Scatter(x=df_Ad['TV'], y=df_Ad['Sales'], mode='markers', name='Data')
go.Figure(data=tv_vs_sales)

<h3>Data Splitting <code>Train Test Split</code></h3>

In [None]:
# Single feature (TV spending) and target (Sales) as NumPy arrays,
# then a reproducible 75 % / 25 % train/test partition.
X = df_Ad['TV'].to_numpy()
y = df_Ad['Sales'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Report how many samples landed in each partition.
train_x_count, train_y_count = X_train.size, y_train.size
test_x_count, test_y_count = X_test.size, y_test.size
print(f"X_train size = {train_x_count} \ny_train size = {train_y_count} \n------------------")
print(f"X_test size = {test_x_count} \ny_test size = {test_y_count}")

<h3>SSE (Sum of Squares Error) Calculation when {intercept = 0, slope = 0}</h3>
$$SSE=\sum_{i=1}^{n}(y_i - \hat{y}_i)^2$$

In [None]:
# SSE of the all-zero baseline model (intercept = 0, slope = 0).
# The prediction is parenthesized so the residual is y - (intercept + slope * X);
# the previous form `y - 0 + 0 * X` only gave the right number because both
# parameters are zero.
intercept_0, slope_0 = 0, 0
SSE_0 = np.sum((y_train - (intercept_0 + slope_0 * X_train))**2)

scientific_notation = "{:e}".format(SSE_0)
print(f"SSE_0 = {SSE_0}, {scientific_notation}")

<h3>Create the function <code>gradient_descent()</code> to approximate the parameters {Intercept, Slope} using the update rules:</h3>
<h2>$$intercept_{new}=intercept_{old}-({\frac{\partial{SSE}}{\partial{intercept}}}\alpha)$$</h2>
<h2>$$slope_{new}=slope_{old}-({\frac{\partial{SSE}}{\partial{slope}}}\alpha)$$</h2>
<h3>where $$\alpha = \text{Learning Rate}$$</h3>

In [None]:
def gradeint_descent(X, y, learning_rate, iteration, round_epochs):
    """Fit ``y = intercept + slope * X`` by batch gradient descent on the SSE.

    Both parameters start at 0 and are moved against the SSE gradient by
    ``learning_rate`` on every iteration.

    Parameters
    ----------
    X, y : array-like
        Feature and target values; combined element-wise with NumPy
        arithmetic (this notebook passes 1-D NumPy arrays of equal length).
    learning_rate : float
        Factor applied to each partial derivative to obtain the step size.
    iteration : int
        Number of update steps to perform.
    round_epochs : int
        The progress bar description is refreshed every ``round_epochs``
        iterations.

    Returns
    -------
    intercept, slope : float
        Parameters after the final update.
    sse_list : list
        SSE recorded at the start of every iteration, i.e. evaluated with the
        parameters *before* that iteration's update. ``sse_list[-1]`` is
        therefore one step behind the returned parameters.
    """
    intercept = 0
    slope = 0
    sse_list = []
    reported = 0  # iterations already pushed to the progress bar
    with tqdm(total=iteration) as pbar:
        for i in range(iteration):
            # The residuals feed the loss and both partial derivatives, so
            # compute them once per iteration instead of three times.
            residual = y - intercept - slope * X
            sse = np.sum(residual**2)
            diff_sse_intercept = np.sum(-2 * residual)
            diff_sse_slope = np.sum(-2 * residual * X)

            step_size_intercept = diff_sse_intercept * learning_rate
            step_size_slope = diff_sse_slope * learning_rate

            intercept = intercept - step_size_intercept
            slope = slope - step_size_slope

            sse_list.append(sse)

            if (i + 1) % round_epochs == 0:
                pbar.set_description(f'Iterate: {i + 1}, SSE: {sse}')
                pbar.update(round_epochs)
                reported += round_epochs
        # When `iteration` is not a multiple of `round_epochs` the loop above
        # leaves the bar short of its total; push the remainder so it finishes.
        if reported < iteration:
            pbar.update(iteration - reported)
    return intercept, slope, sse_list


# Correctly spelled alias; the original (misspelled) name is kept so existing
# calls continue to work.
gradient_descent = gradeint_descent

<h3>Approximation Parameter Values</h3>

In [None]:
# Hyper-parameters for the fit. The progress bar is refreshed every 10,000 steps.
iteration = 1_000_000 # 1M
learn_rate = 1e-7
intercept, slope, sse_list = gradeint_descent(X_train, y_train, learn_rate, iteration, 10000)
print(f"Intercept = {intercept}, slope = {slope}, \nMaximum of SSE = {sse_list[0]} \nMinimum of SSE = {sse_list[-1]}")

# Learning curve: iteration number on the x-axis, SSE on the y-axis
# (the arguments were previously passed in the opposite order).
rng = np.arange(iteration)
plt.plot(rng, sse_list)
plt.xlabel("Iteration")
plt.ylabel("SSE")
plt.title("SSE per gradient descent iteration")
plt.show()

<h3>Scatter Plot with Linear Regression Line</h3>

In [None]:
# Training points together with the fitted line evaluated at the same X values.
fitted_sales = intercept + slope * X_train
fig = go.Figure(
    data=[
        go.Scatter(x=X_train, y=y_train, mode='markers', name='Data Point'),
        go.Scatter(x=X_train, y=fitted_sales, mode='lines', name='Linear Regression'),
    ]
)
fig.update_layout(xaxis_title="TV (Ad Spending)", yaxis_title="Sales")

<h3>SSE (Sum of Squares Error) Calculation after approximating the parameter values</h3>
$$SSE=\sum_{i=1}^{n}(y_i - \hat{y}_i)^2$$

In [None]:
# SSE on the training set using the intercept and slope found by gradient descent.
y_pred_train_set = intercept + slope * X_train

train_residuals = y_train - y_pred_train_set
SSE = np.square(train_residuals).sum()

scientific_notation = f"{SSE:e}"
print(f"SSE = {SSE}, {scientific_notation}")

<h3>R-Squared</h3>


In [None]:
# Goodness of fit on the held-out test set.
y_pred_test_set = intercept + slope * X_test

r2 = r2_score(y_true=y_test, y_pred=y_pred_test_set)
print(f"R-squared: {r2}")

In [None]:
# Prediction
Ad_spen_TV = float(input())
predict_sales = intercept + slope * Ad_spen_TV
print(f"Ad Spending of TV = {Ad_spen_TV} units\n----------------------------------\nPrediction of Sales = {round(predict_sales, 4)} units") 

In [None]:
import gradio as gr

def prediction(X):
    """Predict sales for a TV ad spending value using the fitted line.

    Reads the module-level ``intercept`` and ``slope`` produced by the
    gradient descent cell.

    Parameters
    ----------
    X : float or None
        TV ad spending. ``None`` is what Gradio's number input passes when
        the field is left empty.

    Returns
    -------
    The prediction rounded to 4 decimal places, or a short message asking
    for a value when ``X`` is ``None``.
    """
    # An empty number field arrives as None; `slope * None` would raise
    # TypeError, so answer with a hint instead.
    if X is None:
        return "Please enter a number."
    y_pred = intercept + slope * X
    return round(y_pred, 4)

# Minimal web UI: one numeric input wired to `prediction`, result shown as text.
demo = gr.Interface(fn=prediction, inputs=["number"], outputs=["text"])

# Start the Gradio app.
demo.launch()
