<div width="100%">
    <img width="100%" src="https://storage.googleapis.com/kaggle-datasets-images/418397/799497/8f5cf74367e40c83b8828f9950a803e8/dataset-cover.jpg" />
</div>

In [None]:
import math
import time

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from IPython.display import clear_output

<h1 id="dataset" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </center>
</h1>

## Load dataset

In [None]:
path = '../input/50-startups/50_Startups.csv'
df = pd.read_csv(path)
# Seed the shuffle explicitly: the global NumPy seed is only set further down
# in the notebook, so an unseeded shuffle would yield a different row order
# (and therefore different mini-batches) on every Restart & Run All.
df = shuffle(df, random_state=343242)
df.head()

## Feature engineering

In [None]:
# Total spend: the three expense columns added together row by row.
df['Spent'] = (
    df['R&D Spend']
    .add(df['Administration'])
    .add(df['Marketing Spend'])
)

# Standardize 'Spent' and 'Profit' together (zero mean, unit variance).
# The fitted scaler is kept so values can be mapped back to dollars later;
# its column order is ('Spent', 'Profit').
scaler = StandardScaler().fit(df[['Spent', 'Profit']])
df[['Spent', 'Profit']] = scaler.transform(df[['Spent', 'Profit']])

## Plot Spent vs. Profit

In [None]:
# Scatter of the standardized features, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(14, 8))
ax.set_title("Spent vs Profit")
ax.set_xlabel("Spent")
ax.set_ylabel("Profit")
ax.plot(df['Spent'], df['Profit'], 'bo')

<h1 id="forward" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Forward propagation
        <a class="anchor-link" href="#forward" target="_self">¶</a>
    </center>
</h1>

In [None]:
def forward(gamma, X):
    """Evaluate the linear model ``X * w + b``.

    Parameters
    ----------
    gamma : sequence of length 2
        The model parameters, unpacked as ``(w, b)``.
    X : scalar or array-like
        Input value(s); combined with ``w`` and ``b`` using ordinary
        (broadcasting) arithmetic.

    Returns
    -------
    The prediction(s) ``X * w + b``.
    """
    weight, bias = gamma
    return X * weight + bias

<h1 id="loss" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Loss function
        <a class="anchor-link" href="#loss" target="_self">¶</a>
    </center>
</h1>

In [None]:
def loss_fn(y, y_hat):
    """Half squared error, ``0.5 * (y_hat - y) ** 2``, computed element-wise.

    Parameters
    ----------
    y : scalar or array
        Target value(s).
    y_hat : scalar or array
        Predicted value(s).
    """
    residual = y_hat - y
    return 1 / 2 * residual ** 2

In [None]:
def dloss(y, y_hat):
    """Derivative of the half squared error with respect to ``y_hat``.

    For ``0.5 * (y_hat - y) ** 2`` this is simply the residual ``y_hat - y``.
    """
    residual = y_hat - y
    return residual

<h1 id="back" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Backward propagation
        <a class="anchor-link" href="#back" target="_self">¶</a>
    </center>
</h1>

In [None]:
def gradients(gamma, X, y, y_hat):
    """Per-sample gradients of the loss with respect to the weight and bias.

    Parameters
    ----------
    gamma : unused
        Accepted for a uniform call signature; not read by this function.
    X : scalar or array
        Model input(s).
    y : scalar or array
        Target value(s).
    y_hat : scalar or array
        Prediction(s) for ``X``.

    Returns
    -------
    tuple
        ``(dw, db)`` where ``dw = dloss(y, y_hat) * X`` and
        ``db = dloss(y, y_hat)``. No averaging over samples is done here.
    """
    residual = dloss(y, y_hat)
    return residual * X, residual

<h1 id="parameters" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Hyperparameters
        <a class="anchor-link" href="#parameters" target="_self">¶</a>
    </center>
</h1>

## Weights and bias with Xavier initialization

In [None]:
# Seed NumPy's global generator so the initial parameters are reproducible.
np.random.seed(343242)

# Uniform bound sqrt(3 * scale); the fan average is hard-coded as (2 + 2) / 2
# and clamped to at least 1, giving scale = 0.5.
scale = 1 / max(1., (2 + 2) / 2.)
limit = math.sqrt(3.0 * scale)

# Draw order matters for reproducibility: weight first, then bias.
w, b = (np.random.uniform(low=-limit, high=limit, size=1) for _ in range(2))

# Parameter list in (weight, bias) order.
gamma = [w, b]

## Features and labels + test dataset

In [None]:
# Standardized feature (total spend) and label (profit) as NumPy arrays.
X, y = df['Spent'].values, df['Profit'].values

# Evenly spaced inputs used to draw the fitted line. They are fed to the model
# as X values, so the range must come from X (not y); the +0.5 extends the
# line slightly past the largest observation.
test = np.arange(X.min(), X.max() + 0.5, 0.02)

# split into batches of 4
# Integer division gives array_split a whole number of sections, and max(1, ...)
# keeps it valid when there are fewer than 4 samples.
batches = np.array_split(np.arange(len(X)), max(1, len(X) // 4))

## Adamax hyperparameters

In [None]:
# Step size and the two exponential decay rates used by the Adamax update.
alpha, beta1, beta2 = 0.1, 0.9, 0.999

# Optimizer state, one slot per parameter in gamma:
#   m - running average of the gradient
#   u - running (decayed) maximum of the absolute gradient
m = np.zeros(len(gamma))
u = np.zeros_like(m)

<h1 id="training" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Training
        <a class="anchor-link" href="#training" target="_self">¶</a>
    </center>
</h1>

In [None]:
# Start from a fresh optimizer state so the bias correction below (which
# depends on the number of updates performed) is consistent with m and u.
m.fill(0.0)
u.fill(0.0)
step = 0  # global count of parameter updates across all epochs

for e in range(100):

    for batch in batches:
        X_batch, y_batch = X[batch], y[batch]

        y_hat = forward(gamma, X_batch)
        # mean mini-batch loss (kept for inspection; arguments are (y, y_hat))
        loss = loss_fn(y_batch, y_hat).mean()

        # per-sample gradients, averaged over the mini-batch
        dw, db = gradients(gamma, X_batch, y_batch, y_hat)
        g = dw.mean(), db.mean()

        # adamax update:
        #   m_t     = beta1 * m_{t-1} + (1 - beta1) * g_t
        #   u_t     = max(beta2 * u_{t-1}, |g_t|)
        #   theta_t = theta_{t-1} - alpha / (1 - beta1 ** t) * m_t / u_t
        step += 1
        bias_correction = 1 - beta1 ** step
        for i in range(len(g)):
            m[i] = beta1 * m[i] + (1 - beta1) * g[i]
            u[i] = max(beta2 * u[i], abs(g[i]))
            # u[i] is zero only while every gradient seen so far was exactly
            # zero (then m[i] is zero too); skip the update to avoid 0/0.
            if u[i] > 0:
                gamma[i] -= (alpha / bias_correction) * m[i] / u[i]

    # redraw the data and the current fitted line every 10 epochs
    if((e+1) % 10 == 0):
        clear_output(wait=True)
        plt.figure(figsize=(14,8))
        plt.plot(X, y, 'bo')
        plt.plot(test, forward(gamma, test), 'r-')
        plt.show()

<h1 id="prediction" style="color:black; background:white; border:0.5px dotted black;"> 
    <center>Prediction
        <a class="anchor-link" href="#prediction" target="_self">¶</a>
    </center>
</h1>

In [None]:
def predict(gamma, X, scaler):
    """Predict the profit (in original units) for a raw amount spent.

    Parameters
    ----------
    gamma : sequence of length 2
        Model parameters ``(w, b)`` as consumed by ``forward``.
    X : float
        Amount spent, in original (unscaled) units.
    scaler : fitted scaler
        Must have been fitted on two columns ordered ``(Spent, Profit)``,
        as done in the feature-engineering cell.

    Returns
    -------
    float
        Predicted profit, mapped back to original units.
    """
    # The scaler works on (Spent, Profit) pairs, so a placeholder 0. fills the
    # column that is not needed; column 0 holds the scaled spend.
    spent_scaled = scaler.transform([[X, 0.]])[0][0]

    # forward may return a one-element array (array parameters) or a scalar.
    profit_scaled = np.ravel(forward(gamma, spent_scaled))[0]

    # Profit is column 1 of the scaler; column 0 would just give back the
    # mean spend regardless of the prediction.
    return scaler.inverse_transform([[0., profit_scaled]])[0][1]

In [None]:
# Use the smallest observed 'Spent' value as the prediction input. The column
# is standardized, so map it back to dollars with the scaler first; the
# scaler works on (Spent, Profit) pairs, hence the placeholder 0. and the
# [0][0] lookup of the Spent column.
money_spent = scaler.inverse_transform([[df['Spent'].min(), 0.]])[0][0]

profit_made = predict(gamma, money_spent, scaler)
print("Money Spent:%6.2f$ - Profit:%6.2f$" % (money_spent, profit_made))