In [1]:
# Let's import the necessary modules
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Read the home-prices CSV straight from the tutorial repository on GitHub.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/codebasics/deep-learning-keras-tf-tutorial/master/8_sgd_vs_gd/homeprices_banglore.csv',
)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,area,bedrooms,price
0,1056,2,39.07
1,2600,4,120.0
2,1440,3,62.0
3,1521,3,75.0
4,1200,2,51.0


In [4]:
# Separate the predictors from the regression target.
x = df.drop(columns=['price'])  # every column except the target
y = df.loc[:, 'price']          # target column

In [67]:
# Let's min-max scale the features and the target before running gradient descent.
# NOTE: both scalers are fitted on the full dataset, i.e. before the train/test
# split done further down, so the held-out rows influence the scaling.
x_scaler, y_scaler = MinMaxScaler(), MinMaxScaler()

scaled_x = x_scaler.fit_transform(x)
# The price Series is reshaped into a single (n, 1) column before it is scaled.
scaled_y = y_scaler.fit_transform(y.to_numpy().reshape(-1, 1))

# Peek at the first five scaled feature rows.
scaled_x[:5]

array([[0.08827586, 0.25      ],
       [0.62068966, 0.75      ],
       [0.22068966, 0.5       ],
       [0.24862069, 0.5       ],
       [0.13793103, 0.25      ]])

<h2>Mean Squared Error</h2>

In [6]:
def mean_squared_error(y_true, y_predicted):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values (1-D).
    y_predicted : array-like
        Predicted values; must have exactly the same shape as ``y_true``.

    Returns
    -------
    float
        ``sum((y_true - y_predicted) ** 2) / len(y_true)``.

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    # Coerce to arrays so plain Python lists/tuples work as well as ndarrays.
    y_true = np.atleast_1d(np.asarray(y_true, dtype=float))
    y_predicted = np.atleast_1d(np.asarray(y_predicted, dtype=float))

    # Without this guard, e.g. (n, 1) - (n,) silently broadcasts to an (n, n)
    # matrix and yields a meaningless loss instead of an error.
    if y_true.shape != y_predicted.shape:
        raise ValueError(
            f'shape mismatch: y_true has shape {y_true.shape}, '
            f'y_predicted has shape {y_predicted.shape}'
        )
    if y_true.size == 0:
        raise ValueError('mean squared error is undefined for empty input')

    total_loss = np.sum((y_true - y_predicted) ** 2)

    return total_loss / len(y_true)


<h2>Mini-batch Gradient Descent</h2>

In [130]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
# The (n, 1) scaled target is flattened to 1-D before splitting.
x_train, x_test, y_train, y_test = train_test_split(
    scaled_x,
    scaled_y.ravel(),
    test_size=0.2,
    random_state=10,
)

# Confirm the size of the training split.
x_train.shape

(16, 2)

In [89]:
# Inspect the scaled training targets.
y_train

array([0.31851852, 0.51851852, 0.05925926, 0.22222222, 0.51111111,
       0.13333333, 0.20740741, 0.11851852, 0.        , 0.04444444,
       0.8       , 0.65185185, 0.05237037, 0.07407407, 0.14074074,
       1.        ])

In [129]:
import random

In [138]:
def minibatch_gradient_descent(x_train, y_true, epochs=1000, learning_rate=0.01,
                               batch_size=5, tolerance=0.005, random_state=None,
                               verbose=True):
    """Fit a linear model ``y = w . x + bias`` with mini-batch gradient descent.

    Every iteration draws ``batch_size`` distinct rows at random, measures the
    mean squared error on that batch and takes one gradient step. Note that an
    "epoch" here is a single mini-batch update, not a full pass over the data,
    and that early stopping is decided on the (noisy) mini-batch cost.

    Parameters
    ----------
    x_train : array-like of shape (n_samples, n_features)
        Feature matrix.
    y_true : array-like of shape (n_samples,) or (n_samples, 1)
        Target values; flattened to 1-D internally.
    epochs : int, default 1000
        Maximum number of mini-batch updates.
    learning_rate : float, default 0.01
        Step size applied to both gradients.
    batch_size : int, default 5
        Rows sampled per update; capped at ``n_samples`` so small datasets
        no longer make the sampler fail.
    tolerance : float, default 0.005
        Training stops as soon as a mini-batch cost is ``<= tolerance``.
    random_state : int or None, default None
        Seed for a private sampler. ``None`` keeps using the global ``random``
        module state, as before.
    verbose : bool, default True
        Print one progress line per update.

    Returns
    -------
    tuple
        ``(w, bias)``: the weight vector (``n_features`` entries) and the bias.

    Raises
    ------
    ValueError
        If ``x_train`` is not 2-D, is empty, its row count differs from
        ``y_true``, or ``batch_size`` is smaller than 1.
    """
    x_train = np.asarray(x_train, dtype=float)
    # Flatten so a column-vector target cannot broadcast against the 1-D predictions.
    y_true = np.asarray(y_true, dtype=float).reshape(-1)

    if x_train.ndim != 2:
        raise ValueError(f'x_train must be 2-D, got {x_train.ndim}-D input')
    n_samples, number_of_features = x_train.shape
    if n_samples == 0:
        raise ValueError('x_train must contain at least one sample')
    if len(y_true) != n_samples:
        raise ValueError(
            f'x_train has {n_samples} samples but y_true has {len(y_true)}'
        )
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    # Sampling without replacement cannot draw more rows than exist.
    total_samples = min(batch_size, n_samples)
    sampler = random if random_state is None else random.Random(random_state)

    w = np.ones(number_of_features)
    bias = 0

    for index in range(epochs):
        positions = sampler.sample(range(n_samples), k=total_samples)
        new_x_train = x_train[positions]
        new_y_true = y_true[positions]

        y_pred = np.dot(w, new_x_train.T) + bias
        cost = mean_squared_error(new_y_true, y_pred)

        # Gradients of the batch MSE with respect to w and bias.
        residual = new_y_true - y_pred
        w_grad = -(2 / total_samples) * (new_x_train.T.dot(residual))
        bias_grad = -(2 / total_samples) * np.sum(residual)

        w = w - learning_rate * w_grad
        bias = bias - learning_rate * bias_grad

        if verbose:
            # The cost shown was computed before this step's update; w and bias are post-update.
            print(f'For epoch of {index} the cost is {round(cost, 3)} and w = {np.round(w, 3)} and bias = {bias}')
        if cost <= tolerance:
            break

    return w, bias
        
# Train on the scaled training split with the default hyper-parameters
# (epochs=1000, learning_rate=0.01); one progress line is printed per update.
w, bias = minibatch_gradient_descent(x_train, y_train)

For epoch of 0 the cost is 0.245 and w = [0.997 0.995] and bias = -0.009643269476372925
For epoch of 1 the cost is 0.148 and w = [0.995 0.992] and bias = -0.016990564719198617
For epoch of 2 the cost is 0.337 and w = [0.99  0.985] and bias = -0.02757038865828755
For epoch of 3 the cost is 0.211 and w = [0.986 0.981] and bias = -0.03534830356396891
For epoch of 4 the cost is 0.219 and w = [0.982 0.976] and bias = -0.04396373903025649
For epoch of 5 the cost is 0.164 and w = [0.979 0.973] and bias = -0.05069750318598426
For epoch of 6 the cost is 0.164 and w = [0.977 0.97 ] and bias = -0.05829631063895234
For epoch of 7 the cost is 0.297 and w = [0.971 0.963] and bias = -0.06793258752856951
For epoch of 8 the cost is 0.282 and w = [0.965 0.956] and bias = -0.07731264392546247
For epoch of 9 the cost is 0.157 and w = [0.962 0.953] and bias = -0.08376787390874058
For epoch of 10 the cost is 0.132 and w = [0.961 0.949] and bias = -0.09076197834679794
For epoch of 11 the cost is 0.2 and w = 

In [139]:
# Predict on the held-out rows; the outputs are still in scaled units.
y_pred = np.dot(w, x_test.T) + bias
print(y_pred.reshape(-1, 1))

# Undo the target scaling so predictions and ground truth are back in the
# original price units. Both are reshaped to a single column, the same
# layout y_scaler was fitted on.
y_prediction = y_scaler.inverse_transform(y_pred.reshape(-1, 1))
y_t = y_scaler.inverse_transform(y_test.reshape(-1, 1))

[[ 1.00242494]
 [ 0.37591601]
 [-0.00169052]
 [ 0.83992314]]


In [140]:
# Show predicted and actual prices side by side, flattened to 1-D for display.
print(f'y_prediction: {y_prediction.ravel()}')
print(f'y_true: {y_t.ravel()}')

y_prediction: [167.32736676  82.74866116  31.77178019 145.38962379]
y_true: [155.  82.  38. 135.]
