# Implementing Stochastic Gradient Descent

Benefits of Stochastic Gradient Descent:

- It is easier to fit into memory, because only a single training sample (or a small batch) is processed at each step.
- It is computationally fast, as only one sample is processed at a time.
- For larger datasets it can converge faster, because the parameters are updated more frequently.

In [2]:
# import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error

  return f(*args, **kwds)


In [3]:
# Load the California housing dataset through scikit-learn's
# fetch_california_housing helper

housing_data = fetch_california_housing()



In [6]:
# Wrap the feature matrix and the target vector in labelled DataFrames

Features = pd.DataFrame(
    data=housing_data.data,
    columns=housing_data.feature_names,
)
Target = pd.DataFrame(data=housing_data.target, columns=["Target"])



In [7]:
# Combine the feature columns and the target column into a single DataFrame
df = Features.join(Target)



In [9]:
# Exploring the dataset: preview the first five rows

df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [10]:
# Removing outliers: keep only rows that fall below both cut-offs

df = df[(df.Target < 3.5) & (df.MedInc < 8)]

In [11]:
# Summary statistics for the two columns used below (the "count" row is dropped)

df[["MedInc", "Target"]].describe().iloc[1:]

Unnamed: 0,MedInc,Target
mean,3.48203,1.722805
std,1.364922,0.749957
min,0.4999,0.14999
25%,2.452025,1.119
50%,3.3036,1.635
75%,4.34605,2.256
max,7.9887,3.499


In [12]:
# scaling variables

def scale(x):
    """Min-max scale a one-dimensional collection of numbers to [0, 1].

    Parameters
    ----------
    x : array-like
        Numeric values (pandas Series, NumPy array, list, ...).

    Returns
    -------
    pandas.Series
        The scaled values on a fresh ``0..n-1`` index; the index of ``x``
        is not carried over. NaN entries stay NaN and are ignored when
        the minimum and maximum are computed. Constant input maps to all
        zeros, and empty input yields an empty float Series.
    """
    values = np.asarray(x, dtype=float).ravel()
    if values.size == 0:
        return pd.Series(values)

    lowest = np.nanmin(values)
    span = np.nanmax(values) - lowest
    if span == 0:
        # Every value is identical: avoid 0/0 and map everything to 0.
        return pd.Series(np.zeros_like(values))

    # Vectorised (x - min) / (max - min); no per-element Python loop.
    return pd.Series((values - lowest) / span)

# Min-max scale the single feature and the target
X = scale(df["MedInc"])
y = scale(df["Target"])

In [13]:
# Implementing the SGD algorithm

# NOTE(review): these assignments rebind X and y to the raw (unscaled)
# columns, so the min-max scaled series computed in the previous cell are
# not used by anything below. Confirm that training on unscaled data is
# intended.
X = df.MedInc
y = df.Target

# stochastic gradient descent
def SGD(X, y, lr=0.05, epoch=10, batch_size=1, seed=None):
    """Fit the line ``y ~ m*X + b`` with mini-batch stochastic gradient descent.

    Each iteration draws ``batch_size`` samples uniformly at random (with
    replacement) and takes one gradient step on the mean squared error of
    that mini-batch.

    Parameters
    ----------
    X, y : array-like
        Feature and target values of equal length. They are converted to
        flat NumPy arrays, so samples are always paired by position and a
        pandas index on either input is ignored.
    lr : float
        Learning rate (step size).
    epoch : int
        Number of parameter updates. Note that each "epoch" here is a
        single mini-batch step, not a full pass over the data.
    batch_size : int
        Number of samples drawn per update.
    seed : int or None
        When given, sampling uses a private ``RandomState(seed)`` so runs
        are reproducible. When ``None``, NumPy's global random state is
        used.

    Returns
    -------
    m, b : float
        Final slope and intercept.
    log : list of (m, b) tuples
        Parameters after every update.
    mse : list of float
        Mean squared error over the full dataset after every update.

    Raises
    ------
    ValueError
        If ``X`` is empty, ``X`` and ``y`` differ in length, or
        ``batch_size`` is smaller than 1.
    """
    X = np.asarray(X, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if X.size == 0:
        raise ValueError("X must contain at least one sample")
    if X.size != y.size:
        raise ValueError("X and y must have the same number of samples")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Fall back to the module-level generator so callers that seed it with
    # np.random.seed(...) keep getting the same sample sequence.
    rng = np.random if seed is None else np.random.RandomState(seed)

    m, b = 0.5, 0.5  # initial parameters
    log, mse = [], []  # history of parameters and full-data error

    for _ in range(epoch):
        indexes = rng.randint(0, X.size, batch_size)  # random mini-batch
        Xs = X[indexes]
        ys = y[indexes]

        residual = ys - (m * Xs + b)

        # Gradient of the mini-batch MSE with respect to m and b.
        m -= lr * (-2 * Xs.dot(residual) / batch_size)
        b -= lr * (-2 * residual.sum() / batch_size)

        log.append((m, b))
        # Plain NumPy MSE: avoids re-validating the full dataset on every step.
        mse.append(float(np.mean((y - (m * X + b)) ** 2)))

    return m, b, log, mse




In [14]:
# Fit the model with the chosen hyper-parameters

m, b, log, mse = SGD(X, y, lr=0.01, epoch=10, batch_size=2)


In [15]:
# Predict with the fitted line and report the error on the full dataset

y_pred = m * X + b

print("MSE:", mean_squared_error(y_true=y, y_pred=y_pred))


MSE: 0.3551750013813474


In [16]:
# Note: in practice, prefer a standard library implementation of SGD (e.g. scikit-learn's SGDRegressor), as it is optimised for speed and accuracy.