<a href="https://colab.research.google.com/github/rawanhussein77/-stochastic-gradient-descent/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd


# Absolute location of the diamonds CSV.
# NOTE(review): this looks like a Colab session path — adjust it when running elsewhere.
file_path = "/content/diamonds.csv"
df = pd.read_csv(file_path)


# Preview the first rows. The recorded preview shows two "Unnamed" columns that
# look like previously saved row indices — TODO confirm whether they can be dropped.
df.head()


Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Numeric predictors used by the linear model, and the column to predict.
features = ["carat", "depth", "table", "x", "y", "z"]
target = "price"

X = df[features]
y = df[target]

# Split BEFORE scaling. Fitting the scaler on the full dataset lets the test
# rows influence the mean/std used to transform the training rows (data
# leakage), which makes the held-out error optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaler; kept so that any cell
# that still refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

X_train.shape, X_test.shape


((43152, 6), (10788, 6))

In [3]:
import numpy as np

# Prepend a constant column of ones to each split so that the first weight of
# the fitted vector plays the role of the intercept.
X_train_b, X_test_b = (
    np.hstack([np.ones((len(split), 1)), split])
    for split in (X_train, X_test)
)


def batch_gradient_descent(X, y, learning_rate=0.01, n_iterations=1000, random_state=None):
    """Fit linear-regression weights with full-batch gradient descent on the MSE.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like of length m
        Targets. A pandas Series, a list or a NumPy array are all accepted.
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    n_iterations : int, default 1000
        Number of full passes (one update per pass).
    random_state : int or None, default None
        Seed for the random initial weights. ``None`` draws from the global
        ``np.random`` state, exactly as before this parameter existed.

    Returns
    -------
    ndarray of shape (n, 1)
        The weight vector after ``n_iterations`` updates.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X, dtype=float)
    # np.asarray handles Series, lists and ndarrays alike (``y.values`` only
    # exists on pandas objects).
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m = X.shape[0]
    if y.shape[0] != m:
        raise ValueError(
            "X and y must have the same number of rows, got %d and %d" % (m, y.shape[0])
        )

    # The np.random module and a RandomState instance expose the same randn().
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = rng.randn(X.shape[1], 1)

    for _ in range(n_iterations):
        # Gradient of mean((X @ theta - y) ** 2) with respect to theta.
        gradients = (2 / m) * X.T.dot(X.dot(theta) - y)
        theta -= learning_rate * gradients

    return theta

# Fit on the bias-augmented training matrix with the default step size and
# iteration count.
theta_bgd = batch_gradient_descent(X_train_b, y_train)

# Flatten the (n, 1) column vector to 1-D for a compact display.
theta_bgd.ravel()


array([3936.22568532, 3115.81756456, -180.61580251, -222.66505061,
        458.71877766,   68.93561132,   33.25265615])

In [5]:
def stochastic_gradient_descent(X, y, learning_rate=0.01, n_epochs=50, random_state=None):
    """Fit linear-regression weights with single-sample stochastic gradient descent.

    Each epoch performs ``m`` updates; every update uses one row drawn
    uniformly at random *with replacement*, so an epoch is not guaranteed to
    visit every row.

    The step size is constant. A value that is too large for the data makes
    the weights blow up: the output recorded in this notebook for the default
    ``learning_rate=0.01`` shows coefficients around 1e54-1e56.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like of length m
        Targets. A pandas Series, a list or a NumPy array are all accepted.
    learning_rate : float, default 0.01
        Constant step size.
    n_epochs : int, default 50
        Number of epochs (``m`` single-sample updates each).
    random_state : int or None, default None
        Seed for the initial weights and the sample draws. ``None`` uses the
        global ``np.random`` state, exactly as before this parameter existed.

    Returns
    -------
    ndarray of shape (n, 1)

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X, dtype=float)
    # np.asarray handles Series, lists and ndarrays alike (``y.values`` only
    # exists on pandas objects).
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m = X.shape[0]
    if y.shape[0] != m:
        raise ValueError(
            "X and y must have the same number of rows, got %d and %d" % (m, y.shape[0])
        )

    # The np.random module and a RandomState instance expose the same
    # randn()/randint() functions.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = rng.randn(X.shape[1], 1)

    for _ in range(n_epochs):
        for _ in range(m):
            pick = rng.randint(m)
            # Slicing (rather than indexing) keeps both operands 2-D.
            xi = X[pick:pick + 1]
            yi = y[pick:pick + 1]
            gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
            theta -= learning_rate * gradients

    return theta


# Fit with the default learning_rate=0.01.
# NOTE(review): the output recorded for this cell shows coefficients on the
# order of 1e54-1e56, i.e. this run diverged. The evaluation cell uses
# theta_sgd_fixed rather than this result.
theta_sgd = stochastic_gradient_descent(X_train_b, y_train)


# Flatten the (n, 1) column vector to 1-D for a compact display.
theta_sgd.ravel()


array([ 2.81611866e+54,  8.57924817e+55, -1.90672731e+55, -3.97992742e+54,
        1.75803459e+56, -3.78742269e+56,  1.20655363e+56])

In [6]:
def stochastic_gradient_descent_fixed(X, y, learning_rate=0.001, n_epochs=50, random_state=None):
    """Single-sample stochastic gradient descent with a smaller default step.

    The algorithm is the same as ``stochastic_gradient_descent``; only the
    default ``learning_rate`` differs (0.001 instead of 0.01).

    Each epoch performs ``m`` updates; every update uses one row drawn
    uniformly at random *with replacement*.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like of length m
        Targets. A pandas Series, a list or a NumPy array are all accepted.
    learning_rate : float, default 0.001
        Constant step size.
    n_epochs : int, default 50
        Number of epochs (``m`` single-sample updates each).
    random_state : int or None, default None
        Seed for the initial weights and the sample draws. ``None`` uses the
        global ``np.random`` state, exactly as before this parameter existed.

    Returns
    -------
    ndarray of shape (n, 1)

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X, dtype=float)
    # np.asarray handles Series, lists and ndarrays alike (``y.values`` only
    # exists on pandas objects).
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m = X.shape[0]
    if y.shape[0] != m:
        raise ValueError(
            "X and y must have the same number of rows, got %d and %d" % (m, y.shape[0])
        )

    # The np.random module and a RandomState instance expose the same
    # randn()/randint() functions.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = rng.randn(X.shape[1], 1)

    for _ in range(n_epochs):
        for _ in range(m):
            pick = rng.randint(m)
            # Slicing (rather than indexing) keeps both operands 2-D.
            xi = X[pick:pick + 1]
            yi = y[pick:pick + 1]
            gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
            theta -= learning_rate * gradients

    return theta

# Re-run SGD with the smaller default step size (0.001) of the "_fixed" variant.
theta_sgd_fixed = stochastic_gradient_descent_fixed(X_train_b, y_train)


# Flatten the (n, 1) column vector to 1-D for a compact display.
theta_sgd_fixed.ravel()


array([ 4005.96173041,  5155.59783381,  -222.62371619,  -291.22432943,
       -1146.27694538,  -395.37274413,   147.03809831])

In [8]:
def mini_batch_gradient_descent(X, y, learning_rate=0.01, n_epochs=50, batch_size=32, random_state=None):
    """Fit linear-regression weights with mini-batch gradient descent on the MSE.

    Every epoch reshuffles the rows and walks through them in consecutive
    slices of ``batch_size`` rows; the last slice of an epoch is shorter when
    ``m`` is not a multiple of ``batch_size``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like of length m
        Targets. A pandas Series, a list or a NumPy array are all accepted.
    learning_rate : float, default 0.01
        Constant step size.
    n_epochs : int, default 50
        Number of passes over the shuffled data.
    batch_size : int, default 32
        Maximum number of rows per update; must be at least 1.
    random_state : int or None, default None
        Seed for the initial weights and the shuffles. ``None`` uses the
        global ``np.random`` state, exactly as before this parameter existed.

    Returns
    -------
    ndarray of shape (n, 1)

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 or ``X`` and ``y`` do not have the
        same number of rows.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    X = np.asarray(X, dtype=float)
    # np.asarray handles Series, lists and ndarrays alike (``y.values`` only
    # exists on pandas objects).
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m = X.shape[0]
    if y.shape[0] != m:
        raise ValueError(
            "X and y must have the same number of rows, got %d and %d" % (m, y.shape[0])
        )

    # The np.random module and a RandomState instance expose the same
    # randn()/permutation() functions.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    theta = rng.randn(X.shape[1], 1)

    for _ in range(n_epochs):
        order = rng.permutation(m)
        X_shuffled = X[order]
        y_shuffled = y[order]

        for start in range(0, m, batch_size):
            xi = X_shuffled[start:start + batch_size]
            yi = y_shuffled[start:start + batch_size]
            # Average over the rows actually in this slice: the final slice of
            # an epoch can be shorter than batch_size, and dividing by
            # batch_size there would shrink that update.
            gradients = (2 / len(xi)) * xi.T.dot(xi.dot(theta) - yi)
            theta -= learning_rate * gradients

    return theta

# Fit with the defaults: learning_rate=0.01, 50 epochs, batches of 32 rows.
theta_mbgd = mini_batch_gradient_descent(X_train_b, y_train)

# Flatten the (n, 1) column vector to 1-D for a compact display.
theta_mbgd.ravel()


array([ 3.91826201e+03,  5.10366088e+03, -2.69692564e+02, -2.18611766e+02,
       -1.38041327e+03,  4.51122562e+01,  4.29751636e+00])

In [9]:
from sklearn.metrics import mean_squared_error

# Predict on the held-out rows with each fitted weight vector. The "sgd" entry
# uses theta_sgd_fixed, i.e. the run with the smaller learning rate.
y_pred_bgd, y_pred_sgd, y_pred_mbgd = (
    X_test_b @ weights
    for weights in (theta_bgd, theta_sgd_fixed, theta_mbgd)
)

# Mean squared error of each set of predictions against the true test targets.
mse_bgd, mse_sgd, mse_mbgd = (
    mean_squared_error(y_test, predicted)
    for predicted in (y_pred_bgd, y_pred_sgd, y_pred_mbgd)
)

mse_bgd, mse_sgd, mse_mbgd


(2418935.716532594, 2280463.3363816924, 2251928.3323323703)