# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1) Read the Salary CSV straight from its hosted location
salary = pd.read_csv('https://github.com/ybifoundation/Dataset/raw/main/Salary%20Data.csv')

# Feature column, turned into an (n_samples, 1) array in one step so it is
# ready for matrix-style operations
X = salary['Experience Years'].values.reshape(-1, 1)

# Target column, kept 1-D: (n_samples,)
y = salary['Salary'].values

# Standardization statistics (optional step: gradient updates behave better
# on zero-mean / unit-variance data; skip it to stay on the original scale)
X_mean = X.mean()
X_std = X.std()
y_mean = y.mean()
y_std = y.std()

# Standardized copies used for training
X_norm = (X - X_mean) / X_std
y_norm = (y - y_mean) / y_std

# 2) Define helper functions
def predict(X, w, b):
    """
    Evaluate the linear model w * X + b.

    X is the input data (e.g. shape (n_samples, 1)), w the scalar weight
    and b the scalar bias. The result has the same shape as X.
    """
    slope_term = w * X
    return slope_term + b

def compute_gradients(X, y, w, b):
    """
    Compute dL/dw and dL/db for the MSE loss of the model y_pred = w * X + b:
    L = 1/(2n) * sum( (y_pred - y)^2 ), for n samples.

    X and y are flattened to 1-D before the residuals are formed. Without
    that, a column-vector X of shape (n, 1) combined with a 1-D y of shape
    (n,) broadcasts `y_pred - y` to an (n, n) matrix, and the sums below
    would run over n*n mismatched (x_i, y_j) pairs instead of the n samples.

    Parameters:
        X: single-feature inputs, shape (n,) or (n, 1).
        y: targets with the same number of elements as X, shape (n,) or (n, 1).
        w: scalar weight.
        b: scalar bias.

    Returns:
        (dw, db): the two scalar partial derivatives of L.

    Raises:
        ValueError: if X is empty or X and y hold a different number of
            elements.
    """
    x_flat = np.asarray(X, dtype=float).reshape(-1)
    y_flat = np.asarray(y, dtype=float).reshape(-1)

    n = x_flat.size
    if n == 0 or y_flat.size != n:
        raise ValueError(
            f"X and y must be non-empty and hold the same number of samples "
            f"(got {x_flat.size} and {y_flat.size})"
        )

    # Per-sample residual of the linear model; shape (n,)
    error = (w * x_flat + b) - y_flat

    # dL/dw = (1/n) * sum(error * X)
    dw = (1 / n) * np.sum(error * x_flat)

    # dL/db = (1/n) * sum(error)
    db = (1 / n) * np.sum(error)

    return dw, db

# 3) Stochastic Gradient Descent (SGD) function
def sgd_linear_regression(X, y, lr=0.01, epochs=10):
    """
    Train a 1-D linear regression model y ~ w * X + b by gradient descent,
    plotting the data and the current regression line after every epoch.

    Note: although named "SGD", every epoch performs a single update using
    the gradient that compute_gradients returns for the whole (X, y) passed
    in; no mini-batches are sampled here.

    Parameters:
        X: input array (must support .min() / .max()), e.g. shape (n, 1).
        y: target values, one per sample.
        lr: learning rate (step size) applied to both gradients.
        epochs: number of update steps; one figure is shown per step.

    Returns:
        (w, b): the final weight and bias.
    """
    # Initialize parameters (w, b) randomly
    w = np.random.randn()
    b = np.random.randn()

    # Decide the axis labels from the data actually passed in (mean ~ 0 and
    # std ~ 1 means standardized) instead of reading module-level statistics,
    # so the function also works, and labels correctly, outside this script.
    x_standardized = np.isclose(np.mean(X), 0.0) and np.isclose(np.std(X), 1.0)
    y_standardized = np.isclose(np.mean(y), 0.0) and np.isclose(np.std(y), 1.0)
    x_label = 'X (normalized)' if x_standardized else 'X'
    y_label = 'y (normalized)' if y_standardized else 'y'

    # The x-grid for the regression line does not change between epochs
    X_line = np.linspace(X.min(), X.max(), 100)

    for epoch in range(1, epochs + 1):
        # 1) Compute gradients
        dw, db = compute_gradients(X, y, w, b)

        # 2) Update parameters
        w -= lr * dw
        b -= lr * db

        # 3) Plot the data + current regression line after each epoch
        plt.figure(figsize=(6, 4))
        plt.scatter(X, y, label='Data')
        plt.plot(X_line, w * X_line + b, color='red', label='SGD Model')

        plt.title(f'SGD - Epoch {epoch}')
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.legend()
        plt.show()

    return w, b

# 4) SGD with Momentum
def sgd_momentum_linear_regression(X, y, lr=0.01, gamma=0.9, epochs=10):
    """
    Train a 1-D linear regression model y ~ w * X + b by gradient descent
    with momentum, plotting the data and the current regression line after
    every epoch.

    Each epoch takes the gradient that compute_gradients returns for the
    whole (X, y) passed in, folds it into a velocity term
    (v = gamma * v + lr * grad) and then moves the parameter by that
    velocity (param -= v).

    Parameters:
        X: input array (must support .min() / .max()), e.g. shape (n, 1).
        y: target values, one per sample.
        lr: learning rate applied to the gradients.
        gamma: momentum coefficient (0 < gamma < 1); the fraction of the
            previous velocity carried into the next step.
        epochs: number of update steps; one figure is shown per step.

    Returns:
        (w, b): the final weight and bias.
    """
    w = np.random.randn()
    b = np.random.randn()

    # Initialize velocity terms for w and b
    vw, vb = 0.0, 0.0

    # Decide the axis labels from the data actually passed in (mean ~ 0 and
    # std ~ 1 means standardized) instead of reading module-level statistics,
    # so the function also works, and labels correctly, outside this script.
    x_standardized = np.isclose(np.mean(X), 0.0) and np.isclose(np.std(X), 1.0)
    y_standardized = np.isclose(np.mean(y), 0.0) and np.isclose(np.std(y), 1.0)
    x_label = 'X (normalized)' if x_standardized else 'X'
    y_label = 'y (normalized)' if y_standardized else 'y'

    # The x-grid for the regression line does not change between epochs
    X_line = np.linspace(X.min(), X.max(), 100)

    for epoch in range(1, epochs + 1):
        dw, db = compute_gradients(X, y, w, b)

        # Update velocities
        vw = gamma * vw + lr * dw
        vb = gamma * vb + lr * db

        # Update parameters
        w -= vw
        b -= vb

        # Plot after each epoch
        plt.figure(figsize=(6, 4))
        plt.scatter(X, y, label='Data')
        plt.plot(X_line, w * X_line + b, color='green', label='SGD+Momentum Model')

        plt.title(f'SGD + Momentum - Epoch {epoch}')
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.legend()
        plt.show()

    return w, b


# 5) Fit both optimizers on the standardized data and report the learned
#    parameters; each call also shows one plot per epoch.

# Plain gradient descent
print("==== Training SGD (no momentum) ====")
w_sgd, b_sgd = sgd_linear_regression(X_norm, y_norm, lr=0.1, epochs=10)
print(
    "Final parameters from basic SGD:",
    f"w = {w_sgd:.4f}, b = {b_sgd:.4f}",
    sep="\n",
)

# Gradient descent with momentum
print("\n==== Training SGD + Momentum ====")
w_mom, b_mom = sgd_momentum_linear_regression(
    X_norm, y_norm, lr=0.1, gamma=0.9, epochs=10
)
print(
    "Final parameters from SGD+Momentum:",
    f"w = {w_mom:.4f}, b = {b_mom:.4f}",
    sep="\n",
)