In [5]:
!pip install numpy pandas matplotlib scikit-learn torch wandb

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [6]:
from sklearn.datasets import fetch_california_housing, load_iris
import pandas as pd

# Load datasets
california = fetch_california_housing()
iris = load_iris()

# Convert to DataFrames (for easier handling)
california_df = pd.DataFrame(california.data, columns=california.feature_names)
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

print("California Housing Dataset:\n", california_df.head())
print("\nIris Dataset (Classification):\n", iris_df.head())


California Housing Dataset:
    MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  

Iris Dataset (Classification):
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4          

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Prepare data
X = california.data
y = california.target

# Split into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features (critical for Gradient Descent)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Add bias term (intercept)
X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]

In [8]:
def batch_gradient_descent(X, y, learning_rate=0.01, epochs=1000):
    theta = np.zeros(X.shape[1])  # Initialize weights
    m = len(y)                   # Number of samples

    for epoch in range(epochs):
        gradient = (1/m) * X.T.dot(X.dot(theta) - y)
        theta -= learning_rate * gradient
        if epoch % 100 == 0:
            loss = np.mean((X.dot(theta) - y) ** 2)
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
    return theta

theta_batch = batch_gradient_descent(X_train, y_train)

Epoch 0, Loss: 5.5300
Epoch 100, Loss: 1.2906
Epoch 200, Loss: 0.7102
Epoch 300, Loss: 0.6187
Epoch 400, Loss: 0.5952
Epoch 500, Loss: 0.5827
Epoch 600, Loss: 0.5732
Epoch 700, Loss: 0.5652
Epoch 800, Loss: 0.5584
Epoch 900, Loss: 0.5526


In [9]:
def stochastic_gradient_descent(X, y, learning_rate=0.01, epochs=10):
    theta = np.zeros(X.shape[1])
    m = len(y)

    for epoch in range(epochs):
        for i in range(m):
            random_idx = np.random.randint(m)
            xi = X[random_idx:random_idx+1]
            yi = y[random_idx:random_idx+1]
            gradient = xi.T.dot(xi.dot(theta) - yi)
            theta -= learning_rate * gradient
        loss = np.mean((X.dot(theta) - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
    return theta

theta_sgd = stochastic_gradient_descent(X_train, y_train)

Epoch 0, Loss: 13117342.6212
Epoch 1, Loss: 8707907847586.7520
Epoch 2, Loss: 2691754577329233408.0000
Epoch 3, Loss: 727974349089721286656.0000
Epoch 4, Loss: 425967621767871805553377280.0000
Epoch 5, Loss: 37808035002289288316680825294815232.0000
Epoch 6, Loss: 1552640477607748561057900280010735802630421872640.0000
Epoch 7, Loss: 23478771063493357881330152423463208558989372108050530304.0000
Epoch 8, Loss: 915862525960338060392503752546488775540011217436750442276061184.0000
Epoch 9, Loss: 18152298601015190667694864643926415362093427922046129325101981381789834557784064.0000


In [10]:
def mini_batch_gradient_descent(X, y, learning_rate=0.01, epochs=100, batch_size=32):
    theta = np.zeros(X.shape[1])
    m = len(y)

    for epoch in range(epochs):
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        y_shuffled = y[shuffled_indices]

        for i in range(0, m, batch_size):
            xi = X_shuffled[i:i+batch_size]
            yi = y_shuffled[i:i+batch_size]
            gradient = (1/batch_size) * xi.T.dot(xi.dot(theta) - yi)
            theta -= learning_rate * gradient
        loss = np.mean((X.dot(theta) - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
    return theta

theta_minibatch = mini_batch_gradient_descent(X_train, y_train)

Epoch 0, Loss: 0.5842
Epoch 1, Loss: 0.5462
Epoch 2, Loss: 0.5319
Epoch 3, Loss: 0.5266
Epoch 4, Loss: 0.5234
Epoch 5, Loss: 0.5256
Epoch 6, Loss: 0.5213
Epoch 7, Loss: 0.5198
Epoch 8, Loss: 0.5195
Epoch 9, Loss: 0.5202
Epoch 10, Loss: 0.5212
Epoch 11, Loss: 0.5184
Epoch 12, Loss: 0.5216
Epoch 13, Loss: 0.5234
Epoch 14, Loss: 0.5191
Epoch 15, Loss: 0.5207
Epoch 16, Loss: 0.5226
Epoch 17, Loss: 0.5254
Epoch 18, Loss: 0.5215
Epoch 19, Loss: 0.5274
Epoch 20, Loss: 0.5184
Epoch 21, Loss: 0.5195
Epoch 22, Loss: 0.5184
Epoch 23, Loss: 0.5196
Epoch 24, Loss: 0.5181
Epoch 25, Loss: 0.5190
Epoch 26, Loss: 0.5185
Epoch 27, Loss: 0.5247
Epoch 28, Loss: 0.5218
Epoch 29, Loss: 0.5196
Epoch 30, Loss: 0.5190
Epoch 31, Loss: 0.5199
Epoch 32, Loss: 0.5191
Epoch 33, Loss: 0.5260
Epoch 34, Loss: 0.5185
Epoch 35, Loss: 0.5184
Epoch 36, Loss: 0.5214
Epoch 37, Loss: 0.5197
Epoch 38, Loss: 0.5181
Epoch 39, Loss: 0.5271
Epoch 40, Loss: 0.5206
Epoch 41, Loss: 0.5188
Epoch 42, Loss: 0.5311
Epoch 43, Loss: 0.522

In [11]:
def batch_gd_with_regularization(X, y, learning_rate=0.01, epochs=1000, lambda_=0.1, reg_type='l2'):
    theta = np.zeros(X.shape[1])
    m = len(y)

    for epoch in range(epochs):
        gradient = (1/m) * X.T.dot(X.dot(theta) - y)
        if reg_type == 'l2':
            gradient += lambda_ * theta  # L2 penalty
        theta -= learning_rate * gradient
        if epoch % 100 == 0:
            loss = np.mean((X.dot(theta) - y) ** 2)
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
    return theta

theta_l2 = batch_gd_with_regularization(X_train, y_train, reg_type='l2')

Epoch 0, Loss: 5.5300
Epoch 100, Loss: 1.3881
Epoch 200, Loss: 0.8039
Epoch 300, Loss: 0.6892
Epoch 400, Loss: 0.6550
Epoch 500, Loss: 0.6402
Epoch 600, Loss: 0.6318
Epoch 700, Loss: 0.6262
Epoch 800, Loss: 0.6220
Epoch 900, Loss: 0.6188


In [12]:
def batch_gd_with_early_stopping(X, y, learning_rate=0.01, epochs=1000, patience=5):
    theta = np.zeros(X.shape[1])
    m = len(y)
    best_loss = np.inf
    patience_counter = 0

    for epoch in range(epochs):
        gradient = (1/m) * X.T.dot(X.dot(theta) - y)
        theta -= learning_rate * gradient
        loss = np.mean((X.dot(theta) - y) ** 2)

        if loss < best_loss:
            best_loss = loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss:.4f}")
    return theta

theta_early_stop = batch_gd_with_early_stopping(X_train, y_train)

Epoch 0, Loss: 5.5300
Epoch 100, Loss: 1.2906
Epoch 200, Loss: 0.7102
Epoch 300, Loss: 0.6187
Epoch 400, Loss: 0.5952
Epoch 500, Loss: 0.5827
Epoch 600, Loss: 0.5732
Epoch 700, Loss: 0.5652
Epoch 800, Loss: 0.5584
Epoch 900, Loss: 0.5526


In [13]:
def evaluate_model(theta, X_test, y_test):
    predictions = X_test.dot(theta)
    mse = np.mean((predictions - y_test) ** 2)
    print(f"Mean Squared Error (MSE): {mse:.4f}")

print("Batch GD Performance:")
evaluate_model(theta_batch, X_test, y_test)

print("\nSGD Performance:")
evaluate_model(theta_sgd, X_test, y_test)

print("\nMini-Batch GD Performance:")
evaluate_model(theta_minibatch, X_test, y_test)

Batch GD Performance:
Mean Squared Error (MSE): 0.5672

SGD Performance:
Mean Squared Error (MSE): 407910879798838541178048769091541412551954681174627648681088614246860927795200.0000

Mini-Batch GD Performance:
Mean Squared Error (MSE): 0.5607


In [14]:
import joblib

# Save the model and scaler
joblib.dump(theta_batch, 'batch_gd_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Verify files are saved
!ls

batch_gd_model.pkl  sample_data  scaler.pkl


In [15]:
!pip install huggingface_hub



In [16]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
from huggingface_hub import create_repo

repo_name = "keenu-5008/california-housing-regression"  # Replace with your HF username
create_repo(repo_name, repo_type="model")

HfHubHTTPError: 409 Client Error: Conflict for url: https://huggingface.co/api/repos/create (Request ID: Root=1-67eedee5-12271dc7236dc0847f3593a5;3375012b-bf30-428c-aaa1-8f325458bd70)

You already created this model repo

In [18]:
from huggingface_hub import upload_file

# Upload model and scaler
upload_file(
    path_or_fileobj="batch_gd_model.pkl",
    path_in_repo="batch_gd_model.pkl",
    repo_id=repo_name,
)

upload_file(
    path_or_fileobj="scaler.pkl",
    path_in_repo="scaler.pkl",
    repo_id=repo_name,
)

batch_gd_model.pkl:   0%|          | 0.00/297 [00:00<?, ?B/s]

scaler.pkl:   0%|          | 0.00/807 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/keenu-5008/california-housing-regression/commit/eb5a90b60df04379501bee256d3e56c34952ced4', commit_message='Upload scaler.pkl with huggingface_hub', commit_description='', oid='eb5a90b60df04379501bee256d3e56c34952ced4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/keenu-5008/california-housing-regression', endpoint='https://huggingface.co', repo_type='model', repo_id='keenu-5008/california-housing-regression'), pr_revision=None, pr_num=None)