In [11]:
import torch
import matplotlib.pyplot as plt
import torch

from utils.TD_NN import TemporalDifferenceNN

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import numpy as np

seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x1c7a845daf0>

In [12]:
housedata = torch.tensor(np.loadtxt('data\\readyhousedata.txt', delimiter=','), dtype=torch.float32)

X = housedata[:, :-1]
y = housedata[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

reg = LinearRegression() 
reg.fit(X_train, y_train)

weights = torch.tensor(reg.coef_)
intercept = torch.tensor(reg.intercept_)

print("Weights learned using LR:\n", weights)
print("Intercept:", intercept)

score = reg.score(X_test, y_test)
print("Model R^2 score:", score)

print(f"RMSE on test set: {torch.sqrt(torch.tensor(mean_squared_error(y_test, reg.predict(X_test)))):.4f}")


Weights learned using LR:
 tensor([-10.9937,   5.0058,   0.6358,   2.4487, -10.3119,  17.5176,   1.0312,
        -17.7757,   7.3409,  -6.4276,  -8.7997,   3.0356, -20.5194])
Intercept: tensor(29.0443)
Model R^2 score: 0.7730569243431091
RMSE on test set: 4.1547


In [13]:
def mini_batch_sgd(
        X: np.ndarray, 
        y: np.ndarray, 
        learning_rate: float, 
        n_iter: int, 
        batch_size: int,
        epsilon: float = 0, 
    ) -> np.ndarray:
    """
    Performs Mini-Batch Stochastic Gradient Descent (SGD) for linear regression with shuffling.
    
    Parameters:
    X (np.ndarray): Feature matrix (n_samples, n_features).
    y (np.ndarray): Target vector (n_samples,).
    learning_rate (float): Step size for updating weights.
    n_iter (int): Number of iterations (epochs).
    epsilon (float): Convergence threshold.
    batch_size (int): Size of the mini-batches.
    
    Returns:
    np.ndarray: The learned weights.
    """
    n_samples, n_features = X.shape
    # Add bias term to the feature matrix
    X_bias = np.c_[np.ones(n_samples), X]  # Adds a column of ones for the bias term

    # Initialize weights to zeros
    weights = np.zeros(n_features + 1)

    for epoch in range(int(n_iter)):
        indices = np.arange(n_samples)
        np.random.shuffle(indices)
        X_bias_shuffled = X_bias[indices]
        y_shuffled = y[indices]

        for i in range(0, n_samples, batch_size):
            X_batch = X_bias_shuffled[i:i + batch_size]
            y_batch = y_shuffled[i:i + batch_size]
            
            # Initialize gradients to zero
            gradient = np.zeros_like(weights)
            
            # Compute the gradient over the mini-batch
            for j in range(X_batch.shape[0]):
                # Prediction
                prediction = np.dot(X_batch[j], weights)
                error = y_batch[j] - prediction
                
                # Update gradient
                gradient += -2 * X_batch[j] * error

            # Update the weights
            weights -= learning_rate * gradient / batch_size

        # Check for convergence (if gradient is small enough)
        if np.linalg.norm(gradient) < epsilon:
            print(f"Converged after {epoch + 1} epochs")
            break

    return weights

X_train_np = np.array(X_train)
X_test_np = np.array(X_test)
y_train_np = np.array(y_train)
y_test_np = np.array(y_test)

w_sgd = mini_batch_sgd(
    X_train_np,
    y_train_np,
    learning_rate=0.01,
    n_iter=1e4,
    batch_size=32
)


In [14]:
num_samples, num_features = X_train.shape
P = torch.ones((num_samples, num_samples)) / num_samples # Equal probability to move to any state

identity = lambda x : x

In [15]:
# Initialize the TD_NN_SGD model
td_nn_sgd = TemporalDifferenceNN(
    optimizer='sgd',
    P=P,
    link=identity,
    inv_link=identity,
    gamma=0,
    learning_rate=0.001,
    epsilon=10,
    input_size=num_features,
)

# Fit the model
td_nn_sgd.fit(X_train, y_train, epochs=800)

# Evaluate the model
rmse = td_nn_sgd.rmse(X_test, y_test)
print(f"RMSE on test set: {rmse:.4f}")

Training finished. Final epoch loss: 146.8429047511174
RMSE on test set: 2.6127


In [16]:
from utils.TD import TemporalDifferenceLinear

td_sgd = TemporalDifferenceLinear(
    optimizer='sgd',
    input_size=num_features,
    learning_rate=0.01,
    gamma=0,
    epsilon=0,
    n_iter=1e5,
    P=P,
    link=identity,
    inv_link=identity,
)

td_sgd.fit(X_train, y_train)

In [17]:
print(f"Norm of difference in weights for L2 and TD SGD: {torch.norm(weights - td_sgd.model.weight.data, 2)}")
print(f"RMSE on the test set using TD SGD: {td_sgd.rmse(X_test, y_test)}")

Norm of difference in weights for L2 and TD SGD: 2.0422887802124023
RMSE on the test set using TD SGD: 4.173651695251465


In [21]:
pred_L2 = reg.predict(X_test)
pred_sgd = td_nn_sgd.predict(X_test)

rmse_TD_sgd = td_nn_sgd.rmse(X_test, y_test)
# rmse_TD_adam = td_adam.rmse(X_test, y_test)
rmse_L2 = torch.sqrt(torch.tensor(mean_squared_error(y_test, pred_L2)))

print(f"RMSE on the test set using TD SGD: {rmse_TD_sgd}")
# print(f"RMSE on the test set using TD Adam: {rmse_TD_adam}")
print(f"RMSE on the test set using L2 linear Regression: {rmse_L2}")
print("---------------")
print(f"Absolute difference between values: {torch.mean(torch.abs(torch.Tensor(pred_L2) - pred_sgd))}")
print(f"Norm of difference in weights for L2 and SGD: {torch.norm(weights - torch.Tensor(w_sgd[1:]), 2)}")
print(f"Norm of difference in weights for TD SGD and L2 SGD: {torch.norm(td_sgd.model.weight.data - torch.Tensor(w_sgd[1:]), 2)}")


RMSE on the test set using TD SGD: 2.612731695175171
RMSE on the test set using L2 linear Regression: 4.154654026031494
---------------
Absolute difference between values: 8.702795028686523
Norm of difference in weights for L2 and SGD: 0.008345226757228374
Norm of difference in weights for TD SGD and L2 SGD: 2.047274351119995


In [22]:
print(f"Norm of difference in weights for L2 and TD SGD: {torch.norm(weights - td_sgd.model.weight.data, 2)}")
print(f"RMSE on the test set using TD SGD: {td_sgd.rmse(X_test, y_test)}")


Norm of difference in weights for L2 and TD SGD: 2.0422887802124023
RMSE on the test set using TD SGD: 4.173651695251465


: 

In [20]:
print(td_sgd.model.weight.data)
print(weights)

tensor([[-11.1315,   4.4402,   0.2227,   1.6646,  -9.7619,  17.1236,   0.9290,
         -17.1550,   6.9490,  -7.1140,  -8.6243,   2.5696, -19.3702]])
tensor([-10.9937,   5.0058,   0.6358,   2.4487, -10.3119,  17.5176,   1.0312,
        -17.7757,   7.3409,  -6.4276,  -8.7997,   3.0356, -20.5194])
