In [47]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from typing import Callable

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Seed NumPy's legacy global RNG so the np.random.* draws used in this
# notebook (random start state, next-state sampling, synthetic data) repeat
# on a fresh Restart & Run All.
np.random.seed(42)

In [40]:
def sample_next_state(index: int, P: np.ndarray) -> int:
    """Draw the next state index from row ``index`` of the matrix ``P``.

    Args:
        index: Row of ``P`` to use as the sampling distribution.
        P: 2-D array whose row ``index`` holds the probabilities of moving
            to each of the ``P.shape[1]`` candidate states. The row is passed
            to ``np.random.choice`` as ``p``, so it must be a valid
            probability vector.

    Returns:
        The sampled column index as a built-in ``int``.
    """
    candidate_states = np.arange(P.shape[1])
    transition_probs = P[index]

    # Uses NumPy's global RNG, so results follow np.random.seed.
    drawn_state = np.random.choice(candidate_states, p=transition_probs)

    return int(drawn_state)

In [41]:
def td_loop(
        n_iter: int, 
        X: np.ndarray, 
        y: np.ndarray, 
        P: np.ndarray,
        link: Callable[[np.ndarray], np.ndarray], 
        inv_link: Callable[[np.ndarray], np.ndarray], 
        gamma: float, 
        alpha: float,
        epsilon: float,
    ) -> np.ndarray:
    """Fit a linear weight vector with a TD-style stochastic update.

    The samples ``(X[i], y[i])`` are treated as states. Starting from a
    uniformly random sample, the loop repeatedly draws a successor state from
    ``P`` and applies one update to the weights:

        r    = inv_link(y_curr) - gamma * inv_link(y_next)
        z_t  = r + gamma * (x_next . w)
        grad = (link(x_curr . w) - link(z_t)) * x_curr
        w   <- w - alpha * grad

    Args:
        n_iter: Maximum number of updates. It is only compared with the step
            counter using ``<``.
        X: Feature matrix; row ``i`` is the feature vector of state ``i``.
        y: Targets, indexed by the same state index as ``X``.
        P: Transition matrix; row ``i`` is handed to ``sample_next_state``
            to draw the successor of state ``i``.
        link: Applied to the current prediction and to the TD target before
            they are differenced.
        inv_link: Applied to the targets when forming the reward.
        gamma: Discount factor used in both the reward and the TD target.
        alpha: Step size multiplying the gradient.
        epsilon: The loop stops as soon as the L2 norm of the most recent
            update ``alpha * grad`` no longer exceeds this value.

    Returns:
        The weight vector, of length ``X.shape[1]``, after the loop stops.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    weights = np.zeros(n_features)

    # Uniformly random starting state.
    state = int(np.random.randint(n_samples))
    x_now = X[state]
    y_now = y[state]

    # Infinite placeholder gradient, so the first norm check passes for any
    # non-zero alpha.
    last_update = alpha * (np.ones_like(weights) * np.inf)

    steps_done = 0
    while steps_done < n_iter and np.linalg.norm(last_update, 2) > epsilon:
        # Draw the successor state and look up its sample.
        successor = sample_next_state(index=state, P=P)
        x_succ = X[successor]
        y_succ = y[successor]

        # Linear predictions for both states under the current weights.
        z_now = np.dot(x_now, weights)
        z_succ = np.dot(x_succ, weights)

        # Reward built from the two observed targets.
        reward = inv_link(y_now) - gamma * inv_link(y_succ)

        # Bootstrapped TD target.
        td_target = reward + gamma * z_succ

        # Gradient step: the TD error (in link space) scales the current features.
        td_gradient = (link(z_now) - link(td_target)) * x_now
        last_update = alpha * td_gradient
        weights -= last_update

        # Advance to the successor state.
        state = successor
        x_now = x_succ
        y_now = y_succ
        steps_done += 1

    return weights

In [42]:
# Generate synthetic data: a noisy linear model y = X @ true_w + noise.
num_samples = 100
num_features = 3

X = np.random.randn(num_samples, num_features)
true_w = np.array([2.0, -3.5, 1.0])
y = X @ true_w + np.random.randn(num_samples) * 0.1  # Adding noise

alpha = 0.01  # Learning rate
gamma = 0.9   # Discount factor
# Integer literal (not the float 1e6) so the value matches td_loop's
# `n_iter: int` parameter.
num_iterations = 1_000_000  # Number of iterations
epsilon = 1e-7  # Stop once the norm of an update falls to this level

P = np.ones((num_samples, num_samples)) / num_samples # Equal probability to move to any state

In [43]:
# Run the TD loop on the synthetic problem. Both link functions are the
# identity, so predictions and targets are compared on their original scale.
w_hat = td_loop(
    X=X,
    y=y,
    P=P,
    n_iter=num_iterations,
    alpha=alpha,
    gamma=gamma,
    epsilon=epsilon,
    link=lambda z: z,
    inv_link=lambda z: z,
)

# Euclidean distance between the estimate and the generating weights.
error = np.linalg.norm(w_hat - true_w, ord=2)

print(w_hat)
print(f'Error: {error:.5f}')

[ 2.00494837 -3.50096249  0.99077277]
Error: 0.01051


In [44]:
# Load the comma-separated housing data. A forward slash is used as the path
# separator because it works on Windows, Linux and macOS, whereas a
# backslash is only a separator on Windows.
housedata = np.loadtxt('data/readyhousedata.txt', delimiter=',')

# Separate features (X) and target (y)
X = housedata[:, :-1]  # All columns except the last
y = housedata[:, -1]   # The last column

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ridge (L2-regularised) regression baseline fitted on the training split.
ridge_reg = Ridge(alpha=0.5) 
ridge_reg.fit(X_train, y_train)

weights = ridge_reg.coef_
intercept = ridge_reg.intercept_

print(weights)
print("Intercept:", intercept)

# R^2 of the baseline on the held-out split.
score = ridge_reg.score(X_test, y_test)
print("Model R^2 score:", score)

[ -8.76539474   4.43378117   0.13628616   2.57877857  -9.02437005
  17.1000068    0.9618252  -15.70967281   6.32962149  -5.73996826
  -8.60956028   3.07264343 -20.06971951]
Intercept: 28.311813184508072
Model R^2 score: 0.7780162544000442


In [45]:
# One state per training row, with a uniform transition matrix over them.
num_samples = X_train.shape[0]
P = np.ones((num_samples, num_samples)) / num_samples # Equal probability to move to any state

alpha = 0.001  # Learning rate
gamma = 0.8   # Discount factor
# Integer literal (not the float 1e5) so the value matches td_loop's
# `n_iter: int` parameter.
num_iterations = 100_000  # Number of iterations
epsilon = 1e-8  # Stop once the norm of an update falls to this level

# Identity link functions: the TD loop works directly on the raw targets.
w_hat_house = td_loop(
    n_iter=num_iterations,
    X=X_train,
    y=y_train,
    P=P,
    link=lambda x : x,
    inv_link=lambda x : x,
    gamma=gamma,
    alpha=alpha,
    epsilon=epsilon,
)

print(w_hat_house)

[ -4.87956743   5.41510283   1.71360406   2.87707084  -1.72569767
  30.70445873   4.43625135  -2.03672037   3.67499225  -3.50890074
  -3.66858579  10.04479166 -15.41765695]


In [48]:
# Predict on the held-out split with both models.
# NOTE(review): the TD prediction is a plain dot product with no intercept
# term, while the Ridge baseline also fits an intercept -- confirm the
# comparison is intended to be set up this way.
pred_TD = X_test.dot(w_hat_house)
pred_L2 = ridge_reg.predict(X_test)

# Root-mean-squared error of each model against the true test targets.
rmse_TD, rmse_L2 = (
    np.sqrt(mean_squared_error(y_test, predictions))
    for predictions in (pred_TD, pred_L2)
)

print("RMSE on the test set using TD:", rmse_TD)
print("RMSE on the test set using L2 Regression:", rmse_L2)


RMSE on the test set using TD: 4.295622700002231
RMSE on the test set using L2 Regression: 4.109007658427198
