In [1]:
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd, numpy as np

2025-11-09 23:34:01.081817: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def _csv_to_tensor(csv_path):
    """Read the CSV at `csv_path` and return its contents as a tensor.

    The file is parsed with pandas, converted to a NumPy array, and handed to
    TensorFlow with `dtype=float`.
    """
    return tf.convert_to_tensor(pd.read_csv(csv_path).to_numpy(), dtype=float)


# Training split: inputs (X) and the values to be predicted (y).
X_train = _csv_to_tensor("./data/x_train.csv")
y_train = _csv_to_tensor("./data/y_train.csv")

# Test split, loaded the same way as the training split.
X_test = _csv_to_tensor("./data/x_test.csv")
y_test = _csv_to_tensor("./data/y_test.csv")

2025-11-09 23:34:04.001130: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [3]:
# -----------------------------
# INITIALIZE WEIGHTS AND BIAS
# -----------------------------

# Fix TensorFlow's global random seed so the random initial weights below are
# the same after every kernel restart and every re-execution of this cell.
# Without it each run starts from different weights and cannot be reproduced.
tf.random.set_seed(42)

# Create a weight matrix (W) with random initial values drawn from a normal
# distribution. Its shape is [number of input features, 1]: the feature count
# is read from X_train's second dimension and there is a single output column.
# 'tf.Variable' means this value can change (it is updated during training).
W = tf.Variable(tf.random.normal([X_train.shape[1], 1]), name="weights")

# Create a bias term (b), starting from zero.
# Shape [1] means one bias value that is added to every prediction.
b = tf.Variable(tf.zeros([1]), name="bias")

In [4]:
# -----------------------------
# SET LEARNING RATE
# -----------------------------

# Step size for gradient descent: every parameter update subtracts
# lr * gradient. A small value learns slowly; a large value learns faster
# but can overshoot the minimum.
lr = 1e-2

In [5]:
# -----------------------------
# TRAINING LOOP
# -----------------------------

# Early-stopping tolerance: training ends once the loss changes by no more
# than this amount between two consecutive epochs.
# NOTE(review): this is an absolute tolerance. If the loss is many orders of
# magnitude larger than 1e-5 (presumably float32 here — confirm), a change this
# small cannot be represented and the check only fires when two consecutive
# losses are identical. Consider a relative tolerance or scaling the data.
tol = 1e-5

# Maximum number of epochs (full passes through the dataset).
# 1e7 = 10 million (a very large number, used here as an upper limit).
epochs = int(1e7)

# Previous epoch's loss; starts at infinity so the first comparison never stops training.
prev_loss = float("inf")

for epoch in range(epochs):

    # Record operations for automatic differentiation (for computing gradients)
    with tf.GradientTape() as tape:

        # ---- Forward pass ----
        # Linear model: matrix-multiply the inputs by the weights, then add the bias.
        y_pred = tf.matmul(X_train, W) + b

        # ---- Compute loss ----
        # Mean Squared Error (MSE): average of squared differences
        # between actual (y_train) and predicted (y_pred) values.
        loss = tf.reduce_mean(tf.square(y_train - y_pred))

    # Pull the scalar loss out of the tensor for logging and comparisons.
    curr_loss = loss.numpy()

    # ---- Divergence guard ----
    # A NaN/inf loss can never satisfy the early-stopping test below, so without
    # this check a diverged run would keep looping until the epoch limit. Fail
    # fast instead, before the non-finite gradients are applied to W and b.
    if not np.isfinite(curr_loss):
        raise RuntimeError(
            f"Training diverged at epoch {epoch + 1}: loss is {curr_loss}. "
            "Try a smaller learning rate or scale the input features."
        )

    # ---- Backward pass ----
    # Compute gradients of the loss with respect to weights and bias
    grads = tape.gradient(loss, [W, b])

    # ---- Update parameters ----
    # Move weights and bias in the opposite direction of the gradient:
    # W = W - lr * gradient_of_W, b = b - lr * gradient_of_b
    W.assign_sub(lr * grads[0])
    b.assign_sub(lr * grads[1])

    # ---- Print progress ----
    # Show loss every 50 epochs so you can track improvement
    if (epoch + 1) % 50 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {curr_loss:.4f}")

    # ---- Early stopping condition ----
    # If the loss hasn’t changed much compared to the last epoch, stop training
    # and report where it stopped (otherwise the loop would end silently).
    if abs(curr_loss - prev_loss) <= tol:
        print(f"Stopped early at epoch {epoch + 1}, Loss: {curr_loss:.4f}")
        break

    # Save current loss as previous loss for the next iteration
    prev_loss = curr_loss

Epoch [50/10000000], Loss: 17355841536.0000
Epoch [100/10000000], Loss: 14824551424.0000
Epoch [150/10000000], Loss: 14054148096.0000
Epoch [200/10000000], Loss: 13645386752.0000
Epoch [250/10000000], Loss: 13359970304.0000
Epoch [300/10000000], Loss: 13136565248.0000
Epoch [350/10000000], Loss: 12953476096.0000
Epoch [400/10000000], Loss: 12799982592.0000
Epoch [450/10000000], Loss: 12669357056.0000
Epoch [500/10000000], Loss: 12556845056.0000
Epoch [550/10000000], Loss: 12458892288.0000
Epoch [600/10000000], Loss: 12372782080.0000
Epoch [650/10000000], Loss: 12296401920.0000
Epoch [700/10000000], Loss: 12228084736.0000
Epoch [750/10000000], Loss: 12166522880.0000
Epoch [800/10000000], Loss: 12110661632.0000
Epoch [850/10000000], Loss: 12059657216.0000
Epoch [900/10000000], Loss: 12012832768.0000
Epoch [950/10000000], Loss: 11969631232.0000
Epoch [1000/10000000], Loss: 11929598976.0000
Epoch [1050/10000000], Loss: 11892366336.0000
Epoch [1100/10000000], Loss: 11857622016.0000
Epoch [1

In [6]:
# Track the model parameters under the names "weights" and "bias", then write
# their current values to disk. write() is the last expression so the cell
# displays whatever it returns.
ckpt = tf.train.Checkpoint(bias=b, weights=W)
ckpt.write(file_prefix="./model/model.tf")

'./model/model.tf'

In [7]:
# Rebuild a checkpoint object that tracks W and b under the names "weights"
# and "bias", then load the stored values back into those variables.
ckpt = tf.train.Checkpoint(weights=W, bias=b)

# assert_consumed() raises if any value in the checkpoint file was not matched
# to a tracked object (or vice versa), so a wrong path or mismatched names
# fails here instead of silently leaving W and b untouched. It returns the
# load-status object, so the cell output is the same as a bare read().
ckpt.read("./model/model.tf").assert_consumed()

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x73ff09cb88f0>

In [8]:
# Inference on the test inputs with the current weights and bias
y_pred = tf.matmul(X_test, W) + b

# The sklearn metrics below are given 1-D arrays, so flatten both tensors.
y_test_flat = y_test.numpy().flatten()
y_pred_flat = y_pred.numpy().flatten()

# Regression metrics: squared error, its square root, absolute error, and R2.
mse = mean_squared_error(y_test_flat, y_pred_flat)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_flat, y_pred_flat)
r2 = r2_score(y_test_flat, y_pred_flat)

# One "label: value" line per metric, each formatted to four decimal places.
report = (
    ("Mean Squared Error (MSE)", mse),
    ("Root Mean Squared Error (RMSE)", rmse),
    ("Mean Absolute Error (MAE)", mae),
    ("R2 Score", r2),
)
print("\n".join(f"{label}: {value:.4f}" for label, value in report))

Mean Squared Error (MSE): 12283815936.0000
Root Mean Squared Error (RMSE): 110832.3777
Mean Absolute Error (MAE): 87416.5625
R2 Score: 0.5036
