In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

warnings.filterwarnings('ignore')

In [2]:
# Column roles used by every later cell.
target_col = 'price'
feature_cols = ['area', 'bedrooms', 'bathrooms']

# Raw splits, exactly as stored on disk.
train_df = pd.read_csv("../data/split/train/train.csv")
test_df = pd.read_csv("../data/split/test/test.csv")


def _drop_address_and_missing(raw_df):
    """Return a new frame without the 'address' column (if present) and
    without any row that contains a missing value in any column."""
    return raw_df.drop(columns='address', errors='ignore').dropna()


# The same cleaning is applied to both splits; the raw frames stay untouched.
train_processed = _drop_address_and_missing(train_df)
test_processed = _drop_address_and_missing(test_df)

# Split each cleaned frame into the feature matrix and the target vector.
X_train_df, y_train_series = train_processed[feature_cols], train_processed[target_col]
X_test_df, y_test_series = test_processed[feature_cols], test_processed[target_col]

# The scaler is fitted on the training features only; the test features are
# transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_df)
X_test_scaled = scaler.transform(X_test_df)

In [3]:
def _float32_tensor(values):
    """Copy an array-like into a new float32 torch tensor."""
    return torch.tensor(values, dtype=torch.float32)


# Features: 2-D arrays produced by the scaler, converted as-is.
X_train = _float32_tensor(X_train_scaled)
X_test = _float32_tensor(X_test_scaled)

# Targets: 1-D series turned into column vectors of shape (n_rows, 1) so they
# line up with the single-output model's predictions.
y_train = _float32_tensor(y_train_series.values).unsqueeze(1)
y_test = _float32_tensor(y_test_series.values).unsqueeze(1)

In [4]:
# Seed PyTorch's global RNG before the layer is created: torch.nn.Linear draws
# its initial weight and bias at random, so without a fixed seed the loss
# trace, the learned coefficients and the test metrics change on every
# Restart & Run All.
torch.manual_seed(42)

# One linear layer with a single output: a linear regression on the features.
model = torch.nn.Linear(in_features=X_train.shape[1], out_features=1)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 10000
model.train()  # explicit counterpart to the model.eval() call in the evaluation cell
for epoch in range(epochs):
    # Full-batch step: the whole training set goes through the model at once.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Clear gradients left over from the previous step, backpropagate, update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Progress line every 1000 epochs. The value shown is the loss computed
    # before this epoch's parameter update.
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.6f}")

Epoch 1000/10000, Loss: 247.139130
Epoch 2000/10000, Loss: 227.269897
Epoch 2000/10000, Loss: 227.269897
Epoch 3000/10000, Loss: 213.210052
Epoch 3000/10000, Loss: 213.210052
Epoch 4000/10000, Loss: 202.320541
Epoch 4000/10000, Loss: 202.320541
Epoch 5000/10000, Loss: 193.384827
Epoch 5000/10000, Loss: 193.384827
Epoch 6000/10000, Loss: 186.122223
Epoch 6000/10000, Loss: 186.122223
Epoch 7000/10000, Loss: 180.433136
Epoch 7000/10000, Loss: 180.433136
Epoch 8000/10000, Loss: 176.214905
Epoch 8000/10000, Loss: 176.214905
Epoch 9000/10000, Loss: 173.335968
Epoch 9000/10000, Loss: 173.335968
Epoch 10000/10000, Loss: 171.612595
Epoch 10000/10000, Loss: 171.612595


In [5]:
model.eval()

# Inference only: no autograd graph is needed for the test-set predictions.
with torch.no_grad():
    y_pred_tensor = model(X_test)

# Flatten the (n, 1) column vectors into 1-D arrays for the sklearn metrics.
y_pred = y_pred_tensor.numpy().flatten()
y_true = y_test.numpy().flatten()

# Non-finite predictions are still replaced as before (NaN / -inf -> 0,
# +inf -> largest true value), but the replacement is now reported instead of
# happening silently. It is printed rather than raised as a warning because
# the notebook disables all warnings in its first cell.
non_finite_mask = ~np.isfinite(y_pred)
if non_finite_mask.any():
    print(
        f"WARNING: {int(non_finite_mask.sum())} of {y_pred.size} predictions "
        "were NaN/inf and have been replaced; the metrics below are unreliable."
    )
y_pred = np.nan_to_num(y_pred, nan=0.0, posinf=y_true.max(), neginf=0.0)

r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print(f"R-squared (R²): {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f} (tỷ VNĐ)")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f} (tỷ VNĐ)")

R-squared (R²): 0.0569
Mean Absolute Error (MAE): 6.0718 (tỷ VNĐ)
Root Mean Squared Error (RMSE): 15.1246 (tỷ VNĐ)


In [None]:
import joblib

# Pull the learned parameters out of the linear layer as plain numbers.
# detach() is used instead of the legacy .data attribute; cpu() keeps the
# conversion valid even if the model is ever moved to another device.
w = model.weight.detach().cpu().numpy().flatten()
b = model.bias.item()

# One coefficient per input feature, in the same order as feature_cols.
for name, weight in zip(feature_cols, w):
    print(f"{name:25s}: {weight: .4f}")

print(f"\nBias (intercept): {b:.4f}")

# Persist the model weights AND the fitted scaler. The model was trained on
# scaled features, so the saved weights cannot be applied to raw inputs
# without the same scaler.
torch.save(model.state_dict(), "model1.pth")
joblib.dump(scaler, "scaler1.pkl")

# Message text: "Model and scaler saved successfully!"
print("\nĐã lưu model và scaler thành công!")

area                     :  0.0989
bedrooms                 :  3.1199
bathrooms                :  1.4954

Bias (intercept): 8.5292

Đã lưu model và scaler thành công!
