In [11]:
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import r2_score, mean_squared_error

# Load direct from split folder
train_df = pd.read_csv('../data/split/train/train.csv')
test_df  = pd.read_csv('../data/split/test/test.csv')

# Encode & features
for df in [train_df, test_df]:
    df["address_code"] = LabelEncoder().fit_transform(df["address"])
    df["location_value"] = df.groupby("address")["price"].transform("mean")
    df["area_location"] = df["area"] * df["location_value"]

features = ["area", "bedrooms", "bathrooms", "area_location", "address_code"]

# X, y
X_train = train_df[features].values
y_train = train_df["price"].values.reshape(-1, 1)

X_test = test_df[features].values
y_test = test_df["price"].values.reshape(-1, 1)

# Scale only by train
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

# Convert to tensor
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)

X_test  = torch.tensor(X_test, dtype=torch.float32)
y_test  = torch.tensor(y_test, dtype=torch.float32)

# Model
model = torch.nn.Linear(X_train.shape[1], 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.MSELoss()

# Train
for _ in range(2000):
    optimizer.zero_grad()
    pred = model(X_train)
    loss = loss_fn(pred, y_train)
    loss.backward()
    optimizer.step()

# Print weights and bias
print("\n=== MODEL PARAMETERS ===")
print("Weights:", model.weight.detach().numpy())
print("Bias:", model.bias.detach().numpy())

# Evaluate
y_pred = model(X_test).detach().numpy()
print("\n=== EVALUATION ===")
print("R²:", r2_score(y_test, y_pred))
print("RMSE:", (mean_squared_error(y_test, y_pred)) ** 0.5)



=== MODEL PARAMETERS ===
Weights: [[-18.46248     2.9897747   1.8348573  18.706087   -1.7677957]]
Bias: [9.451362]

=== EVALUATION ===
R²: 0.12282150983810425
RMSE: 14.586434403359705
