In [None]:
import sys
import os

# Make the parent of the current working directory importable (presumably
# where the `src` package imported by a later cell lives). The membership
# check keeps the cell idempotent: re-running it no longer appends a
# duplicate entry to sys.path each time.
_PROJECT_ROOT = os.path.abspath("..")
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

In [None]:
import matplotlib.pyplot as plt
from typing import Dict
from src.housing import train, eval, load_dataset, set_seed

# Single seed reused by set_seed, by the dataset split, and by every train() call below.
SEED = 1
# Project helper from src.housing; which random number generators it seeds
# is not visible from this notebook.
set_seed(SEED)

In [None]:
def plot_results(model: str, result: Dict, out_dir: str = "figures", y_true=None):
    """Save and display diagnostic plots for one evaluated model.

    Two figures are produced: a histogram of the residuals
    (``y_true - result["preds"]``) and a scatter of predictions against the
    true targets. Each figure is written to ``out_dir`` as
    ``<model>_residuals.png`` / ``<model>_pred_vs_true.png`` and then shown.

    Args:
        model: Label used as the filename prefix of the saved figures.
        result: Evaluation output; only ``result["preds"]`` is read here.
        out_dir: Directory for the PNG files; created if it does not exist.
        y_true: True target values aligned with ``result["preds"]``. When
            omitted, the notebook-level ``y_test`` is used, so existing
            two-argument calls keep working.
    """
    if y_true is None:
        # Backward-compatible fallback; resolved at call time from the
        # notebook namespace, so the data-loading cell must have run.
        y_true = y_test

    # Read predictions from the argument rather than from the global
    # `test_results`, so the plot always matches the result that was passed.
    preds = result["preds"]

    os.makedirs(out_dir, exist_ok=True)

    # Residual distribution.
    fig_resid, ax_resid = plt.subplots()
    ax_resid.hist(y_true - preds, bins=40)
    ax_resid.set_title("Residuals (y - y_pred)")
    ax_resid.set_xlabel("Residual")
    ax_resid.set_ylabel("Count")
    fig_resid.tight_layout()
    fig_resid.savefig(os.path.join(out_dir, f"{model}_residuals.png"))
    plt.show()

    # Predictions against ground truth.
    fig_scatter, ax_scatter = plt.subplots()
    ax_scatter.scatter(y_true, preds, s=6)
    ax_scatter.set_title("Predicted vs True")
    ax_scatter.set_xlabel("True")
    ax_scatter.set_ylabel("Pred")
    fig_scatter.tight_layout()
    fig_scatter.savefig(os.path.join(out_dir, f"{model}_pred_vs_true.png"))
    plt.show()

In [None]:
# Load the data already split into train/test parts, seeded with the shared SEED.
# NOTE(review): assumes test_size=0.2 means 20% of rows are held out and that
# load_dataset returns (X_train, X_test, y_train, y_test) in this order —
# confirm against src.housing.
X_train, X_test, y_train, y_test = load_dataset(test_size=0.2, random_state=SEED)

### Simple Linear Regression

In [None]:
# Fit the "simple_elastic" model on the training split, then evaluate it on
# the test split. Later cells read the returned dict through the keys
# 'estimator', 'rmse', 'mae', 'r2' and 'preds'.
# NOTE: `fitted_gs` and `test_results` are reassigned by the later model
# sections, so the cells must be run in order. `eval` here is the project
# function imported from src.housing, which shadows Python's builtin eval.
fitted_gs = train("simple_elastic", X_train, y_train, random_state=SEED)
test_results = eval(fitted_gs, X_test, y_test)

In [None]:
# Report the chosen estimator's hyperparameters, a separator rule, and the
# test-set metrics — one item per line, emitted by a single print call.
print(
    f"Best alpha: {test_results['estimator'].alpha}",
    f"Best L1 ratio: {test_results['estimator'].l1_ratio}",
    "=" * 30,
    f"RMSE: {test_results['rmse']:.2f}",
    f"MAE: {test_results['mae']:.2f}",
    f"R2: {test_results['r2']:.2f}",
    sep="\n",
)

In [None]:
# Show the residual histogram and predicted-vs-true scatter for this run; the
# PNGs are saved with the "simple_elastic_" prefix in the default "figures" directory.
plot_results("simple_elastic", test_results)

### Polynomial Regression

In [None]:
# Fit the "poly_elastic" model on the training split, then evaluate it on the
# test split. Later cells read the returned dict through the keys
# 'estimator', 'rmse', 'mae', 'r2' and 'preds'.
# NOTE: this overwrites `fitted_gs` and `test_results` from the previous
# section, so the cells must be run in order.
fitted_gs = train("poly_elastic", X_train, y_train, random_state=SEED)
test_results = eval(fitted_gs, X_test, y_test)

In [None]:
# Report the chosen estimator's hyperparameters, a separator rule, and the
# test-set metrics — one item per line, emitted by a single print call.
print(
    f"Best alpha: {test_results['estimator'].alpha}",
    f"Best L1 ratio: {test_results['estimator'].l1_ratio}",
    "=" * 30,
    f"RMSE: {test_results['rmse']:.2f}",
    f"MAE: {test_results['mae']:.2f}",
    f"R2: {test_results['r2']:.2f}",
    sep="\n",
)

In [None]:
# Show the residual histogram and predicted-vs-true scatter for this run; the
# PNGs are saved with the "poly_elastic_" prefix in the default "figures" directory.
plot_results("poly_elastic", test_results)

### kNN

In [None]:
# Fit the "knn" model on the training split, then evaluate it on the test
# split. Later cells read the returned dict through the keys 'estimator',
# 'rmse', 'mae', 'r2' and 'preds'.
# NOTE: this overwrites `fitted_gs` and `test_results` from the previous
# section, so the cells must be run in order.
fitted_gs = train("knn", X_train, y_train, random_state=SEED)
test_results = eval(fitted_gs, X_test, y_test)

In [None]:
# Report the chosen estimator's hyperparameters, a separator rule, and the
# test-set metrics — one item per line, emitted by a single print call.
# NOTE(review): the "Best metric" line prints the estimator's `p` attribute,
# not an attribute named `metric` — confirm the label is what is intended.
print(
    f"Best n_neighbors: {test_results['estimator'].n_neighbors}",
    f"Best metric: {test_results['estimator'].p}",
    "=" * 30,
    f"RMSE: {test_results['rmse']:.2f}",
    f"MAE: {test_results['mae']:.2f}",
    f"R2: {test_results['r2']:.2f}",
    sep="\n",
)

In [None]:
# Show the residual histogram and predicted-vs-true scatter for this run; the
# PNGs are saved with the "knn_" prefix in the default "figures" directory.
plot_results("knn", test_results)