In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1) Read the cleaned dataset from disk
# The relative path below must point at wherever the cleaned CSV was saved.
CSV_PATH = "../Assigment-three/car_l3_clean_ready.csv"
df = pd.read_csv(CSV_PATH)

# 2) Separate the target (y) from the predictors (X)
# "Price" is the value being predicted, so it must not appear among the
# features -- otherwise the models would simply read the answer.
y = df["Price"]
X = df.drop(columns="Price")

# 3) Hold out 20% of the rows for testing; the fixed random_state makes the
#    shuffle repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4) Baseline model: linear regression fitted on the training rows,
#    then scored on the held-out rows.
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# 5) Comparison model: random forest with 100 trees (seeded), same
#    train/predict flow as the baseline.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# 6) Metrics Helper Function
def print_metrics(name, y_true, y_pred):
    """Print R², MAE and RMSE for one set of predictions.

    Args:
        name: Label shown in the report header (e.g. the model's name).
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        None. The report is written to stdout as a blank line, a header,
        and one line per metric.
    """
    score_r2 = r2_score(y_true, y_pred)
    abs_err = mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error, which puts it back
    # in the same units as the target.
    root_sq_err = np.sqrt(mean_squared_error(y_true, y_pred))

    report_lines = (
        f"\n{name} Performance:",
        f"  R²   : {score_r2:.3f}",
        f"  MAE  : {abs_err:,.0f}",
        f"  RMSE : {root_sq_err:,.0f}",
    )
    for line in report_lines:
        print(line)

# Report both models against the same held-out targets, baseline first.
# NOTE(review): the saved output for this cell shows a negative R² for
# Linear Regression, i.e. worse than always predicting the mean -- worth
# confirming the features are suitably encoded/scaled for a linear model.
for model_label, held_out_pred in (
    ("Linear Regression", lr_pred),
    ("Random Forest", rf_pred),
):
    print_metrics(model_label, y_test, held_out_pred)


Linear Regression Performance:
  R²   : -0.023
  MAE  : 1,200
  RMSE : 1,351

Random Forest Performance:
  R²   : 0.791
  MAE  : 531
  RMSE : 610


In [4]:
# 8) Sanity check on a single held-out row
# Compare each model's prediction for one test row with its actual price.
i = 0
y_true = y_test.iloc[i]          # scalar target for the chosen row
x_one_df = X_test.iloc[[i]]      # list indexer keeps a one-row DataFrame for predict()

# predict() returns an array; take its only element as a plain float.
p_lr_one = float(lr.predict(x_one_df)[0])
p_rf_one = float(rf.predict(x_one_df)[0])

for line in (
    "\nSingle-row sanity check:",
    f"  Actual Price: ${y_true:,.0f}",
    f"  LR Pred     : ${p_lr_one:,.0f}",
    f"  RF Pred     : ${p_rf_one:,.0f}",
):
    print(line)


Single-row sanity check:
  Actual Price: $4,171
  LR Pred     : $5,992
  RF Pred     : $3,339
