In [20]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score


In [22]:
train_df = pd.read_csv('data/train.csv')

y = train_df['SalePrice']
X = train_df.drop(['SalePrice', 'Id'], axis=1)


In [23]:
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [24]:
num_features = X_train.select_dtypes(include=['int64', 'float64']).columns
cat_features = X_train.select_dtypes(include=['object']).columns


In [None]:
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
#numerical pipeline

In [26]:
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])
#categorical pipeline

In [27]:
preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_features),
    ('cat', cat_pipeline, cat_features)
])
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [28]:
ridge_model = Pipeline([
    ('preprocessor', preprocessor), 
    ('regressor', Ridge(alpha=1.0))
])

In [29]:
lasso_model = Pipeline([
    ('preprocess', preprocessor),
    ('model', Lasso(alpha=0.001))
])


In [30]:
def evaluate(model, X_train, X_val, y_train, y_val):
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, preds))
    r2 = r2_score(y_val, preds)
    return rmse, r2


In [32]:
lin_rmse, lin_r2 = evaluate(model, X_train, X_val, y_train, y_val)
ridge_rmse, ridge_r2 = evaluate(ridge_model, X_train, X_val, y_train, y_val)
lasso_rmse, lasso_r2 = evaluate(lasso_model, X_train, X_val, y_train, y_val)

print("Linear   → RMSE:", lin_rmse, " R²:", lin_r2)
print("Ridge    → RMSE:", ridge_rmse, " R²:", ridge_r2)
print("Lasso    → RMSE:", lasso_rmse, " R²:", lasso_r2)


Linear   → RMSE: 29473.873055291337  R²: 0.8867441658393297
Ridge    → RMSE: 29841.823697205615  R²: 0.8838987524753943
Lasso    → RMSE: 28313.70432773389  R²: 0.8954847771653867


  model = cd_fast.sparse_enet_coordinate_descent(
