In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score

In [15]:
df = pd.read_csv('train.csv')
print(df.shape)
print(df.columns)

(1460, 81)
Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 

In [16]:
df = df.drop(['Id', 'PoolQC', 'MiscFeature', 'Alley', 'Fence'], axis=1)

In [17]:
y = df['SalePrice']
X = df.drop(['SalePrice'], axis=1)

In [18]:
num_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat_cols = X.select_dtypes(include=['object']).columns.tolist()

num_imputer = SimpleImputer(strategy='median')
X[num_cols] = num_imputer.fit_transform(X[num_cols])

cat_imputer = SimpleImputer(strategy='constant', fill_value='None')
X[cat_cols] = cat_imputer.fit_transform(X[cat_cols])

In [19]:
X['TotalBathrooms'] = (X['FullBath'] + 0.5 * X['HalfBath'] +
                       X['BsmtFullBath'] + 0.5 * X['BsmtHalfBath'])
X['TotalSF'] = X['TotalBsmtSF'] + X['1stFlrSF'] + X['2ndFlrSF']
X['Age'] = X['YrSold'] - X['YearBuilt']
X['RemodAge'] = X['YrSold'] - X['YearRemodAdd']
X['GarageAge'] = X['YrSold'] - X['GarageYrBlt']

X['GarageAge'] = X['GarageAge'].fillna(X['GarageAge'].median())

In [20]:
X = pd.get_dummies(X, columns=cat_cols, drop_first=True)

In [21]:
scaler = StandardScaler()
num_features = num_cols + ['TotalBathrooms', 'TotalSF', 'Age', 'RemodAge', 'GarageAge']
X[num_features] = scaler.fit_transform(X[num_features])

In [22]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [23]:
ridge = Ridge(alpha=10.0)
ridge.fit(X_train, y_train)

In [24]:
y_pred = ridge.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(" Ridge Regression Model Performance:")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.4f}")
print("Sample Predictions:", y_pred[:5])

 Ridge Regression Model Performance:
RMSE: 30567.31
R² Score: 0.8782
Sample Predictions: [160529.6620113  333765.43983998  97155.97252818 180472.84734728
 330473.78652656]
