### Problem 1: Blending Scratch Implementation

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
# Load dataset
data = pd.read_csv('./train.csv')

In [3]:
# Select features and target
X = data[['GrLivArea', 'YearBuilt']]
y = data['SalePrice']

In [4]:
# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Train individual models
model1 = LinearRegression().fit(X_train, y_train)
model2 = SVR().fit(X_train, y_train)
model3 = DecisionTreeRegressor().fit(X_train, y_train)

In [6]:
# Make predictions
pred1 = model1.predict(X_val)
pred2 = model2.predict(X_val)
pred3 = model3.predict(X_val)


In [7]:
# Blending: average predictions
blended_pred = (pred1 + pred2 + pred3) / 3

In [8]:
# Evaluate
mse_blended = mean_squared_error(y_val, blended_pred)
print(f'Blended MSE: {mse_blended}')

Blended MSE: 2797193029.709079


## Problem 2: Bagging Scratch Implementation

In [9]:
from sklearn.utils import resample


In [10]:
# Bootstrap sampling
n_samples = X_train.shape[0]
n_estimators = 10
bagged_predictions = np.zeros((X_val.shape[0], n_estimators))


In [11]:
for i in range(n_estimators):
    X_resampled, y_resampled = resample(X_train, y_train)
    model = DecisionTreeRegressor().fit(X_resampled, y_resampled)
    bagged_predictions[:, i] = model.predict(X_val)


In [12]:
# Average predictions
bagged_pred = np.mean(bagged_predictions, axis=1)


In [13]:
# Evaluate
mse_bagging = mean_squared_error(y_val, bagged_pred)
print(f'Bagging MSE: {mse_bagging}')


Bagging MSE: 1833038622.8168507


## Problem 3: Stacking Scratch Implementation


In [14]:
from sklearn.linear_model import Ridge

In [15]:
# Train base models
model1 = LinearRegression().fit(X_train, y_train)
model2 = SVR().fit(X_train, y_train)
model3 = DecisionTreeRegressor().fit(X_train, y_train)


In [16]:
# Create meta-features
meta_train = np.column_stack([model1.predict(X_train), model2.predict(X_train), model3.predict(X_train)])
meta_val = np.column_stack([model1.predict(X_val), model2.predict(X_val), model3.predict(X_val)])


In [17]:
# Train meta-model
meta_model = Ridge().fit(meta_train, y_train)

In [18]:
# Make predictions with meta-model
stacking_pred = meta_model.predict(meta_val)


In [19]:
# Evaluate
mse_stacking = mean_squared_error(y_val, stacking_pred)
print(f'Stacking MSE: {mse_stacking}')


Stacking MSE: 2126609508.0588863
