# Import Libraries

In [138]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [139]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor

# Load Datasets

In [140]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("arunjangir245/boston-housing-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/boston-housing-dataset


In [141]:
import os

for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/boston-housing-dataset/BostonHousing.csv


In [142]:
df = pd.read_csv(path + '/BostonHousing.csv')

In [143]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [144]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   chas     506 non-null    int64  
 4   nox      506 non-null    float64
 5   rm       501 non-null    float64
 6   age      506 non-null    float64
 7   dis      506 non-null    float64
 8   rad      506 non-null    int64  
 9   tax      506 non-null    int64  
 10  ptratio  506 non-null    float64
 11  b        506 non-null    float64
 12  lstat    506 non-null    float64
 13  medv     506 non-null    float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB


In [145]:
df.duplicated().sum()

np.int64(0)

In [146]:
df.dropna(axis=0, inplace=True)

In [147]:
df.isnull().sum().sum()

np.int64(0)

# Train Test Split

In [148]:
X = df.drop('medv', axis=1)
y = df['medv']

In [149]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [150]:
# X_train.info

# Apply Base Models

In [151]:
model1 = LinearRegression()
model2 = DecisionTreeRegressor()
model3 = KNeighborsRegressor()

In [152]:
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

In [153]:
r2_score(y_test, model1.predict(X_test))

0.7200277678580317

In [154]:
r2_score(y_test, model2.predict(X_test))

0.7701852019748021

In [155]:
r2_score(y_test, model3.predict(X_test))

0.5443062501835765

# Apply Stacking Regressor

```python
class sklearn.ensemble.StackingRegressor(
  estimators,
  final_estimator=None,
  cv=None,
  n_jobs=None,
  passthrough=False,
  verbose=0
)

```

## Model 1

In [156]:
estimators = [
    ('lr', LinearRegression()),
    ('knn', KNeighborsRegressor(n_neighbors=10)),
    ('dt', DecisionTreeRegressor())
]

In [157]:
stacking_model = StackingRegressor(
    estimators = estimators,
    final_estimator = GradientBoostingRegressor(),
    cv = 10
)

In [158]:
stacking_model.fit(X_train, y_train)

In [159]:
stacking_model.estimators_

[LinearRegression(),
 KNeighborsRegressor(n_neighbors=10),
 DecisionTreeRegressor()]

In [160]:
stacking_model.final_estimator_

In [161]:
r2_score(y_test, stacking_model.predict(X_test))

0.8705098523836581

## Model 2

In [162]:
stacking_mode2 = StackingRegressor(
    estimators = estimators,
    final_estimator = RandomForestRegressor(),
    cv = 10
)

In [163]:
stacking_mode2.fit(X_train, y_train)

In [164]:
r2_score(y_test, stacking_mode2.predict(X_test))

0.871990127256393