# Import Libraries

In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [40]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [41]:
import os

# Import dataset

In [42]:
import kagglehub

# Fetch the latest version of the dataset; the returned value is the local
# directory that holds the downloaded files.
path = kagglehub.dataset_download("schirmerchad/bostonhoustingmlnd")

print(f"Path to dataset files: {path}")

Path to dataset files: /root/.cache/kagglehub/datasets/schirmerchad/bostonhoustingmlnd/versions/1


In [43]:
# Show what the downloaded dataset directory contains, one entry per line.
for entry in os.listdir(path):
    print(entry)

housing.csv


In [44]:
# Load the CSV found in the dataset directory and preview its first rows.
df = pd.read_csv(f"{path}/housing.csv")
df.head()

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0


# Apply StandardScaler to all features

In [45]:
# Standardize every predictor column (everything except the MEDV target).
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed further down, so test-set statistics influence
# the scaling. Consider fitting on the training rows only (e.g. via a Pipeline).
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop(columns='MEDV'))

In [46]:
# Preview of the scaled features under their original names.
# Column labels are obtained by dropping the target by name (the same way the
# scaler's input was built) rather than by assuming MEDV is the last column.
pd.DataFrame(
    scaled_features,
    columns=df.columns.drop('MEDV'),
    index=df.index,
).head()

Unnamed: 0,RM,LSTAT,PTRATIO
0,0.520554,-1.125077,-1.525083
1,0.281048,-0.53707,-0.339748
2,1.469245,-1.259357,-0.339748
3,1.178417,-1.413427,0.086973
4,1.410146,-1.075605,0.086973


In [47]:
# Wrap the scaled array back into a DataFrame so later cells keep the feature
# names. Column labels come from dropping the target by name (mirroring how
# the scaler's input was built) instead of assuming MEDV is the last column,
# and df's index is reused so the rows stay aligned with df['MEDV'].
new_df = pd.DataFrame(
    scaled_features,
    columns=df.columns.drop('MEDV'),
    index=df.index,
)
new_df.head()

Unnamed: 0,RM,LSTAT,PTRATIO
0,0.520554,-1.125077,-1.525083
1,0.281048,-0.53707,-0.339748
2,1.469245,-1.259357,-0.339748
3,1.178417,-1.413427,0.086973
4,1.410146,-1.075605,0.086973


# Train Test Split

In [48]:
# Feature matrix (the scaled predictors) and regression target (MEDV column).
X, y = new_df, df['MEDV']

In [49]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=101,
    test_size=0.3,
)

# Apply Ensemble Regression

In [50]:
# Three different base learners for the voting ensemble.
reg1 = LinearRegression()
# Seed the tree: scikit-learn randomly permutes candidate features at each
# split, so an unseeded tree can produce different CV scores on every run.
# 101 matches the seed already used for the train/test split.
reg2 = DecisionTreeRegressor(random_state=101)
reg3 = SVR()

In [51]:
# (name, estimator) pairs, the format VotingRegressor takes for `estimators`.
estimators = list(zip(('lr', 'dt', 'svr'), (reg1, reg2, reg3)))

In [52]:
# Baseline: mean 10-fold cross-validated R² of each base learner on its own.
for name, model in estimators:
    scores = cross_val_score(model, X_train, y_train, scoring='r2', cv=10)
    print(name, np.round(scores.mean(), 2))

lr 0.66
dt 0.6
svr -0.02


In [53]:
# Weighted voting ensemble: weights 5/4/1 apply to lr/dt/svr, in the order the
# estimators are listed. Report the mean 10-fold cross-validated R².
vr1 = VotingRegressor(estimators=estimators, weights=[5, 4, 1])
scores = cross_val_score(vr1, X_train, y_train, cv=10, scoring='r2')
print(np.round(scores.mean(), 2))


0.72


In [54]:
# Exhaustive sweep over integer weights 1..3 for each of the three estimators
# (27 combinations, last weight varying fastest), printing the mean 10-fold R².
for combo in np.ndindex(3, 3, 3):
    # ndindex is 0-based; shift to the 1..3 weight range.
    i, j, k = (offset + 1 for offset in combo)
    vr = VotingRegressor(estimators=estimators, weights=[i, j, k])
    scores = cross_val_score(vr, X_train, y_train, scoring='r2', cv=10)
    print(i, j, k, np.round(np.mean(scores), 2))


1 1 1 0.65
1 1 2 0.55
1 1 3 0.47
1 2 1 0.69
1 2 2 0.62
1 2 3 0.55
1 3 1 0.68
1 3 2 0.65
1 3 3 0.6
2 1 1 0.68
2 1 2 0.61
2 1 3 0.54
2 2 1 0.7
2 2 2 0.65
2 2 3 0.6
2 3 1 0.7
2 3 2 0.67
2 3 3 0.64
3 1 1 0.69
3 1 2 0.64
3 1 3 0.58
3 2 1 0.7
3 2 2 0.67
3 2 3 0.63
3 3 1 0.71
3 3 2 0.69
3 3 3 0.65


In [55]:
# Fit each base learner on the full training split.
for model in (reg1, reg2, reg3):
    model.fit(X_train, y_train)
reg3  # keep the cell's displayed value: the last fitted estimator

In [56]:
# R² of the linear model on the held-out test split.
r2_score(y_test, reg1.predict(X_test))

0.7192726119820875

# With Homogeneous Models

In [57]:
# Homogeneous ensemble members: the same algorithm at depth limits 1..5, plus
# one tree with no depth limit.
# Each tree is seeded because scikit-learn randomly permutes candidate features
# at each split; without a seed the CV scores can change between runs.
# 101 matches the seed already used for the train/test split.
reg5 = DecisionTreeRegressor(max_depth=1, random_state=101)
reg6 = DecisionTreeRegressor(max_depth=2, random_state=101)
reg7 = DecisionTreeRegressor(max_depth=3, random_state=101)
reg8 = DecisionTreeRegressor(max_depth=4, random_state=101)
reg9 = DecisionTreeRegressor(max_depth=5, random_state=101)
# NOTE(review): `re10` looks like a typo for `reg10`; the name is kept because
# the estimators2 list refers to it.
re10 = DecisionTreeRegressor(random_state=101)

In [58]:
# Label the six trees 'dt1'..'dt6' in the order they were created.
estimators2 = [
    (f'dt{position}', tree)
    for position, tree in enumerate((reg5, reg6, reg7, reg8, reg9, re10), start=1)
]


In [60]:
# Equal-weight voting over the six trees; report the mean 10-fold cross-validated R².
vr2 = VotingRegressor(estimators=estimators2)
scores2 = cross_val_score(vr2, X_train, y_train, cv=10, scoring='r2')
print(np.round(scores2.mean(), 2))

0.73


In [61]:
# For comparison: mean 10-fold cross-validated R² of each tree on its own.
for name, model in estimators2:
    scores = cross_val_score(model, X_train, y_train, scoring='r2', cv=10)
    print(name, np.round(scores.mean(), 2))

dt1 0.32
dt2 0.59
dt3 0.69
dt4 0.74
dt5 0.72
dt6 0.61
