In [None]:
import pandas as pd
import numpy as np
# sklearn load_boston no longer works, so the raw file is read from the CMU StatLib archive
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# The separator is a regex, so it is written as a raw string: "\s" inside a normal
# string literal is an invalid escape sequence (SyntaxWarning on Python 3.12+).
# skiprows=22 drops the leading lines of the file (presumably descriptive header
# text -- verify against the file itself).
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Rows are consumed in pairs: each even row, plus the first two values of the odd row
# that follows it, make up one row of the feature matrix ...
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
# ... and the third value of each odd row is the regression target.
target = raw_df.values[1::2, 2]

In [None]:
# Combine the feature matrix and the target vector into a single labelled DataFrame:
# the 13 feature columns come first, and the target is appended as 'MEDV'.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
data_values = pd.DataFrame(data, columns=column_names).assign(MEDV=target)
data_values.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler

In [None]:
# Pick out the target column and the feature columns, then standardize the features.
# NOTE: the scaler here is fit on every row, i.e. before any train/test split is made.
y = data_values['MEDV']

scale = StandardScaler()
X = scale.fit_transform(data_values[column_names])

In [None]:
# creating testing and training splits
# The raw (unscaled) feature columns are split first and the scaler is fit on the
# training rows only: standardizing with statistics computed over every row lets
# information about the test set leak into training.
# random_state pins the shuffle so the MSEs reported below can be reproduced.
X_train_raw, X_test_raw, y_train, y_test = tts(
    data_values[column_names], y, test_size=0.2, random_state=42
)
train_scaler = StandardScaler().fit(X_train_raw)
X_train = train_scaler.transform(X_train_raw)
X_test = train_scaler.transform(X_test_raw)

In [None]:
# helper function that will declare the model with the desired parameters, fit it, and print the mse
def get_mse(learning_rate, regularization, random_state=42):
    """Fit an SGDRegressor on the training split and print its test-set MSE.

    Reads the notebook-level X_train, y_train, X_test and y_test.

    Parameters
    ----------
    learning_rate : float
        Forwarded to SGDRegressor as ``eta0``.
    regularization : str or None
        Forwarded to SGDRegressor as ``penalty`` (e.g. 'l1' or 'l2').
    random_state : int or None, default 42
        Seed forwarded to SGDRegressor so repeated calls give repeatable numbers;
        pass None for an unseeded fit.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    sgdr = SGDRegressor(eta0=learning_rate, penalty=regularization, random_state=random_state)
    sgdr.fit(X_train, y_train)
    y_pred = sgdr.predict(X_test)
    mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
    # Upper-casing the whole penalty name yields the same 'L1'/'L2' labels as slicing
    # with [1:] did, but also copes with 'elasticnet' (previously printed as
    # 'Llasticnet') and None (previously a TypeError after the model had been fit).
    penalty_label = str(regularization).upper()
    print(f'Mean squared error ({penalty_label} regularization & {learning_rate} learning rate):', mse)


In [None]:
# Compare the learning rates 0.001, 0.01 and 0.1 under L1 regularization
# (the same sweep with L2 regularization is run in its own cell).
l1 = 'l1'
for eta in (0.001, 0.01, 0.1):
    get_mse(eta, l1)

Mean squared error (L1 regularization & 0.001 learning rate): 21.92222452784237
Mean squared error (L1 regularization & 0.01 learning rate): 20.95554889712384
Mean squared error (L1 regularization & 0.1 learning rate): 23.63988398964223


In [None]:
# Repeat the learning-rate sweep (0.001, 0.01, 0.1), this time with L2 regularization.
l2 = 'l2'
for eta in (0.001, 0.01, 0.1):
    get_mse(eta, l2)

Mean squared error (L2 regularization & 0.001 learning rate): 21.925061209267042
Mean squared error (L2 regularization & 0.01 learning rate): 21.196935833890677
Mean squared error (L2 regularization & 0.1 learning rate): 22.741410825375823


In [None]:
# now train an AdaBoost regressor on the data and compare the results
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

In [None]:
# declare decision stumps to use in AdaBoost
# max_depth=1 limits the tree to a single split; this object is handed to
# AdaBoostRegressor as its base estimator.
stump = DecisionTreeRegressor(max_depth=1)

In [None]:
# declare AdaBoost model
# random_state seeds the boosting procedure so the MSE reported for this model is
# repeatable from run to run instead of changing on every execution.
adaboost = AdaBoostRegressor(estimator=stump, n_estimators=5000, learning_rate=0.1, random_state=42)

In [None]:
# Fit the stump-based AdaBoost model, predict on the test split, and compute the MSE
# so it can be compared with the linear regression models.
y_pred = adaboost.fit(X_train, y_train).predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# Author's interpretation (not verified in this notebook): with the base learner
# restricted to max_depth = 1, each stump carries too little information for a
# dataset with this many features, so the ensemble performs poorly.
print('Mean squared error (AdaBoost Regression with decision stumps):', mse)

Mean squared error (AdaBoost Regression with decision stumps): 42.920877266416646


In [None]:
# try AdaBoost with more complex base estimator
tree = DecisionTreeRegressor(max_depth=4)
# random_state seeds the boosting procedure so this MSE is repeatable from run to run
adaboost = AdaBoostRegressor(estimator=tree, n_estimators=5000, learning_rate=0.1, random_state=42)

adaboost.fit(X_train, y_train)
y_pred = adaboost.predict(X_test)

mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# Allowing deeper base trees (max_depth=4 instead of 1) lets each weak learner use
# several splits; in the recorded run this gave a much lower MSE than the stumps.
print('Mean squared error (AdaBoost Regression with decision trees):', mse)

Mean squared error (AdaBoost Regression with decision trees): 8.436896424592884


In [None]:
# now try training a neural network to perform the regression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, BatchNormalization
from keras import regularizers

In [None]:
# Build the regression network by handing Sequential the complete, ordered layer list.
nn = Sequential([
    # input width = number of feature columns in the training matrix
    InputLayer((X_train.shape[1],)),
    # first hidden layer; its kernel weights carry an L2 penalty with factor 0.1
    Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.1)),
    # batch normalization is applied to each hidden layer's output before it is passed on
    BatchNormalization(),
    Dense(8, activation='relu'),
    BatchNormalization(),
    # a single unit with linear activation as the output layer for regression
    Dense(1, activation='linear'),
])

In [None]:
# compile the neural network and fit it to the data
# 'mse' is tracked as a metric in addition to being the loss; in the recorded output
# the two values differ slightly, presumably because the reported loss also includes
# the L2 kernel penalty -- confirm against the Keras documentation.
nn.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Keep the History object returned by fit so the per-epoch curves stay available for
# inspection; assigning it also stops the cell from echoing the bare History repr.
# validation_split=0.2 holds part of the training data out for validation.
history = nn.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=8, verbose=1)

Epoch 1/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 620.5186 - mse: 619.1216 - val_loss: 564.9070 - val_mse: 563.5139
Epoch 2/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 611.0092 - mse: 609.6167 - val_loss: 553.6186 - val_mse: 552.2298
Epoch 3/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 535.9164 - mse: 534.5287 - val_loss: 538.7565 - val_mse: 537.3727
Epoch 4/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 617.4293 - mse: 616.0469 - val_loss: 522.4067 - val_mse: 521.0292
Epoch 5/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 500.8579 - mse: 499.4824 - val_loss: 504.8125 - val_mse: 503.4418
Epoch 6/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 552.2869 - mse: 550.9177 - val_loss: 489.6868 - val_mse: 488.3235
Epoch 7/100
[1m41/41[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x79295b7560e0>

In [None]:
# get the network's predictions on the held-out test features; the MSE itself is
# computed in the following cell
y_pred = nn.predict(X_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [None]:
# Score the network's test predictions with the same metric used for the other models.
# Author's takeaways from the recorded run: the network did well and may have captured
# nonlinearity that the linear regression could not, but AdaBoost with decision trees
# still reached a lower MSE and was easier to write. The claims about relative
# computational cost are the author's impression; nothing in this notebook times the models.
mse = mean_squared_error(y_test, y_pred)
print('Mean squared error (neural network):', mse)

Mean squared error (neural network): 10.011927650584022
