In [22]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge, ARDRegression, SGDRegressor, PassiveAggressiveRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor, StackingRegressor, VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from models import ETFONN, FONN1, MLPWithDecisionTrees, TREENN1, TREENN2

In [23]:
# Load the Boston dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None) # type: ignore
X = np.hstack([raw_df.values[::2, :-1], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

scaler = StandardScaler()
X = scaler.fit_transform(X)
X[:5], X.shape, y[:5], y.shape

(array([[-0.41978194,  0.28482986, -1.2879095 , -0.27259857, -0.14421743,
          0.41367189, -0.12001342,  0.1402136 , -0.98284286, -0.66660821,
          0.44105193, -1.0755623 ],
        [-0.41733926, -0.48772236, -0.59338101, -0.27259857, -0.74026221,
          0.19427445,  0.36716642,  0.55715988, -0.8678825 , -0.98732948,
          0.44105193, -0.49243937],
        [-0.41734159, -0.48772236, -0.59338101, -0.27259857, -0.74026221,
          1.28271368, -0.26581176,  0.55715988, -0.8678825 , -0.98732948,
          0.39642699, -1.2087274 ],
        [-0.41675042, -0.48772236, -1.30687771, -0.27259857, -0.83528384,
          1.01630251, -0.80988851,  1.07773662, -0.75292215, -1.10611514,
          0.41616284, -1.36151682],
        [-0.41248185, -0.48772236, -1.30687771, -0.27259857, -0.83528384,
          1.22857665, -0.51117971,  1.07773662, -0.75292215, -1.10611514,
          0.44105193, -1.02650148]]),
 (506, 12),
 array([24. , 21.6, 34.7, 33.4, 36.2]),
 (506,))

In [24]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train[:5], X_train.shape, X_test[:5], X_test.shape, y_train[:5], y_train.shape, y_test[:5], y_test.shape

(array([[ 1.32780421e+00, -4.87722365e-01,  1.01599907e+00,
         -2.72598567e-01,  5.12295652e-01, -1.39706929e+00,
          1.02148094e+00, -8.05438224e-01,  1.66124525e+00,
          1.53092646e+00, -7.88779407e-02,  1.71810120e+00],
        [-3.47506015e-01, -4.87722365e-01, -4.37258013e-01,
         -2.72598567e-01, -1.44217433e-01, -6.42000190e-01,
         -4.29390392e-01,  3.34449434e-01, -6.37961799e-01,
         -6.01276097e-01,  4.27017554e-01, -5.86355801e-01],
        [-4.16483921e-01,  1.01446252e+00, -7.40749452e-01,
         -2.72598567e-01, -1.00891427e+00, -3.61342430e-01,
         -1.61000138e+00,  1.35273767e+00, -9.82842857e-01,
         -6.19093946e-01,  6.11369155e-02, -6.76067022e-01],
        [ 3.99962749e-01, -4.87722365e-01,  1.01599907e+00,
         -2.72598567e-01,  5.12295652e-01, -2.58767006e-01,
          5.87641964e-01, -8.42944849e-01,  1.66124525e+00,
          1.53092646e+00, -3.88307172e+00,  1.49101967e+00],
        [-3.36053725e-01, -4.8772236

In [25]:
# Function to train and evaluate a model
def train_evaluate_model(model, X_train, X_test, y_train, y_test):
    start_time = time.time()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    end_time = time.time()

    r2 = r2_score(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    comp_time = end_time - start_time

    return r2, mae, mse, comp_time


# Initialize models
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet Regression": ElasticNet(),
    "Bayesian Ridge Regression": BayesianRidge(),
    "ARD Regression": ARDRegression(),
    "SGD Regressor": SGDRegressor(),
    "Passive Aggressive Regressor": PassiveAggressiveRegressor(),
    "Support Vector Regression": SVR(),
    "MLP Regressor": MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=42),
    "XGBoost Regressor": XGBRegressor(random_state=42),
    "AdaBoost Regressor": AdaBoostRegressor(random_state=42),
    "Bagging Regressor": BaggingRegressor(random_state=42),
    "ExtraTrees Regressor": ExtraTreesRegressor(random_state=42),
    "HistGradientBoosting Regressor": HistGradientBoostingRegressor(random_state=42),
    "Stacking Regressor": StackingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42))
    ], final_estimator=Ridge()),
    "Voting Regressor": VotingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
        ('gb', GradientBoostingRegressor(random_state=42))
    ])
}

# Train and evaluate models
results = {}
for name, model in models.items():
    r2, mae, mse, comp_time = train_evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = {"R² Score": r2, "MAE": mae, "MSE": mse, "Time (s)": comp_time}

In [26]:
# Initialize and train ETFONN
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1
num_trees_hidden = 10
epochs = 1000
learning_rate = 0.001

etfonn_model = ETFONN(input_dim, hidden_dim, output_dim, num_trees_hidden)
etfonn_model.train(X_train, y_train, epochs, learning_rate)

Epoch 0, Loss: 617.1178201368134
Epoch 100, Loss: 9.967957900380481
Epoch 200, Loss: 9.187284068929955
Epoch 300, Loss: 7.673977664441285
Epoch 400, Loss: 7.466842564663707
Epoch 500, Loss: 8.46982082997658
Epoch 600, Loss: 8.807855460524326
Epoch 700, Loss: 8.40493319501086
Epoch 800, Loss: 8.323554639202664
Epoch 900, Loss: 8.454306634575506


In [27]:
# Initialize and train FONN1
input_dim = X_train.shape[1]
hidden_dim = 10  # Increased hidden layer size
output_dim = 1
num_trees_input = 10
epochs = 40000  # Increased number of epochs
learning_rate = 0.0001  # Decreased learning rate

fonn1 = FONN1(input_dim, hidden_dim, output_dim, num_trees_input, X_train, y_train)
fonn1.train(X_train, y_train, epochs, learning_rate)

Epoch 0, Loss: 605.6274193983369
Epoch 100, Loss: 597.9259141380838
Epoch 200, Loss: 592.8148376477936
Epoch 300, Loss: 587.8356882573632
Epoch 400, Loss: 582.9015864919595
Epoch 500, Loss: 577.9997983346836
Epoch 600, Loss: 573.1263572741778
Epoch 700, Loss: 568.2795802977311
Epoch 800, Loss: 563.4586146953266
Epoch 900, Loss: 558.662975046886
Epoch 1000, Loss: 553.8923611987461


In [None]:
# Initialize and train FONN2
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1
num_trees_hidden = 10
epochs = 1000
learning_rate = 0.001

fonn2 = MLPWithDecisionTrees(input_dim, hidden_dim, output_dim, num_trees_hidden)
fonn2.train(X_train, y_train, epochs, learning_rate)

In [None]:
# Initialize and train TREENN1
input_dim = X_train.shape[1]
hidden_dim = 10  # Hidden layer size
output_dim = 1
epochs = 40000  # Number of epochs
learning_rate = 0.0001  # Learning rate

# Initialize and train TREENN1
treenn1 = TREENN1(input_dim, hidden_dim, output_dim, X_train, y_train)
treenn1.train(X_train, y_train, epochs, learning_rate)

In [None]:
# Initialize and train TREENN2
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1
epochs = 1000
learning_rate = 0.001

treenn2 = TREENN2(input_dim, hidden_dim, output_dim)
treenn2.train(X_train, y_train, epochs, learning_rate)

In [None]:
# Measure computational time and evaluate the custom ETFONN model
start_time = time.time()
etfonn_predictions = etfonn_model.forward(X_test, y_test)
end_time = time.time()
etfonn_r2 = r2_score(y_test, etfonn_predictions)
etfonn_mae = mean_absolute_error(y_test, etfonn_predictions)
etfonn_mse = mean_squared_error(y_test, etfonn_predictions)
etfonn_time = end_time - start_time

results["ETFONN"] = {"R² Score": etfonn_r2, "MAE": etfonn_mae, "MSE": etfonn_mse, "Time (s)": etfonn_time}

# Measure computational time and predict house prices using the decision trees in the hidden layer
start_time = time.time()
tree_based_predictions = etfonn_model.tree_predict(X_test)
end_time = time.time()
tree_based_r2 = r2_score(y_test, tree_based_predictions)
tree_based_mae = mean_absolute_error(y_test, tree_based_predictions)
tree_based_mse = mean_squared_error(y_test, tree_based_predictions)
tree_based_time = end_time - start_time

results["Tree-based Predictions (ETFONN)"] = {"R² Score": tree_based_r2, "MAE": tree_based_mae, "MSE": tree_based_mse, "Time (s)": tree_based_time}

# Combine 10 decision trees and evaluate the ensemble model
trees = [DecisionTreeRegressor(max_depth=5, random_state=i) for i in range(10)]

for tree in trees:
    tree.fit(X_train, y_train)

ensemble_predictions = np.mean([tree.predict(X_test) for tree in trees], axis=0)
ensemble_r2 = r2_score(y_test, ensemble_predictions)
ensemble_mae = mean_absolute_error(y_test, ensemble_predictions)
ensemble_mse = mean_squared_error(y_test, ensemble_predictions)
ensemble_time = sum([train_evaluate_model(tree, X_train, X_test, y_train, y_test)[3] for tree in trees])

results["Ensemble of 10 Trees (ETFONN)"] = {"R² Score": ensemble_r2, "MAE": ensemble_mae, "MSE": ensemble_mse, "Time (s)": ensemble_time}

In [None]:
# Measure computational time and evaluate the FONN1 model
start_time = time.time()
fonn1_predictions = fonn1.forward(X_test)
end_time = time.time()
fonn1_r2 = r2_score(y_test, fonn1_predictions)
fonn1_mae = mean_absolute_error(y_test, fonn1_predictions)
fonn1_mse = mean_squared_error(y_test, fonn1_predictions)
fonn1_time = end_time - start_time

results["FONN1"] = {"R² Score": fonn1_r2, "MAE": fonn1_mae, "MSE": fonn1_mse, "Time (s)": fonn1_time}

In [None]:
# Measure computational time and evaluate the custom MLP model
start_time = time.time()
custom_predictions = fonn2.forward(X_test, y_test)
end_time = time.time()
custom_r2 = r2_score(y_test, custom_predictions)
custom_mae = mean_absolute_error(y_test, custom_predictions)
custom_mse = mean_squared_error(y_test, custom_predictions)
custom_time = end_time - start_time

results["FONN2"] = {"R² Score": custom_r2, "MAE": custom_mae, "MSE": custom_mse, "Time (s)": custom_time}

# Measure computational time and predict house prices using the decision trees in the hidden layer
start_time = time.time()
tree_based_predictions = fonn2.tree_predict(X_test)
end_time = time.time()
tree_based_r2 = r2_score(y_test, tree_based_predictions)
tree_based_mae = mean_absolute_error(y_test, tree_based_predictions)
tree_based_mse = mean_squared_error(y_test, tree_based_predictions)
tree_based_time = end_time - start_time

results["Tree-based Predictions (FONN2)"] = {"R² Score": tree_based_r2, "MAE": tree_based_mae, "MSE": tree_based_mse, "Time (s)": tree_based_time}

# Combine 10 decision trees and evaluate the ensemble model
trees = [DecisionTreeRegressor(max_depth=5, random_state=i) for i in range(10)]

for tree in trees:
    tree.fit(X_train, y_train)

ensemble_predictions = np.mean([tree.predict(X_test) for tree in trees], axis=0)
ensemble_r2 = r2_score(y_test, ensemble_predictions)
ensemble_mae = mean_absolute_error(y_test, ensemble_predictions)
ensemble_mse = mean_squared_error(y_test, ensemble_predictions)
ensemble_time = sum([train_evaluate_model(tree, X_train, X_test, y_train, y_test)[3] for tree in trees])

results["Ensemble of 10 Trees (FONN2)"] = {"R² Score": ensemble_r2, "MAE": ensemble_mae, "MSE": ensemble_mse, "Time (s)": ensemble_time}

In [None]:
# Measure computational time and evaluate the TREENN1 model
start_time = time.time()
treenn1_predictions = treenn1.forward(X_test)
end_time = time.time()
treenn1_r2 = r2_score(y_test, treenn1_predictions)
treenn1_mae = mean_absolute_error(y_test, treenn1_predictions)
treenn1_mse = mean_squared_error(y_test, treenn1_predictions)
treenn1_time = end_time - start_time

results["TREENN1"] = {"R² Score": treenn1_r2, "MAE": treenn1_mae, "MSE": treenn1_mse, "Time (s)": treenn1_time}

In [None]:
# Measure computational time and evaluate the custom MLP model
start_time = time.time()
custom_predictions = treenn2.forward(X_test, y_test)
end_time = time.time()
custom_r2 = r2_score(y_test, custom_predictions)
custom_mae = mean_absolute_error(y_test, custom_predictions)
custom_mse = mean_squared_error(y_test, custom_predictions)
custom_time = end_time - start_time

results["TREENN2"] = {"R² Score": custom_r2, "MAE": custom_mae, "MSE": custom_mse, "Time (s)": custom_time}

# Measure computational time and predict house prices using the decision tree in the hidden layer
start_time = time.time()
tree_based_predictions = treenn2.tree_hidden.predict(treenn2.a1)
end_time = time.time()
tree_based_r2 = r2_score(y_test, tree_based_predictions)
tree_based_mae = mean_absolute_error(y_test, tree_based_predictions)
tree_based_mse = mean_squared_error(y_test, tree_based_predictions)
tree_based_time = end_time - start_time

results["Tree-based Predictions (TREENN2)"] = {"R² Score": tree_based_r2, "MAE": tree_based_mae, "MSE": tree_based_mse, "Time (s)": tree_based_time}

In [None]:
# Convert results to a DataFrame for better visualization
results_df = pd.DataFrame(results).T
print(results_df)

In [None]:
# Get and print tree importances
tree_importances = etfonn_model.get_tree_importances()