In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge, ARDRegression, SGDRegressor, PassiveAggressiveRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor, StackingRegressor, VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from models import FONN1, FONN2, TREENN1, TREENN2

In [2]:
# Load the Boston dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string for the regex separator: "\s" in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22,  # type: ignore
                     header=None)  # type: ignore

# Each sample is stitched together from an even-indexed row and the
# odd-indexed row that follows it; the target is column 2 of the odd row.
# NOTE(review): `[::2, :-1]` drops the last column of the even rows, which
# yields 12 features (see the shape below) — confirm this is intentional.
X = np.hstack([raw_df.values[::2, :-1], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

# NOTE(review): the scaler is fitted on every row, before the train/test split
# in the next cell, so test-set statistics influence the scaling.
scaler = StandardScaler()
X = scaler.fit_transform(X)
X.shape, y.shape

((506, 12), (506,))

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((404, 12), (102, 12), (404,), (102,))

In [4]:
# Function to train and evaluate a model
def train_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features, passed to ``model.fit``.
        X_test: Test features, passed to ``model.predict``.
        y_train: Training targets, passed to ``model.fit``.
        y_test: Test targets the predictions are scored against.

    Returns:
        Tuple ``(r2, mae, mse, train_time, comp_time)``. Both times are
        elapsed seconds: ``train_time`` covers only the ``fit`` call and
        ``comp_time`` covers only the ``predict`` call.
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows the
    # wall clock, which can be adjusted mid-run and may be too coarse to
    # resolve a fast predict() call.
    fit_started = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - fit_started

    predict_started = time.perf_counter()
    predictions = model.predict(X_test)
    comp_time = time.perf_counter() - predict_started

    r2 = r2_score(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)

    return r2, mae, mse, train_time, comp_time


# Initialize models
# Every estimator that accepts a seed gets random_state=42 so that repeated
# runs of the notebook produce the same scores. SGDRegressor and
# PassiveAggressiveRegressor shuffle the training data, so they are seeded too.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet Regression": ElasticNet(),
    "Bayesian Ridge Regression": BayesianRidge(),
    "ARD Regression": ARDRegression(),
    "SGD Regressor": SGDRegressor(random_state=42),
    "Passive Aggressive Regressor": PassiveAggressiveRegressor(random_state=42),
    "Support Vector Regression": SVR(),
    "MLP Regressor": MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=42),
    "XGBoost Regressor": XGBRegressor(random_state=42),
    "AdaBoost Regressor": AdaBoostRegressor(random_state=42),
    "Bagging Regressor": BaggingRegressor(random_state=42),
    "ExtraTrees Regressor": ExtraTreesRegressor(random_state=42),
    "HistGradientBoosting Regressor": HistGradientBoostingRegressor(random_state=42),
    # Stacking: a Ridge meta-learner combines a linear model and a small forest.
    "Stacking Regressor": StackingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42))
    ], final_estimator=Ridge()),
    # Voting: combines a linear model, a small forest and gradient boosting.
    "Voting Regressor": VotingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
        ('gb', GradientBoostingRegressor(random_state=42))
    ])
}

# Fit and score every baseline; results maps model name -> metric dict.
# The labels are paired positionally with the tuple train_evaluate_model returns.
results = {}
for name, model in models.items():
    scores = train_evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = dict(zip(
        ("R² Score", "MAE", "MSE", "Train Time (s)", "Comp Time (s)"),
        scores,
    ))

In [5]:
# # Initialize and train MLP
# input_dim = X_train.shape[1]
# hidden_dim = 10  # Increased hidden layer size
# output_dim = 1
# num_trees_input = 0
# epochs = 40000  # Increased number of epochs
# learning_rate = 0.0001  # Decreased learning rate

# start_time = time.time()
# mlp = FONN1(input_dim, hidden_dim, output_dim, num_trees_input)
# mlp.train(X_train, y_train, epochs, learning_rate)
# end_time = time.time()
# mlp_train_time = end_time - start_time

In [6]:
# Initialize and train FONN1
input_dim = X_train.shape[1]
hidden_dim = 10  # Hidden layer size
output_dim = 1
num_trees_input = 10
epochs = 40000  # Number of epochs
learning_rate = 0.0001  # Learning rate

# Seed NumPy's global RNG so that any np.random-based initialisation inside the
# model is repeatable (assumes models.py draws from the global RNG — TODO confirm).
np.random.seed(42)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# wall-clock adjustments during a long training run. The timed region covers
# both construction and training.
start_time = time.perf_counter()
fonn1 = FONN1(input_dim, hidden_dim, output_dim, num_trees_input)
fonn1.train(X_train, y_train, epochs, learning_rate)
end_time = time.perf_counter()
fonn1_train_time = end_time - start_time

Epoch 0, Loss: 606.5360967734182
Epoch 100, Loss: 599.2096840195194
Epoch 200, Loss: 594.0552385721818
Epoch 300, Loss: 589.0655737916654
Epoch 400, Loss: 584.1237543426554
Epoch 500, Loss: 579.2150262345203
Epoch 600, Loss: 574.3349791755749
Epoch 700, Loss: 569.4817738100883
Epoch 800, Loss: 564.6544869575226
Epoch 900, Loss: 559.8525962484895
Epoch 1000, Loss: 555.0757800738841
Epoch 1100, Loss: 550.3238276073417
Epoch 1200, Loss: 545.5965937836214
Epoch 1300, Loss: 540.8939748668694
Epoch 1400, Loss: 536.2158943155102
Epoch 1500, Loss: 531.5622941686872
Epoch 1600, Loss: 526.9331295670908
Epoch 1700, Loss: 522.3283651393835
Epoch 1800, Loss: 517.7479725444251
Epoch 1900, Loss: 513.1919287545691
Epoch 2000, Loss: 508.6602148284715
Epoch 2100, Loss: 504.15281501578283
Epoch 2200, Loss: 499.66971609207565
Epoch 2300, Loss: 495.2109068567978
Epoch 2400, Loss: 490.7763777487826
Epoch 2500, Loss: 486.3661205479375
Epoch 2600, Loss: 481.9801281410508
Epoch 2700, Loss: 477.6183943359545
Ep

In [7]:
# Initialize and train FONN2
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1
num_trees_hidden = 10
epochs = 1000
learning_rate = 0.001

# Seed NumPy's global RNG so that any np.random-based initialisation inside the
# model is repeatable (assumes models.py draws from the global RNG — TODO confirm).
np.random.seed(42)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# wall-clock adjustments. The timed region covers both construction and training.
start_time = time.perf_counter()
fonn2 = FONN2(input_dim, hidden_dim, output_dim, num_trees_hidden)
fonn2.train(X_train, y_train, epochs, learning_rate)
end_time = time.perf_counter()
fonn2_train_time = end_time - start_time

Epoch 0, Loss: 569.7653854462766
Epoch 100, Loss: 7.090767707879386
Epoch 200, Loss: 7.090059135189161
Epoch 300, Loss: 7.089212969746764
Epoch 400, Loss: 7.0882124026171835
Epoch 500, Loss: 7.0870391470611045
Epoch 600, Loss: 7.085673492068558
Epoch 700, Loss: 7.084094447906731
Epoch 800, Loss: 7.082280002014757
Epoch 900, Loss: 7.080207502535303


In [8]:
# Initialize and train TREENN1
input_dim = X_train.shape[1]
hidden_dim = 10  # Hidden layer size
output_dim = 1
epochs = 40000  # Number of epochs
learning_rate = 0.0001  # Learning rate

# Seed NumPy's global RNG so that any np.random-based initialisation inside the
# model is repeatable (assumes models.py draws from the global RNG — TODO confirm).
np.random.seed(42)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# wall-clock adjustments during a long training run. The timed region covers
# both construction and training.
start_time = time.perf_counter()
treenn1 = TREENN1(input_dim, hidden_dim, output_dim)
treenn1.train(X_train, y_train, epochs, learning_rate)
end_time = time.perf_counter()
treenn1_train_time = end_time - start_time

Epoch 0, Loss: 605.8179568116229


Epoch 100, Loss: 602.954071461698
Epoch 200, Loss: 598.86144304153
Epoch 300, Loss: 593.9249777615461
Epoch 400, Loss: 588.592885091757
Epoch 500, Loss: 583.1331516621993
Epoch 600, Loss: 577.6857251983316
Epoch 700, Loss: 572.315001787408
Epoch 800, Loss: 567.0442450314109
Epoch 900, Loss: 561.8758468508678
Epoch 1000, Loss: 556.8027064282724
Epoch 1100, Loss: 551.8143244370971
Epoch 1200, Loss: 546.902730587956
Epoch 1300, Loss: 542.0669655505415
Epoch 1400, Loss: 537.2979243508485
Epoch 1500, Loss: 532.5813310922357
Epoch 1600, Loss: 527.9056193177004
Epoch 1700, Loss: 523.2644547252622
Epoch 1800, Loss: 518.6544130981636
Epoch 1900, Loss: 514.0734630699509
Epoch 2000, Loss: 509.52031571801615
Epoch 2100, Loss: 504.9941100640798
Epoch 2200, Loss: 500.4942470361692
Epoch 2300, Loss: 496.0202956828367
Epoch 2400, Loss: 491.5719372648188
Epoch 2500, Loss: 487.14893042068616
Epoch 2600, Loss: 482.7510886449305
Epoch 2700, Loss: 478.3782652606932
Epoch 2800, Loss: 474.0303431168711
Epoch

In [9]:
# Initialize and train TREENN2
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1
epochs = 1000
learning_rate = 0.001

# Seed NumPy's global RNG so that any np.random-based initialisation inside the
# model is repeatable (assumes models.py draws from the global RNG — TODO confirm).
np.random.seed(42)

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# wall-clock adjustments. The timed region covers both construction and training.
start_time = time.perf_counter()
treenn2 = TREENN2(input_dim, hidden_dim, output_dim)
treenn2.train(X_train, y_train, epochs, learning_rate)
end_time = time.perf_counter()
treenn2_train_time = end_time - start_time

Epoch 0, Loss: 589.0452029724268
Epoch 100, Loss: 470.7939577039868


Epoch 200, Loss: 358.2679207530062
Epoch 300, Loss: 255.81353295582977
Epoch 400, Loss: 168.077202715935
Epoch 500, Loss: 97.32971622165032
Epoch 600, Loss: 45.561803476522265
Epoch 700, Loss: 15.133814602572725
Epoch 800, Loss: 7.389116148230879
Epoch 900, Loss: 7.253237351153362


In [10]:
# Measure computational time and evaluate the FONN1 model
# NOTE(review): the saved run of this cell stopped with a ValueError about
# mismatched concatenation sizes (102 vs 404), i.e. a 102-row array
# (X_test has 102 rows) met a 404-row array (X_train has 404 rows).
# Presumably forward() reuses something sized for the training set; that would
# have to be fixed in models.FONN1 — TODO confirm. Every later cell depends on
# this one succeeding in a top-to-bottom run.
# perf_counter() is used because a single forward pass can be too short for
# time.time() to resolve reliably.
start_time = time.perf_counter()
fonn1_predictions = fonn1.forward(X_test)
end_time = time.perf_counter()
fonn1_comp_time = end_time - start_time

fonn1_r2 = r2_score(y_test, fonn1_predictions)
fonn1_mae = mean_absolute_error(y_test, fonn1_predictions)
fonn1_mse = mean_squared_error(y_test, fonn1_predictions)

results["FONN1"] = {"R² Score": fonn1_r2, "MAE": fonn1_mae, "MSE": fonn1_mse,
                    "Train Time (s)": fonn1_train_time, "Comp Time (s)": fonn1_comp_time}

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 102 and the array at index 1 has size 404

In [None]:
# Measure computational time and evaluate the FONN2 model
# perf_counter() is used because a single forward pass can be too short for
# time.time() to resolve reliably.
start_time = time.perf_counter()
fonn2_predictions = fonn2.forward(X_test)
end_time = time.perf_counter()
fonn2_comp_time = end_time - start_time

fonn2_r2 = r2_score(y_test, fonn2_predictions)
fonn2_mae = mean_absolute_error(y_test, fonn2_predictions)
fonn2_mse = mean_squared_error(y_test, fonn2_predictions)

results["FONN2"] = {"R² Score": fonn2_r2, "MAE": fonn2_mae, "MSE": fonn2_mse,
                    "Train Time (s)": fonn2_train_time, "Comp Time (s)": fonn2_comp_time}

# Measure computational time and predict house prices using the decision trees in the hidden layer
start_time = time.perf_counter()
fonn2_tree_predictions = fonn2.tree_predict(X_test)
end_time = time.perf_counter()
fonn2_tree_comp_time = end_time - start_time

fonn2_tree_r2 = r2_score(y_test, fonn2_tree_predictions)
fonn2_tree_mae = mean_absolute_error(y_test, fonn2_tree_predictions)
fonn2_tree_mse = mean_squared_error(y_test, fonn2_tree_predictions)

# The tree-only entry reports the training time of the whole FONN2 model,
# because the trees are not trained or timed separately in this notebook.
results["Tree-based Predictions (FONN2)"] = {"R² Score": fonn2_tree_r2, "MAE": fonn2_tree_mae,
                                             "MSE": fonn2_tree_mse, "Train Time (s)": fonn2_train_time, "Comp Time (s)": fonn2_tree_comp_time}

In [None]:
# Combine 10 decision trees and evaluate the ensemble model
# NOTE(review): every tree is fitted on the same rows with all features, and
# only random_state differs. In scikit-learn that seed mainly breaks ties
# between equally good splits, so the ten trees may be (nearly) identical —
# consider bootstrap resampling if a diverse ensemble is intended.
# perf_counter() is used because fitting and predicting with shallow trees can
# be too short for time.time() to resolve reliably.
start_time = time.perf_counter()
trees = [DecisionTreeRegressor(max_depth=5, random_state=i).fit(
    X_train, y_train) for i in range(10)]
end_time = time.perf_counter()
ensemble_train_time = end_time - start_time

# The ensemble prediction is the per-sample mean over the individual trees.
start_time = time.perf_counter()
ensemble_predictions = np.mean(
    [tree.predict(X_test) for tree in trees], axis=0)
end_time = time.perf_counter()
ensemble_comp_time = end_time - start_time

ensemble_r2 = r2_score(y_test, ensemble_predictions)
ensemble_mae = mean_absolute_error(y_test, ensemble_predictions)
ensemble_mse = mean_squared_error(y_test, ensemble_predictions)

results["Ensemble of 10 Trees"] = {"R² Score": ensemble_r2, "MAE": ensemble_mae,
                                   "MSE": ensemble_mse, "Train Time (s)": ensemble_train_time, "Comp Time (s)": ensemble_comp_time}

In [None]:
# Measure computational time and evaluate the TREENN1 model
# perf_counter() is used because a single forward pass can be too short for
# time.time() to resolve reliably.
start_time = time.perf_counter()
treenn1_predictions = treenn1.forward(X_test)
end_time = time.perf_counter()
treenn1_comp_time = end_time - start_time

treenn1_r2 = r2_score(y_test, treenn1_predictions)
treenn1_mae = mean_absolute_error(y_test, treenn1_predictions)
treenn1_mse = mean_squared_error(y_test, treenn1_predictions)

results["TREENN1"] = {"R² Score": treenn1_r2, "MAE": treenn1_mae, "MSE": treenn1_mse,
                      "Train Time (s)": treenn1_train_time, "Comp Time (s)": treenn1_comp_time}

In [None]:
# Measure computational time and evaluate the TREENN2 model
# perf_counter() is used because a single forward pass can be too short for
# time.time() to resolve reliably.
start_time = time.perf_counter()
treenn2_predictions = treenn2.forward(X_test)
end_time = time.perf_counter()
treenn2_comp_time = end_time - start_time

treenn2_r2 = r2_score(y_test, treenn2_predictions)
treenn2_mae = mean_absolute_error(y_test, treenn2_predictions)
treenn2_mse = mean_squared_error(y_test, treenn2_predictions)

results["TREENN2"] = {"R² Score": treenn2_r2, "MAE": treenn2_mae,
                      "MSE": treenn2_mse, "Train Time (s)": treenn2_train_time, "Comp Time (s)": treenn2_comp_time}

# Measure computational time and predict house prices using the decision tree in the hidden layer
# NOTE(review): treenn2.a1 is presumably the hidden activation cached by the
# forward(X_test) call above. The scores below only line up with y_test if
# that was the most recent forward pass — verify against models.TREENN2.
start_time = time.perf_counter()
treenn2_tree_predictions = treenn2.tree_hidden.predict(treenn2.a1)
end_time = time.perf_counter()
treenn2_tree_comp_time = end_time - start_time

treenn2_tree_r2 = r2_score(y_test, treenn2_tree_predictions)
treenn2_tree_mae = mean_absolute_error(y_test, treenn2_tree_predictions)
treenn2_tree_mse = mean_squared_error(y_test, treenn2_tree_predictions)

# The tree-only entry reports the training time of the whole TREENN2 model,
# because the tree is not trained or timed separately in this notebook.
results["Tree-based Predictions (TREENN2)"] = {"R² Score": treenn2_tree_r2, "MAE": treenn2_tree_mae,
                                               "MSE": treenn2_tree_mse, "Train Time (s)": treenn2_train_time, "Comp Time (s)": treenn2_tree_comp_time}

In [None]:
# Convert results to a DataFrame for better visualization:
# after the transpose there is one row per model and one column per metric.
results_df = pd.DataFrame(results).T
# A bare trailing expression uses the notebook's rich table rendering,
# whereas print() flattens the frame to plain text.
results_df

In [None]:
# Get and display tree importances
tree_importances = fonn2.get_tree_importances()
# The original cell only assigned the value; the trailing expression makes the
# notebook actually show it.
tree_importances