In [1]:
import os

# Step up to the parent directory so the `src` and `data` packages imported
# below can be found. The step is guarded: once `src` is visible from the
# current directory we are already in the right place, so re-running this cell
# no longer climbs one level further on every execution.
# NOTE(review): assumes the notebook folder itself contains no `src` directory.
if not os.path.isdir("src"):
    os.chdir("..")

current_directory = os.getcwd()
print("Current Working Directory:", current_directory)


Current Working Directory: c:\Users\max-s\Desktop\Uni\Master\Masterthesis\Code\git\overparametrized_dnn


In [2]:
# Import necessary modules and functions
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from src.neural_nets import create_network_1, create_network_2, create_network_3, parameter_tuning_nn, median_and_iqr_nn
from src.knn_regression import parameter_tuning_knn, median_and_iqr_knn, generate_neighbors
from data.data_generator import get_data, preprocess
from data.regression_functions import m1, m2, m3, m4, m5, m6, m7, m8




In [3]:
# Regression functions iterated over by the tuning loops below.
regression_functions = [
    m1, m2, m3, m4,
    m5, m6, m7, m8,
]

In [None]:
# Network factories to compare; each one is tuned and evaluated once per
# regression function.
neural_networks = [create_network_1, create_network_2, create_network_3]

# Candidate `units` values handed to `parameter_tuning_nn`. They are the same
# for every (function, network) pair, so they are defined once outside the loops.
units = [3, 6, 9, 12, 15]

# One dict per (regression function, network) pair. Rows are collected in a
# plain list and turned into a DataFrame once at the end: concatenating onto an
# initially empty DataFrame inside the loop is quadratic and raises a pandas
# warning about empty entries. If the cell is interrupted, the rows finished so
# far are still available in `nn_records`.
nn_records = []

# Loop through each regression function
for i, regression_func in enumerate(regression_functions, start=1):
    print(f"Regression Function {i}: {regression_func.__name__}")
    input_dim = regression_func.expected_dim
    x, y = get_data(regression_func, x_dim=input_dim, num_samples=1000, sigma=0.05)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    # Preprocess data
    train_data = preprocess(x_train, y_train, training=True)
    test_data = preprocess(x_test, y_test, training=False)

    # Tune networks
    for network in neural_networks:
        best_params = parameter_tuning_nn(
            network,
            units=units,
            train_data=train_data,
            test_data=test_data,
            input_dim=input_dim,
        )

        # Re-evaluate the network with the configuration selected by the tuning
        best_units = best_params["best_config"]
        mse, iqr = median_and_iqr_nn(
            network,
            units=best_units,
            input_dim=input_dim,
            regression_func=regression_func,
        )

        # Record the result row for this (function, network) pair
        nn_records.append(
            {
                "regression_function": regression_func.__name__,
                "network": network.__name__,
                "units": best_units,
                "mse": mse,
                "iqr": iqr,
            }
        )

        print(f"Best Units: {best_units}")
        print(f"MSE: {mse}")
        print(f"IQR: {iqr}")

    print("--------------------")

# Build the results table in one step; `columns` fixes the column order and
# keeps the frame well-formed even if no rows were produced.
results_nn = pd.DataFrame(
    nn_records,
    columns=["regression_function", "network", "units", "mse", "iqr"],
)

In [None]:
# Define the file path
file_path = "results/nn_performance.csv"

# `to_csv` does not create missing parent directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the results to a csv file
results_nn.to_csv(file_path, index=False)

In [4]:
# One dict per regression function. Rows are collected in a plain list and
# turned into a DataFrame once at the end: concatenating onto an initially
# empty DataFrame inside the loop is quadratic and raises a pandas warning
# about empty entries. If the cell is interrupted, the rows finished so far
# are still available in `knn_records`.
knn_records = []

# Loop through each regression function
for i, regression_func in enumerate(regression_functions, start=1):
    print(f"Regression Function {i}: {regression_func.__name__}")
    input_dim = regression_func.expected_dim
    x, y = get_data(regression_func, x_dim=input_dim, num_samples=1000, sigma=0.05)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    # The raw split is passed on as (features, targets) tuples; no `preprocess`
    # step is applied in this cell.
    train_data = (x_train, y_train)
    test_data = (x_test, y_test)

    # Tune number of neighbors; the candidate list is derived from the number
    # of training samples by `generate_neighbors`.
    units = generate_neighbors(x_train.shape[0], 25)
    best_params = parameter_tuning_knn(
        units=units,
        train_data=train_data,
        test_data=test_data,
    )

    # Calculate regression for the best number of neighbors
    best_units = best_params["best_config"]
    mse, iqr = median_and_iqr_knn(
        unit=best_units,
        input_dim=input_dim,
        regression_func=regression_func,
    )

    # Record the result row for this regression function
    knn_records.append(
        {
            "regression_function": regression_func.__name__,
            "units": best_units,
            "mse": mse,
            "iqr": iqr,
        }
    )

    print(f"Best Units: {best_units}")
    print(f"MSE: {mse}")
    print(f"IQR: {iqr}")

    print("--------------------")

# Build the results table in one step; `columns` fixes the column order and
# keeps the frame well-formed even if no rows were produced.
results_knn = pd.DataFrame(
    knn_records,
    columns=["regression_function", "units", "mse", "iqr"],
)

Regression Function 1: m1
Unit: 1, MSE: 0.0010679819099870347, MAE: 0.02645769671599161
Unit: 2, MSE: 0.000927720513983651, MAE: 0.0238648555845309
Unit: 3, MSE: 0.0008721294938256398, MAE: 0.023282364182289933
Unit: 4, MSE: 0.0008417912242182476, MAE: 0.022938075772106003
Unit: 5, MSE: 0.0008112991565969515, MAE: 0.022343931323831754
Unit: 7, MSE: 0.0007723867901882818, MAE: 0.021712096823407724
Unit: 9, MSE: 0.00072779232852138, MAE: 0.021164269545810077
Unit: 12, MSE: 0.0006736070396537679, MAE: 0.020195737023360198
Unit: 16, MSE: 0.0006626524416976074, MAE: 0.020010864348945293
Unit: 21, MSE: 0.0006912079646841313, MAE: 0.020035969347706385
Unit: 28, MSE: 0.0006959569065045059, MAE: 0.02029688200456386
Unit: 37, MSE: 0.0007268984102681325, MAE: 0.02022655017424048
Unit: 49, MSE: 0.0009584042191566785, MAE: 0.022147370799249097
Unit: 65, MSE: 0.0016655056071093527, MAE: 0.026015047874035212
Unit: 86, MSE: 0.003222539036177392, MAE: 0.03041865176252026
Unit: 113, MSE: 0.0068308758714

  results_knn = pd.concat([results_knn, df_knn], ignore_index=True)


Best Units: 16
MSE: 0.0007521695312888005
IQR: 2.7830150805149465e-05
--------------------
Regression Function 2: m2
Unit: 1, MSE: 0.0051734224156306085, MAE: 0.05633013899231302
Unit: 2, MSE: 0.0038640937067957993, MAE: 0.05025379338747637
Unit: 3, MSE: 0.0034896927625728525, MAE: 0.04732767870828592
Unit: 4, MSE: 0.003267879846697075, MAE: 0.046692689610664376
Unit: 5, MSE: 0.0030263546273992447, MAE: 0.04438234593882693
Unit: 7, MSE: 0.002839697060809195, MAE: 0.041609145243174994
Unit: 9, MSE: 0.002978478561499211, MAE: 0.042698221466692346
Unit: 12, MSE: 0.002857232520624815, MAE: 0.04219691158431635
Unit: 16, MSE: 0.002844280535396867, MAE: 0.04221524406315421
Unit: 21, MSE: 0.0029186003431124707, MAE: 0.04283323696347446
Unit: 28, MSE: 0.002960184453809309, MAE: 0.042835022413051774
Unit: 37, MSE: 0.003203387622674429, MAE: 0.0447540583866968
Unit: 49, MSE: 0.003808586089311753, MAE: 0.0486510295896174
Unit: 65, MSE: 0.005019971323901514, MAE: 0.0536772221226492
Unit: 86, MSE: 0

In [6]:
# Define the file path
file_path = "results/knn_performance.csv"

# `to_csv` does not create missing parent directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the results to a csv file
results_knn.to_csv(file_path, index=False)