In [1]:
import os

os.chdir("..")

current_directory = os.getcwd()
print("Current Working Directory:", current_directory)


Current Working Directory: c:\Users\max-s\Desktop\Uni\Master\Masterthesis\Code\git\overparametrized_dnn


In [2]:
# Import necessary modules and functions
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from src.neural_nets import create_network_1, create_network_2, create_network_3, parameter_tuning_nn, median_and_iqr_nn
from src.knn_regression import parameter_tuning_knn, median_and_iqr_knn, generate_neighbors
from data.data_generator import get_data, preprocess
from data.regression_functions import m1, m2, m3, m4, m5, m6, m7, m8




In [4]:
# Define a list of regression functions
regression_functions = [m1, m2, m3, m4, m5, m6, m7, m8]

In [None]:
# Define a list of neural networks
neural_networks = [create_network_1, create_network_2, create_network_3]


# Define empty dataframe to store results
results_nn = pd.DataFrame(
    columns=["regression_function", "network", "units", "mse", "iqr"]
)

# Loop through each regression function
for i, regression_func in enumerate(regression_functions):
    print(f"Regression Function {i+1}: {regression_func.__name__}")
    input_dim = regression_func.expected_dim
    x, y = get_data(regression_func, x_dim=input_dim, num_samples=1000, sigma=0.05)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    # Preprocess data
    train_data = preprocess(x_train, y_train, training=True)
    test_data = preprocess(x_test, y_test, training=False)

    # Tune networks
    units = [3, 6, 9, 12, 15]
    for j, network in enumerate(neural_networks):
        best_params = parameter_tuning_nn(
            network,
            units=units,
            train_data=train_data,
            test_data=test_data,
            input_dim=input_dim,
        )

        # Calculate regression for best units
        best_units = best_params["best_config"]
        mse, iqr = median_and_iqr_nn(
            network,
            units=best_units,
            input_dim=input_dim,
            regression_func=regression_func,
        )

        # Append the results to the dataframe
        df_nn = pd.DataFrame(
            {
                "regression_function": [regression_func.__name__],
                "network": [network.__name__],
                "units": [best_units],
                "mse": [mse],
                "iqr": [iqr],
            }
        )
        results_nn = pd.concat([results_nn, df_nn], ignore_index=True)

        print(f"Best Units: {best_units}")
        print(f"MSE: {mse}")
        print(f"IQR: {iqr}")

    print("--------------------")

In [None]:
# Define the file path
file_path = "../results/nn_performance.csv"

# Save the results to a csv file
results_nn.to_csv(file_path, index=False)

In [6]:
# Define empty dataframe to store results
results_knn = pd.DataFrame(columns=["regression_function", "units", "mse", "iqr"])

# Loop through each regression function
for i, regression_func in enumerate(regression_functions):
    print(f"Regression Function {i+1}: {regression_func.__name__}")
    input_dim = regression_func.expected_dim
    x, y = get_data(regression_func, x_dim=input_dim, num_samples=1000, sigma=0.05)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    # Preprocess data
    train_data = (x_train, y_train)
    test_data = (x_test, y_test)

    # Tune number of neighbors
    units = generate_neighbors(x_train.shape[0], 25)
    best_params = parameter_tuning_knn(
        units=units,
        train_data=train_data,
        test_data=test_data,
    )

    # Calculate regression for the best number of neighbors
    best_units = best_params["best_config"]
    mse, iqr = median_and_iqr_knn(
        unit=best_units,
        input_dim=input_dim,
        regression_func=regression_func,
    )

    # Append the results to the dataframe
    df_knn = pd.DataFrame(
        {
            "regression_function": [regression_func.__name__],
            "units": [best_units],
            "mse": [mse],
            "iqr": [iqr],
        }
    )
    results_knn = pd.concat([results_knn, df_knn], ignore_index=True)

    print(f"Best Units: {best_units}")
    print(f"MSE: {mse}")
    print(f"IQR: {iqr}")

    print("--------------------")

Regression Function 1: m1
Unit: 1, MSE: 0.0013099430531880707, MAE: 0.029243209944611392
Unit: 2, MSE: 0.0008922530577736421, MAE: 0.024092395081021917
Unit: 3, MSE: 0.0007936205240010985, MAE: 0.022150677794079466
Unit: 4, MSE: 0.0007693110966847222, MAE: 0.021764895883729853
Unit: 5, MSE: 0.0007471009663378916, MAE: 0.021521416015457432
Unit: 7, MSE: 0.0006898488340138936, MAE: 0.020590203444596354
Unit: 9, MSE: 0.0006841622399785407, MAE: 0.020519996948412528
Unit: 12, MSE: 0.0006775195587432479, MAE: 0.020691974270794482
Unit: 16, MSE: 0.0006599828036340477, MAE: 0.020699734095759377
Unit: 21, MSE: 0.000663885338733316, MAE: 0.020736747823747306
Unit: 28, MSE: 0.0006992496577336049, MAE: 0.02073479091139621
Unit: 37, MSE: 0.0007375396220962691, MAE: 0.02034544579189044
Unit: 49, MSE: 0.0009271499065767563, MAE: 0.021577207141766152
Unit: 65, MSE: 0.0013815130432105515, MAE: 0.023806633079421834
Unit: 86, MSE: 0.0022225854326537726, MAE: 0.02686124005476874
Unit: 113, MSE: 0.0044167

  results_knn = pd.concat([results_knn, df_knn], ignore_index=True)


Best Units: 16
MSE: 0.0007809982584651209
IQR: 0.0
--------------------
Regression Function 2: m2
Unit: 1, MSE: 0.008014869456337425, MAE: 0.07102161978438176
Unit: 2, MSE: 0.005811879335894998, MAE: 0.06029972533198316
Unit: 3, MSE: 0.005176122416896151, MAE: 0.057163858800265616
Unit: 4, MSE: 0.004595231109716615, MAE: 0.053462527058220806
Unit: 5, MSE: 0.004401930179443586, MAE: 0.05287781779518962
Unit: 7, MSE: 0.004155235494838793, MAE: 0.0520466033215602
Unit: 9, MSE: 0.004156912475665806, MAE: 0.05182067102209724
Unit: 12, MSE: 0.004023607234905343, MAE: 0.05068686030010392
Unit: 16, MSE: 0.003940893962447072, MAE: 0.04980591086586407
Unit: 21, MSE: 0.00393545118693966, MAE: 0.05012526862910599
Unit: 28, MSE: 0.004066810573810505, MAE: 0.05086505096666613
Unit: 37, MSE: 0.004350865235528355, MAE: 0.051143596274538385
Unit: 49, MSE: 0.004966089240371046, MAE: 0.05334161861955028
Unit: 65, MSE: 0.00640837751945601, MAE: 0.0566267792837686
Unit: 86, MSE: 0.00954505685430922, MAE: 0

KeyboardInterrupt: 

In [5]:
for i, regression_func in enumerate(regression_functions):
    print(f"Regression Function {i+1}: {regression_func.__name__}")
    input_dim = regression_func.expected_dim
    x, y = get_data(regression_func, x_dim=input_dim, num_samples=1000, sigma=0.05)
    print(y.shape)

Regression Function 1: m1
(1000,)
Regression Function 2: m2
(1000,)
Regression Function 3: m3
(1000,)
Regression Function 4: m4
(1000,)
Regression Function 5: m5
(1000,)
Regression Function 6: m6
(1000,)
Regression Function 7: m7
(1000,)
Regression Function 8: m8
(1000,)


In [None]:
# Define the file path
file_path = "../results/knn_performance.csv"

# Save the results to a csv file
results_knn.to_csv(file_path, index=False)