In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [22]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense



In [14]:
df = pd.read_csv("./datasets/data_csv.csv")

x = df.iloc[:, 0]
y = df.iloc[:, 1]

x = np.expand_dims(x, 1)
y = np.expand_dims(y, 1)

In [15]:
x_train, x_, y_train, y_ = train_test_split(x, y, test_size=0.4, random_state=1)

In [16]:
x_cv, x_test, y_cv, y_test = train_test_split(x_, y_, test_size=0.5, random_state=1)

In [17]:
del x_, y_

In [18]:
## Adding polynomial Features
degree = 1
poly = PolynomialFeatures(degree, include_bias=False)
x_train_mapped = poly.fit_transform(x_train)
x_cv_mapped = poly.transform(x_cv)
x_test_mapped = poly.transform(x_test)

In [20]:
## Scaling the features

scaler = StandardScaler()
x_train_mapped_scaled = scaler.fit_transform(x_train_mapped)
x_cv_mapped_scaled = scaler.transform(x_cv_mapped)
x_test_mapped_scaled = scaler.transform(x_test_mapped)

## Building the NN models

In [58]:
# units for each layer of each model
units = [[25,15,1], [20,12,12,20,1], [32,16,8,4,12,1]]

In [59]:
nn_models = []
for each in units:
    dense_layers = []
    for layers_units in range(len(each)):
        if each[layers_units] == 1:
            dense_layers.append(Dense(units=each[layers_units], activation="linear"))
        else:
            dense_layers.append(Dense(units=each[layers_units], activation="relu"))

    model = Sequential(dense_layers)
    print(model)
    nn_models.append(model)
    

<keras.engine.sequential.Sequential object at 0x15e064c70>
<keras.engine.sequential.Sequential object at 0x15de9d8d0>
<keras.engine.sequential.Sequential object at 0x15e067850>


In [60]:
nn_train_mses = []
nn_cv_mses = []

In [61]:
for models in nn_models:
    model.compile(
        loss="mse",
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
    )
    print(f"Training model...")
    model.fit(
        x_train_mapped_scaled, 
        y_train,
        epochs=300,
        verbose=0
    )
    print("Training finished \n")

    # find the training MSEs
    yhat = model.predict(x_train_mapped_scaled)
    train_mse = mean_squared_error(y_train, yhat) / 2
    nn_train_mses.append(train_mse)

    # find corss validation MSEs
    yhat_cv = model.predict(x_cv_mapped_scaled)
    cv_mse = mean_squared_error(y_cv, yhat_cv) / 2
    nn_cv_mses.append(cv_mse)

Training model...


2025-01-20 13:20:45.971911: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Training finished 

Training model...
Training finished 

Training model...
Training finished 



In [62]:
# print results
print("RESULTS:")
for model_num in range(len(nn_train_mses)):
    print(
        f"Model {model_num+1}: Training MSE: {nn_train_mses[model_num]:.2f}, " +
        f"CV MSE: {nn_cv_mses[model_num]:.2f}"
        )

RESULTS:
Model 1: Training MSE: 75.39, CV MSE: 98.89
Model 2: Training MSE: 73.25, CV MSE: 100.25
Model 3: Training MSE: 43.08, CV MSE: 73.80


In [64]:
# Select the model with the lowest CV MSE
model_num = 3

# Compute the test MSE
yhat = nn_models[model_num-1].predict(x_test_mapped_scaled)
test_mse = mean_squared_error(y_test, yhat) / 2

print(f"Selected Model: {model_num}")
print(f"Training MSE: {nn_train_mses[model_num-1]:.2f}")
print(f"Cross Validation MSE: {nn_cv_mses[model_num-1]:.2f}")
print(f"Test MSE: {test_mse:.2f}")

Selected Model: 3
Training MSE: 43.08
Cross Validation MSE: 73.80
Test MSE: 91.44
