# Test

# Prepare the data

In [1]:
from pipelines_selected_features import pipeline
import pandas as pd
from data import load_data_selected_features

train_data, additional_train_data, test_data = load_data_selected_features()

all_train_data = pd.concat([train_data, additional_train_data], axis=0)
all_train_data_transformed = pipeline.fit_transform(all_train_data)

train_data_transformed = all_train_data_transformed[:len(train_data)]
additional_train_data_transformed = all_train_data_transformed[len(train_data):]

X_train = train_data_transformed.drop(columns=['bg+1:00'])
y_train = train_data_transformed['bg+1:00']

X_additional_train = additional_train_data_transformed.drop(columns=['bg+1:00'])
y_additional_train = additional_train_data_transformed['bg+1:00']

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Activation
from model_performance_calculations import calculate_dnn_performance, get_rmse_boxplot_chart, get_rmse_line_chart


def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))


input_dim = X_train.shape[1]
model = Sequential([
    # Input Layer
    Input(shape=(input_dim,)),
    Dense(128),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.5),

    # Hidden Layer 1
    Dense(64),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.5),

    # Hidden Layer 2    
    Dense(32),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.5),

    # Output Layer
    Dense(1, activation='linear')
])

model.compile(optimizer='adam', loss='mse', metrics=[rmse])

calculate_dnn_performance(model, X_train, y_train, X_additional_train, y_additional_train, n_splits=3, epochs=5)


10:44:13 - Start training DNN


10:44:13 - Selected splitter: ShuffleSplit(n_splits=3, random_state=42, test_size=0.2, train_size=None)
10:44:13 - Split 1/3
Epoch 1/5
8934/8934 - 6s - 705us/step - loss: 9.7594 - rmse: 2.9814 - val_loss: 4.9285 - val_rmse: 2.1908
Epoch 2/5
8934/8934 - 6s - 617us/step - loss: 5.6293 - rmse: 2.3386 - val_loss: 4.5954 - val_rmse: 2.1150
Epoch 3/5
8934/8934 - 5s - 604us/step - loss: 5.1311 - rmse: 2.2333 - val_loss: 4.7753 - val_rmse: 2.1579
Epoch 4/5
8934/8934 - 6s - 624us/step - loss: 4.9730 - rmse: 2.1988 - val_loss: 4.6380 - val_rmse: 2.1259
Epoch 5/5
8934/8934 - 6s - 668us/step - loss: 4.9367 - rmse: 2.1919 - val_loss: 4.5570 - val_rmse: 2.1074
[1m1054/1054[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323us/step
10:44:42 - R^2: 0.5758102004821677, RMSE: 2.134704716481648, MAE: 1.5924285853769622, MSE: 4.556964226568994
10:44:42 - Split 2/3
Epoch 1/5
8934/8934 - 5s - 601us/step - loss: 4.8754 - rmse: 2.1774 - val_loss: 4.5129 - val_rmse: 2.0974
Epoch 2/5
8934/8934 - 5s - 606

ModelScore(name='DNN', r_squared={'mean': np.float64(0.5878344044824707), 'std': np.float64(0.009170293030450995), 'min': np.float64(0.5758102004821677), 'max': np.float64(0.598054273113013), 'values': [0.5758102004821677, 0.5896387398522315, 0.598054273113013]}, rmse={'mean': np.float64(2.0993228013547673), 'std': np.float64(0.030500256228210915), 'min': np.float64(2.0602662521438804), 'max': np.float64(2.134704716481648), 'values': [np.float64(2.134704716481648), np.float64(2.1029974354387733), np.float64(2.0602662521438804)]}, mae={'mean': np.float64(1.5727787065488068), 'std': np.float64(0.01819569272918485), 'min': np.float64(1.5485650025118898), 'max': np.float64(1.5924285853769622), 'values': [np.float64(1.5924285853769622), np.float64(1.5773425317575687), np.float64(1.5485650025118898)]}, mse={'mean': np.float64(4.408086489918014), 'std': np.float64(0.12789486084717955), 'min': np.float64(4.244697029722991), 'max': np.float64(4.556964226568994), 'values': [np.float64(4.55696422

In [3]:
from sklearn.model_selection import cross_val_score
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping


def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))


def create_model(input_dimension: int):
    dnn = Sequential([
        # Input Layer
        Input(shape=(input_dimension,)),
        Dense(128),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.5),

        # Hidden Layer 1
        Dense(64),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.5),

        # Hidden Layer 2    
        Dense(32),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.5),

        # Output Layer
        Dense(1, activation='linear')
    ])

    dnn.compile(optimizer='adam', loss='mse', metrics=[rmse])
    return dnn


input_dim = X_additional_train.shape[1]
pretrained_model = create_model(input_dim)
pretrained_model.fit(
    X_additional_train,
    y_additional_train,
    validation_split=0.2,
    callbacks=[EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)],
    epochs=10,
    verbose=2
)
pretrained_model.save_weights("pretrained_model.weights.h5")


# Build a function to create a model with pretrained weights and freeze layers
def build_fn_with_pretrained_weights():
    dnn = create_model(input_dim)
    dnn.load_weights("pretrained_model.weights.h5")  # Load the pre-trained weights

    # Freeze all layers except the last one (optional)
    for layer in dnn.layers:
        layer.trainable = False
    # Unfreeze the output layer if you want to fine-tune it
    # dnn.layers[-1].trainable = True

    dnn.compile(optimizer='adam', loss='mse', metrics=[rmse])
    return dnn


# Wrap the model in KerasRegressor
keras_regressor = KerasRegressor(model=build_fn_with_pretrained_weights, verbose=2)

scores = cross_val_score(keras_regressor, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
print(scores)



Epoch 1/10
4215/4215 - 4s - 840us/step - loss: 13.9726 - rmse: 3.5495 - val_loss: 7.5430 - val_rmse: 2.3702
Epoch 2/10
4215/4215 - 3s - 716us/step - loss: 7.4622 - rmse: 2.6951 - val_loss: 5.7828 - val_rmse: 2.1236
Epoch 3/10
4215/4215 - 3s - 642us/step - loss: 6.2959 - rmse: 2.4765 - val_loss: 5.3308 - val_rmse: 2.0487
Epoch 4/10
4215/4215 - 3s - 641us/step - loss: 5.8544 - rmse: 2.3862 - val_loss: 5.5434 - val_rmse: 2.0755
Epoch 5/10
4215/4215 - 3s - 678us/step - loss: 5.5404 - rmse: 2.3219 - val_loss: 5.3240 - val_rmse: 2.0469
Epoch 6/10
4215/4215 - 3s - 711us/step - loss: 5.3693 - rmse: 2.2848 - val_loss: 5.5005 - val_rmse: 2.0732
Epoch 7/10
4215/4215 - 3s - 711us/step - loss: 5.2938 - rmse: 2.2692 - val_loss: 5.2828 - val_rmse: 2.0487
Epoch 8/10
4215/4215 - 3s - 718us/step - loss: 5.2009 - rmse: 2.2494 - val_loss: 5.1437 - val_rmse: 2.0215
Epoch 9/10
4215/4215 - 3s - 645us/step - loss: 5.1852 - rmse: 2.2454 - val_loss: 5.4182 - val_rmse: 2.0581
Epoch 10/10
4215/4215 - 3s - 648us/s

  saveable.load_own_variables(weights_store.get(inner_path))


TypeError: Could not locate function 'rmse'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'builtins', 'class_name': 'function', 'config': 'rmse', 'registered_name': 'function'}

In [4]:
# Wrap the model in KerasRegressor
keras_regressor = KerasRegressor(model=build_fn_with_pretrained_weights, verbose=2)

scores = cross_val_score(keras_regressor, X_train, y_train, cv=3, scoring='neg_root_mean_squared_error')
print(scores)

  saveable.load_own_variables(weights_store.get(inner_path))


3146/3146 - 2s - 586us/step - loss: 3.9155 - rmse: 3.4538
1573/1573 - 1s - 339us/step


  saveable.load_own_variables(weights_store.get(inner_path))


3146/3146 - 1s - 441us/step - loss: 4.7258 - rmse: 3.5746
1573/1573 - 1s - 330us/step


  saveable.load_own_variables(weights_store.get(inner_path))


3146/3146 - 1s - 439us/step - loss: 4.4040 - rmse: 3.5047
1573/1573 - 1s - 325us/step
[-2.17800028 -1.84206324 -1.93579263]


## Predict for validation data and check results

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Concatenate
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input, Activation
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasRegressor


def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))


def get_dnn_model(input_dimension: int):
    input_layer = Input(shape=(input_dimension,))
    wide = Dense(128, activation='relu')(input_layer)  # Wide component
    deep = Dense(128, activation='relu')(input_layer)
    deep = Dense(64, activation='relu')(deep)
    deep = Dense(32, activation='relu')(deep)

    merged = Concatenate()([wide, deep])
    output_layer = Dense(1, activation='linear')(merged)

    dnn_model = Model(inputs=input_layer, outputs=output_layer)
    dnn_model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=[rmse]
    )

    return dnn_model


keras_regressor = KerasRegressor(
    model=get_dnn_model(X_train.shape[1]),
    epochs=100,
    verbose=2,
    loss='mse',
    metrics=[rmse]
)

keras_regressor.fit(X_additional_train, y_additional_train)


In [None]:
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import StackingRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import LassoLarsIC, Ridge

hgb_estimator = HistGradientBoostingRegressor(max_iter=200, max_depth=5, learning_rate=0.1)
lasso_lars_ic_base_model = LassoLarsIC(criterion='bic', max_iter=10000)
knn_base_model = KNeighborsRegressor(n_neighbors=5)
xgb_base_model = XGBRegressor(objective='reg:squarederror', random_state=42, n_estimators=500, max_depth=5, learning_rate=0.1)

keras_regressor = KerasRegressor(
    model=get_dnn_model(X_train.shape[1]),
    epochs=100,
    verbose=2
)

estimators = [
    ('dnn', keras_regressor),
    ('hgb', hgb_estimator),
    ('lasso_lars_ic', lasso_lars_ic_base_model),
    ('knn', knn_base_model),
    ('xgb', xgb_base_model)

]

model = StackingRegressor(estimators=estimators, final_estimator=Ridge(alpha=0.1), n_jobs=-1, verbose=2)

In [None]:
from datetime import datetime
import os
from model_performance_calculations import calculate_stacking_regressor_performance, get_rmse_boxplot_chart, get_rmse_line_chart, save_performances, save_model, \
    calculate_dnn_performance

date_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
model_name = f'{date_time}-model-v4.0-DNN'

save_model(model, os.path.join('models', f'{model_name}.pkl'))

performances = calculate_stacking_regressor_performance(model, X_train, y_train, X_additional_train, y_additional_train, n_splits=1)
save_performances(performances, os.path.join('models', f'{model_name}-performances.json'))

get_rmse_boxplot_chart(performances).show()
get_rmse_line_chart(performances).show()

In [None]:
import numpy as np
from pipelines_selected_features import pipeline

all_train_data_transformed = pipeline.fit_transform(pd.concat([train_data, additional_train_data], axis=0))

X_train = all_train_data_transformed.drop(columns=['bg+1:00'])
y_train = all_train_data_transformed['bg+1:00']
X_test = pipeline.transform(test_data)

model.fit(X_train, y_train)

In [None]:
y_pred = model.predict(X_test)
test_data['bg+1:00'] = y_pred

if np.sum(y_pred < 0) > 0:
    print(f'Number of negative values: {np.sum(y_pred < 0)}')
    bg_min_train = np.min(y_train)
    print(f'Min value: {np.min(y_pred)}')
    print(f'Filling negative values with {bg_min_train}')
    y_pred = np.where(y_pred < 0, bg_min_train, y_pred)

test_data['bg+1:00'] = y_pred
test_data.head()

In [None]:
submission = pd.DataFrame(test_data['bg+1:00'])
submission

In [None]:
submission.to_csv(f'submission-{os.path.basename(os.getcwd())}.csv')