In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from datetime import datetime
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import random
import sys

In [None]:
# Load the raw CSV inputs. Paths are relative to the notebook's working directory.
# Order of the targets matches the order of the paths below.
flu, weather, google_flu, google_symptoms, pop = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/INFLUENZA_sentinella/data.csv',                          # -> flu
        '../data/weather/reg_weather.csv',                                # -> weather
        '../data/google_search_trend/reg_google_grippe.csv',              # -> google_flu
        '../data/google_search_trend/reg_google_fieber_husten.csv',       # -> google_symptoms
        '../data/pop_data_cantons/weekly_imputed_pop_data_final.csv',     # -> pop
    )
)

<h2>Disclaimer</h2>

#### This notebook was used only to tune the autoregressive neural network on the Google Trends data for the search term "Grippe" (flu). The selected configurations were subsequently used to forecast the Google Trends data within the combined model in the notebook "ffn_model_exogenous".

<h3>Data Consolidation</h3>

<h4>Extract relevant data from BAG dataset on weekly flu incidence</h4>

In [None]:
# Keep only region-level rows that are aggregated over all age groups and both sexes
is_regional_total = (
    (flu['georegion_type'] == "sentinella_region")
    & (flu['agegroup'] == "all")
    & (flu['sex'] == "all")
)

# Rows of the georegion "unknown" only contain NaNs and are excluded
is_known_region = flu['georegion'] != 'unknown'

# Columns required for the analysis
selected_cols = ['temporal', 'georegion', 'incValue', 'value']
flu_reg = flu.loc[is_regional_total & is_known_region, selected_cols].copy()

<h4>Convert dates and format of Google-Trend data for subsequent merging</h4>

In [None]:
# Load the lookup that maps the Google-Trends date labels to ISO calendar weeks,
# so that the Google data can be aligned with the flu data
with open('date_dict.json', 'r') as dict_file:
    date_dict = json.load(dict_file)

# Replace the 'Woche' labels of both Google-Trends tables by their ISO calendar week.
# A label missing from the lookup raises a KeyError.
for trend_table in (google_flu, google_symptoms):
    trend_table['Woche'] = [date_dict[week_label] for week_label in trend_table['Woche']]

In [None]:
# Bring google_flu into one row per (week, region) with one column per search query:
#   1. wide -> long, so every header '<region>_<query>' becomes a value of 'region_query'
#   2. split that header into separate 'region' (first two '_'-parts) and 'query' (rest) columns
#   3. drop the now superfluous header column
#   4. pivot the queries back into columns to enable merging on date and region
google_flu = (
    google_flu
    .melt(id_vars=['Woche'], var_name='region_query', value_name='search_activity')
    .assign(
        region=lambda long_df: long_df['region_query'].apply(lambda x: "_".join(x.split('_')[:2])),
        query=lambda long_df: long_df['region_query'].apply(lambda x: "_".join(x.split('_')[2:])),
    )
    .drop(columns='region_query')
    .pivot(index=['Woche', 'region'], columns='query', values='search_activity')
    .reset_index()
)

In [None]:
## Same reshaping for the second Google dataset, which contains the symptom queries
# wide -> long, split the '<region>_<query>' header into 'region' and 'query' columns,
# drop the header column and pivot the queries back into separate columns
google_symptoms = (
    google_symptoms
    .melt(id_vars=['Woche'], var_name='region_query', value_name='search_activity')
    .assign(
        region=lambda long_df: long_df['region_query'].apply(lambda x: "_".join(x.split('_')[:2])),
        query=lambda long_df: long_df['region_query'].apply(lambda x: "_".join(x.split('_')[2:])),
    )
    .drop(columns='region_query')
    .pivot(index=['Woche', 'region'], columns='query', values='search_activity')
    .reset_index()
)

<h4>Convert date format of weather data for merging</h4>

In [None]:
# Convert the daily dates ('YYYY-MM-DD') to ISO week labels ('YYYY-Www').
# pandas' ISO-calendar accessor is used so that this cell does not depend on what the
# name `datetime` is currently bound to in the kernel (class vs. module).
# NOTE: the 'date' column is overwritten, so the cell can only be run once per load of `weather`.
iso_calendar = pd.to_datetime(weather['date'], format='%Y-%m-%d').dt.isocalendar()
iso_week_dates = (
    iso_calendar['year'].astype(str)
    + '-W'
    + iso_calendar['week'].astype(str).str.zfill(2)  # zero-padded two-digit week number
)
weather['date'] = iso_week_dates

<h4>Merge datasets on date and region</h4>

In [None]:
# 1) Attach the weather observations to every flu row (week + region), ordered by region and week
flu_weather = flu_reg.merge(
    weather,
    how='left',
    left_on=['temporal', 'georegion'],
    right_on=['date', 'region'],
).sort_values(by=['georegion', 'temporal'])

# 2) Combine both Google-Trends tables; only (region, week) pairs present in both are kept
merged_google = google_flu.merge(google_symptoms, how='inner', on=['region', 'Woche'])

# 3) Attach the Google-Trends data to the flu/weather rows
merged_data = flu_weather.merge(
    merged_google,
    how='left',
    left_on=['georegion', 'temporal'],
    right_on=['region', 'Woche'],
)

In [None]:
# Remove the duplicated merge keys; 'georegion' and 'temporal' remain as region/week identifiers
merge_key_cols = ['region_x', 'region_y', 'date', 'Woche']
merged_data = merged_data.drop(columns=merge_key_cols)

<h4>Convert ISO calendar-weeks to Gregorian calendar (format 'YYYY-MM-DD')</h4>

In [None]:
# Imported under an alias: a plain `import datetime` would rebind the name `datetime`,
# which the top import cell binds to the `datetime.datetime` class.
import datetime as dt

# Convert ISO calendar-weeks to gregorian dates
# Functions based on answer by Ben James: <https://stackoverflow.com/questions/304256/whats-the-best-way-to-find-the-inverse-of-datetime-isocalendar>
def iso_year_start(iso_year):
    """Return the gregorian date of the first day (Monday of week 1) of the given ISO year.

    January 4th always lies in ISO week 1, so stepping back to the Monday of
    that week yields the start of the ISO year.
    """
    fourth_jan = dt.date(iso_year, 1, 4)
    delta = dt.timedelta(fourth_jan.isoweekday() - 1)  # days since that week's Monday
    return fourth_jan - delta


def iso_to_gregorian(iso_year, iso_week, iso_day):
    """Return the gregorian `datetime.date` for the given ISO year, week and weekday.

    Args:
        iso_year: ISO year number.
        iso_week: ISO week number (1-based).
        iso_day: ISO weekday, 1 = Monday ... 7 = Sunday.
    """
    year_start = iso_year_start(iso_year)
    return year_start + dt.timedelta(days=iso_day - 1, weeks=iso_week - 1)


In [None]:
# Split the ISO week label in 'temporal' ('YYYY-Www') into its numeric parts
week_pattern = r'W(\d{1,2})' # RegEx capturing the week digits; the int cast below drops a leading zero
merged_data['week_number'] = merged_data['temporal'].str.extract(week_pattern).astype(int)

# Year = text before the first '-', converted from string to numeric
year_labels = [iso_label.split('-')[0] for iso_label in merged_data['temporal']]
merged_data['year'] = pd.to_numeric(year_labels)

# Represent every ISO week by the gregorian date of its Thursday (ISO weekday 4)
merged_data['date'] = [
    iso_to_gregorian(iso_year, iso_week, 4)
    for iso_year, iso_week in zip(merged_data['year'], merged_data['week_number'])
]

In [None]:
# Inspect resulting dataframe
merged_data.head()

<h2>Data exploration</h2>

In [None]:
# Display descriptive statistics
merged_data['incValue'].describe()

In [None]:
# Display missing values in reported flu incidence across regions
merged_data[merged_data['incValue'].isna()]

In [None]:
# Impute missing values in March 2020 linearly.
# The result is assigned back explicitly: calling `interpolate(inplace=True)` on the column
# selection is chained in-place mutation, which does not update `merged_data` under
# pandas' Copy-on-Write behaviour.
# NOTE(review): rows are ordered by region and then week, so a gap at the very start/end of a
# region would be interpolated across the region boundary - confirm no such gaps exist.
merged_data['incValue'] = merged_data['incValue'].interpolate()

<h4>Inspect incidence of consultations for influenza-like-diseases over time</h4>

In [None]:
# Cantons belonging to each region (shown in the subplot titles)
region_to_ct = {
    'region_1': ['Genf', 'Neuenburg', 'Waadt', 'Wallis'],
    'region_2': ['Bern', 'Freiburg', 'Jura'],
    'region_3': ['Aargau', 'Basel-Landschaft', 'Basel-Stadt', 'Solothurn'],
    'region_4': ['Luzern', 'Nidwalden', 'Obwalden', 'Schwyz', 'Uri', 'Zug'],
    'region_5': ['Appenzell_Innerrhoden', 'Appenzell_Ausserrhoden', 'Glarus', 'Sankt_Gallen', 'Schaffhausen', 'Thurgau', 'Zürich'],
    'region_6': ['Graubünden', 'Tessin'],
}

# One panel per region showing the regional incidence value over the provided timeframe
plt.style.use('ggplot')
fig, ax = plt.subplots(6, figsize=(10, 15))
fig.suptitle('Weekly incidence of consultations for influenza-like-diseases per region from 2013-2023', fontsize=12)

# Leave room for the figure title and increase the vertical spacing between the panels
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.subplots_adjust(hspace=0.5)

# Index by date once so that the x-axis of every panel is the date
incidence_by_date = merged_data.set_index('date')

for region_nr, region_ax in enumerate(ax, start=1):
    region_key = f"region_{region_nr}"

    # Dashed reference line at the beginning of 2020
    region_ax.axvline(pd.Timestamp('2020-01-01'), linestyle='--', color='grey', lw=1, alpha=.7)
    region_ax.plot(incidence_by_date.loc[incidence_by_date['georegion'] == region_key, 'incValue'])
    region_ax.set_title(f"Region {region_nr}: \n{region_to_ct[region_key]}", fontsize=10)

    if region_nr != 5:
        continue

    # Highlight region 5 with a black, thicker frame
    for spine in region_ax.spines.values():
        spine.set_edgecolor('black')
        spine.set_linewidth(2)
        spine.set_visible(True)

# Show the plot
plt.show()

<h3>Modelling</h3>

In [None]:
def create_lagged_features(df, column, number_of_lags, seasonal_lags=None):
    """Return a copy of `df` extended by lagged versions of `column`.

    Args:
        df: Input DataFrame; it is not modified.
        column: Name of the column to lag.
        number_of_lags: Adds the columns 'lag_1' ... 'lag_<number_of_lags>'.
        seasonal_lags: Optional iterable of lags. For every entry `s` the columns
            'seasonal_lag_<s>_helper' (shifted by s-1) and 'seasonal_lag_<s>'
            (shifted by s) are added.

    Returns:
        The extended DataFrame. Rows without enough history contain NaN in the
        corresponding lag columns.
    """
    lagged = df.copy()

    # Regular lags 1..number_of_lags
    for shift_by in range(1, number_of_lags + 1):
        lagged[f'lag_{shift_by}'] = lagged[column].shift(shift_by)

    if seasonal_lags is None:
        return lagged

    # Seasonal lags, each accompanied by a helper column that is shifted one step less
    for period in seasonal_lags:
        lagged[f'seasonal_lag_{period}_helper'] = lagged[column].shift(period - 1)
        lagged[f'seasonal_lag_{period}'] = lagged[column].shift(period)

    return lagged


In [None]:
def autoregressive_iterative_forecast(model, initial_input, seasonal_input, n_steps, seasonal_period=52):
    """
    Perform iterative (multi-step) forecasting using an autoregressive model.

    The feature vector is expected to be laid out as
    ``[lag_1, lag_2, ..., lag_n, seasonal_lag]``: after every prediction the regular
    lags are shifted by one position, the new prediction becomes ``lag_1`` and the
    seasonal lag (last position) is replaced by the value for the next step.

    Args:
        model: Trained autoregressive model exposing ``predict`` (e.g., MLPRegressor).
        initial_input: Feature vector of the first step to forecast. It is not modified.
        seasonal_input: Sequence whose k-th entry is the seasonal-lag feature of step k+1
            (i.e. the target shifted by ``seasonal_period - 1``, aligned with the forecast
            window). Only the first ``seasonal_period - 1`` entries are read.
        n_steps: Number of future time steps to forecast.
        seasonal_period: Length of the seasonal lag in steps (default 52).

    Returns:
        A numpy array of forecasts, one for each future time step.
    """
    # Float copy: keeps the caller's data untouched and avoids truncating predictions
    # when the initial features happen to be integers
    current_input = np.array(initial_input, dtype=float)
    seasonal_input = np.asarray(seasonal_input, dtype=float).ravel()
    forecasts = []

    # Number of steps whose seasonal lag still lies before the forecast window.
    # Step k+1 needs the value from `seasonal_period` steps earlier, i.e. step
    # k + 1 - seasonal_period; this is known history only while k < seasonal_period - 1.
    history_steps = seasonal_period - 1

    for step in range(n_steps):
        # Predict the next step
        next_step_pred = model.predict(current_input.reshape(1, -1))[0]
        forecasts.append(next_step_pred)

        if step == n_steps - 1:
            break  # no further prediction follows, so no input update is needed

        # Roll all lags except the last one (seasonal lag) and insert the new prediction as lag 1
        current_input[:-1] = np.roll(current_input[:-1], 1)
        current_input[0] = next_step_pred

        # Update the seasonal lag for the next step
        if step < history_steps:
            # Still covered by observed history
            current_input[-1] = seasonal_input[step]
        else:
            # One full season into the forecast: reuse the forecast made one season earlier
            current_input[-1] = forecasts[step - history_steps]

    return np.array(forecasts)

# training_cols = [col for col in df_lagged.columns if ('lag_' in col) and ('_helper' not in col)]
# X_train_cv_scaled = df_lagged[training_cols]
# print(X_train_cv_scaled.iloc[0], f'Length:{len(X_train_cv_scaled.iloc[0])}')
# print(df_lagged['seasonal_lag_52_helper'].iloc[:52])
# prediction = autoregressive_iterative_forecast(model, X_train_cv_scaled.iloc[0], df_lagged[['seasonal_lag_52_helper']], 53)

In [None]:
# Modelling subset: region 5, observations dated before 2020
in_region_5 = merged_data['georegion'] == "region_5"
before_2020 = merged_data['date'].apply(lambda x: x.year < 2020)
data = merged_data.loc[in_region_5 & before_2020]

# Target series ('Grippe' column of the merged data) with a chronological 80/20 split
target = 'Grippe'
y = data[target]
split = int(len(y) * 0.8)
y_train = y.iloc[:split]
y_test = y.iloc[split:]

# Row counter and (still empty) result table for the hyperparameter search
i = 0
scores_df = pd.DataFrame(columns=['RMSE', 'lags', 'seasonal_lags', 'hidden_layers', 'alpha', 'batch_size', 'activation', 'learning_rate'])

In [None]:
import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import sys

# Suppress convergence warnings
# warnings.filterwarnings('ignore', category=ConvergenceWarning)

# --- Search space of the randomized hyperparameter search ---
lags = 52                                          # upper bound for the number of autoregressive lags
seasonal = [52]                                    # seasonal lag(s) passed to create_lagged_features
activations = ['relu']
hidden_layer_sizes = [(16, 16), (16, 16, 16)]
alphas = np.linspace(0.1, 0.3, num=100)            # regularization parameter candidates
learning_rates = np.logspace(-3, -4, 100)          # 100 log-spaced values from 1e-3 down to 1e-4
batch_size = 32

# --- Data: region 5, observations dated before 2020, chronological 80/20 split ---
region_mask = merged_data['georegion'] == "region_5"
year_mask = merged_data['date'].apply(lambda x: x.year < 2020)
data = merged_data.loc[region_mask & year_mask]
y = data[target]
split = int(len(y) * 0.8)
y_train = y[:split]
y_test = y[split:]

# --- Bookkeeping for the search results ---
i = 0
scores_df = pd.DataFrame(columns=['RMSE', 'lags', 'seasonal_lags', 'hidden_layers', 'alpha', 'batch_size', 'activation', 'learning_rate'])

# --- Search budget; the seed makes the sequence of drawn configurations reproducible ---
iterations = 1000
random.seed(42)

# Randomized hyperparameter search.
# Every iteration draws one configuration, fits it on all but the last 52 lagged observations
# of y_train and scores a 52-step iterative forecast on those held-out observations
# (RMSE on the original scale of the target).
score_columns = ['RMSE', 'lags', 'seasonal_lags', 'hidden_layers', 'alpha', 'batch_size', 'activation', 'learning_rate']
search_records = []  # one dict per evaluated configuration
n_val = 52           # number of held-out observations used for validation


def _to_model_scale(values, power_tf, scaler):
    """Apply the fitted Yeo-Johnson transform followed by the fitted scaler to 1-D `values`."""
    column = np.asarray(values, dtype=float).reshape(-1, 1)
    return scaler.transform(power_tf.transform(column)).ravel()


try:
    for i in range(iterations):
        # Randomly select hyperparameters (draw order kept fixed for reproducibility under the seed)
        lag = random.randint(1, lags)
        activation = random.choice(activations)
        learning_rate = random.choice(learning_rates)
        alpha = random.choice(alphas)
        hidden_layer_size = random.choice(hidden_layer_sizes)

        model = MLPRegressor(max_iter=2000,
                             random_state=42,
                             solver='adam',
                             activation=activation,
                             hidden_layer_sizes=hidden_layer_size,
                             alpha=alpha,
                             batch_size=batch_size,
                             learning_rate_init=learning_rate,
                             warm_start=False,
                             early_stopping=True)

        # Create lagged features based on the whole y_train and drop rows without full history
        df_lagged = create_lagged_features(pd.DataFrame(y_train, columns=[target]), column=target, number_of_lags=lag, seasonal_lags=seasonal)
        df_lagged = df_lagged.dropna()

        training_cols = [col for col in df_lagged.columns if ('lag_' in col) and ('_helper' not in col)]
        X = df_lagged[training_cols]
        X_seasonal = df_lagged['seasonal_lag_52_helper']  # needed to update the seasonal lag while forecasting
        y = df_lagged[target]

        # Hold out the last n_val observations for validation
        X_train_cv, X_val = X.iloc[:-n_val], X.iloc[-n_val:]
        y_train_cv, y_val = y.iloc[:-n_val], y.iloc[-n_val:]
        X_seasonal_val = X_seasonal.iloc[-n_val:]

        # Fit the PowerTransformer and StandardScaler on the training targets plus the lag
        # values of the first training row (all but lag_1), i.e. the history preceding y_train_cv
        oldest_lags = X_train_cv.iloc[0, 1:].values.reshape(-1, 1)
        combined_data = np.vstack((y_train_cv.values.reshape(-1, 1), oldest_lags))
        pt = PowerTransformer(method='yeo-johnson', standardize=False)
        stdscaler = StandardScaler()
        stdscaler.fit(pt.fit_transform(combined_data))

        # Bring targets and every lag column onto the model scale
        y_train_cv_scaled = _to_model_scale(y_train_cv, pt, stdscaler)
        X_train_cv_scaled = X_train_cv.apply(lambda column: _to_model_scale(column, pt, stdscaler))
        X_val_scaled = X_val.apply(lambda column: _to_model_scale(column, pt, stdscaler))
        X_seasonal_val_scaled = _to_model_scale(X_seasonal_val, pt, stdscaler)

        # Fit model
        model.fit(X_train_cv_scaled.values, y_train_cv_scaled)

        # Iterative forecast over the validation horizon, starting from the first validation row
        prediction = autoregressive_iterative_forecast(model, X_val_scaled.iloc[0], X_seasonal_val_scaled, len(y_val))

        # Reverse scaling and power transform to score on the original scale
        prediction = stdscaler.inverse_transform(np.asarray(prediction).reshape(-1, 1))
        prediction = pt.inverse_transform(prediction).ravel()

        # RMSE via sqrt(MSE): the `squared=False` argument is not available in recent scikit-learn
        rmse = np.sqrt(mean_squared_error(y_val, prediction))

        # Keep track of the configuration and its score
        search_records.append({
            'RMSE': rmse,
            'lags': lag,
            'seasonal_lags': seasonal,
            'hidden_layers': hidden_layer_size,
            'alpha': alpha,
            'batch_size': batch_size,
            'activation': activation,
            'learning_rate': learning_rate,
        })

        done = i + 1
        print(f'{done}/{iterations}: {(done/iterations)*100:.2f}%')
finally:
    # Built in `finally` so that an interrupted search still leaves the finished rows in scores_df
    scores_df = pd.DataFrame(search_records, columns=score_columns)

In [None]:
# Make sure the RMSE column is numeric before searching for its minimum
scores_df = scores_df.assign(RMSE=pd.to_numeric(scores_df['RMSE']))

# Row label of the lowest RMSE, the corresponding configuration and its score
best_config_index = scores_df['RMSE'].idxmin()
best_config = scores_df.loc[best_config_index]
best_score = best_config['RMSE']

print(f"Best parameters: {best_config}")
print(f"Best score (RMSE): {best_score}")

In [None]:
scores_df.sort_values(by='RMSE').head(10)

In [None]:
import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys

In [None]:
# Refit the ten best-scoring configurations on the training split and evaluate each with an
# iterative forecast over the held-out test split (region 5, observations dated before 2020).

# Extract data (identical for every configuration, therefore done once)
data = merged_data.loc[(merged_data['georegion'] == "region_5") & (merged_data['date'].apply(lambda x: x.year < 2020))]
y = data[target]
split = int(len(y) * 0.8)

for rank_nr, index_nr in enumerate(scores_df.sort_values(by='RMSE').head(10).index, start=1):

    # Read the hyperparameters by column label (robust against a changed column order)
    best_config = scores_df.loc[index_nr]
    best_lag = int(best_config['lags'])
    best_seasonal_lag = np.atleast_1d(best_config['seasonal_lags']).tolist()
    best_hidden_layers = tuple(np.atleast_1d(best_config['hidden_layers']).tolist())
    best_alpha = float(best_config['alpha'])
    best_batch_size = int(best_config['batch_size'])
    best_activation = best_config['activation']
    best_learning_rate = float(best_config['learning_rate'])

    # Create lagged features based on the whole y
    df_lagged = create_lagged_features(pd.DataFrame(y, columns=[target]), column=target, number_of_lags=best_lag, seasonal_lags=best_seasonal_lag)

    # NOTE: SPLIT BEFORE DROPPING TO AVOID DATA LEAKAGE
    df_lagged_train = df_lagged.iloc[:split]
    df_lagged_train = df_lagged_train.dropna()
    df_lagged_test = df_lagged.iloc[split:]

    # Extract training columns and output variable from dataframe
    training_cols = [col for col in df_lagged.columns if ('lag_' in col) and ('helper' not in col)]
    X_train = df_lagged_train[training_cols]
    y_train = df_lagged_train[target]
    X_test = df_lagged_test[training_cols]
    y_test = df_lagged_test[target]

    # Columns required for rolling of seasonal lag in iterative autoregressive forecast
    X_train_seasonal = df_lagged_train['seasonal_lag_52_helper']
    X_test_seasonal = df_lagged_test['seasonal_lag_52_helper']

    # Create combined data to fit transform on all available historical lags in training set
    oldest_lags = X_train.iloc[0, 1:].values.reshape(1, -1) # Lag values of the first training row (all but lag_1)
    combined_data = np.vstack((y_train.values.reshape(-1, 1), oldest_lags.T)) # Concatenate y_train with the oldest lags

    # Fit Yeo-Johnson Transform and scaler on combined data
    pt = PowerTransformer(method='yeo-johnson', standardize=False)
    stdscaler = StandardScaler()
    combined_data_transformed = pt.fit_transform(combined_data)
    stdscaler.fit(combined_data_transformed)

    # Apply transform to train and test sets
    y_train_transformed = pt.transform(y_train.values.reshape(-1, 1)).flatten()
    y_test_transformed = pt.transform(y_test.values.reshape(-1, 1)).flatten()
    X_train_transformed = X_train.apply(lambda x: pt.transform(x.values.reshape(-1, 1)).flatten())
    X_test_transformed = X_test.apply(lambda x: pt.transform(x.values.reshape(-1, 1)).flatten())
    X_train_seasonal_trans = pt.transform(X_train_seasonal.values.reshape(-1, 1)).flatten()
    X_test_seasonal_trans = pt.transform(X_test_seasonal.values.reshape(-1, 1)).flatten()

    # Apply StandardScaler
    y_train_scaled = stdscaler.transform(y_train_transformed.reshape(-1, 1)).flatten()
    y_test_scaled = stdscaler.transform(y_test_transformed.reshape(-1, 1)).flatten()
    X_train_scaled = X_train_transformed.apply(lambda x: stdscaler.transform(x.values.reshape(-1, 1)).flatten())
    X_test_scaled = X_test_transformed.apply(lambda x: stdscaler.transform(x.values.reshape(-1, 1)).flatten())
    X_train_seasonal_scaled = stdscaler.transform(X_train_seasonal_trans.reshape(-1, 1)).flatten()
    X_test_seasonal_scaled = stdscaler.transform(X_test_seasonal_trans.reshape(-1, 1)).flatten()

    # Initialize the final model configuration
    # NOTE(review): early_stopping is not enabled here although it is enabled during the
    # hyperparameter search - confirm that this difference is intended.
    final_model = MLPRegressor(max_iter=2000,
                               random_state=42,
                               solver='adam',
                               activation=best_activation,
                               hidden_layer_sizes=best_hidden_layers,
                               alpha=best_alpha,
                               batch_size=best_batch_size,
                               learning_rate_init=best_learning_rate)

    # Train final model
    final_model.fit(X_train_scaled.values, y_train_scaled)

    # Iterative forecasts over the test set and, for the plot, over the training set
    forecasts = autoregressive_iterative_forecast(final_model, X_test_scaled.iloc[0], X_test_seasonal_scaled, len(y_test_scaled))
    y_hat_train = autoregressive_iterative_forecast(final_model, X_train_scaled.iloc[0], X_train_seasonal_scaled, len(y_train))

    # Reverse scaling and power transform to get back to the original scale
    forecasts = stdscaler.inverse_transform(forecasts.reshape(-1, 1))
    forecasts = pt.inverse_transform(forecasts.reshape(-1, 1))
    y_hat_train = stdscaler.inverse_transform(y_hat_train.reshape(-1, 1))
    y_hat_train = pt.inverse_transform(y_hat_train.reshape(-1, 1))

    # Evaluate the forecasts against the actual y_test values.
    # RMSE via sqrt(MSE): the `squared=False` argument is not available in recent scikit-learn
    rmse = np.sqrt(mean_squared_error(y_test, forecasts.ravel()))

    # Plot the results
    fig, ax = plt.subplots(figsize=(10, 5))

    # The complete series is drawn first; the test part is then drawn on top of it
    ax.plot(df_lagged[target], label="True Train", alpha=1, color='lightblue')
    ax.plot(y_test.index, y_test, label="True Test", alpha=0.8, color='blue')
    ax.plot(y_test.index, forecasts, label='Predictions', alpha=0.7, color='red', linestyle='--')
    ax.plot(y_train.index, y_hat_train, label='Prediction on Train', alpha=0.7, color='grey', linestyle='--')

    # Add labels and legend (x-axis is the row index of merged_data, y-axis the Google search activity target)
    ax.set_xlabel("Row index (weekly observations)")
    ax.set_ylabel("Search activity")
    ax.set_title(f'Nr: {rank_nr}, RMSE: {rmse}, lag: {best_lag}, hidden layers: {best_hidden_layers}, alpha: {best_alpha:.4f}, learning rate: {best_learning_rate:.6f}', fontsize=10)
    ax.legend()
    plt.show()