# Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input, GRU, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import plot_cross_validation_metric
import optuna
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima
from pmdarima.arima import StepwiseContext
import os
import holidays
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


#Save requirements: shell out to `pip freeze` and redirect its output to requirements.txt in the working directory
os.system("pip freeze > requirements.txt")

#SEED: seed NumPy's and TensorFlow's global random generators with the same fixed value
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Read the CSV (path is relative to the working directory) and preview the first two rows
df = pd.read_csv('Data/london_merged.csv')
display(df.head(2))

- Metadata:
  - "timestamp" - timestamp field for grouping the data
  - "cnt" - the count of new bike shares
  - "t1" - real temperature in C
  - "t2" - temperature in C "feels like"
  - "hum" - humidity in percentage
  - "wind_speed" - wind speed in km/h
  - "weather_code" - category of the weather
  - "is_holiday" - boolean field - 1 holiday / 0 non holiday
  - "is_weekend" - boolean field - 1 if the day is weekend
  - "season" - category field meteorological seasons: 0-spring ; 1-summer; 2-fall; 3-winter.
  - "weather_code" category description:
     - 1 = Clear ; mostly clear but have some values with haze/fog/patches of fog/ fog in vicinity 
     - 2 = scattered clouds / few clouds 
     - 3 = Broken clouds 
     - 4 = Cloudy 
     - 7 = Rain/ light Rain shower/ Light rain 
     - 10 = rain with thunderstorm 
     - 26 = snowfall 
     - 94 = Freezing Fog

In [None]:
# Print the frame summary (columns, dtypes, non-null counts)
df.info()

# Data Wrangling

In [None]:
# Parse the timestamp column into datetimes
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Keep only rows from calendar years 2015 and 2016, then order chronologically
in_study_years = df['timestamp'].dt.year.isin([2015, 2016])
df = df[in_study_years].sort_values('timestamp')

In [None]:
#Missing values: number of null entries per column
df.isnull().sum()

- No missing values. But there might be missing timestamps.

In [None]:
#Check for missing timestamps
# Full hourly grid between the first and last observed timestamp (the grid is hourly despite the "days" in the names)
all_days = pd.date_range(start=df['timestamp'].min(), end=df['timestamp'].max(), freq='h')
# Grid points that never occur in the data
missing_days = all_days[~all_days.isin(df['timestamp'])]
print('Number of missing timestamps:', len(missing_days))

In [None]:
# First entry of the missing-timestamp index
missing_days[0]

- 130 timestamps are missing. We will impute them using existing values.

In [None]:
#London holidays
# UK holiday calendar built for the first and last year present in the data.
# NOTE(review): only the min and max years are passed, so a span of more than two years
# would skip the years in between; this is sufficient for the 2015-2016 filter applied earlier.
uk_holidays = holidays.UK(years=[df['timestamp'].dt.year.min(), df['timestamp'].dt.year.max()])
uk_holidays

In [None]:
# Skeleton frame with one row per timestamp of the full hourly grid
df_full = pd.DataFrame(all_days, columns=['timestamp'])
# Left-join the observed measurements; grid rows with no match in df come through as NaN
observed_columns = ['timestamp', 'cnt', 't1', 't2', 'hum', 'wind_speed', 'weather_code', 'season']
df_full = df_full.merge(df[observed_columns], on='timestamp', how='left')
# Recompute the calendar flags for every row: 1 when the condition holds, else 0
holiday_mask = df_full['timestamp'].dt.date.isin(uk_holidays)
weekend_mask = df_full['timestamp'].dt.dayofweek.isin([5, 6])
df_full['is_holiday'] = np.where(holiday_mask, 1, 0)
df_full['is_weekend'] = np.where(weekend_mask, 1, 0)

# Forward-fill: each gap takes the most recent preceding value
df_full = df_full.ffill()
df = df_full.copy()

In [None]:
# Null count per column after the fill step
df.isnull().sum()

In [None]:
# Repeat the gap check against the rebuilt frame
missing_days = all_days[~all_days.isin(df['timestamp'])]
print('Number of missing timestamps:', len(missing_days))

In [None]:
# Index the frame by its timestamp column
df = df.set_index('timestamp')
# Re-express the datetime index as hourly periods
df.index = pd.DatetimeIndex(df.index).to_period('h')
df.head(2)

In [None]:
def _most_frequent(values):
    """Return the first entry of ``values.value_counts()``, i.e. the most frequent value."""
    return values.value_counts().index[0]


# Hourly -> daily: summed rentals, median weather readings, most frequent
# category codes, and the maximum of each 0/1 calendar flag.
daily_aggregations = {
    'cnt': 'sum',
    't1': 'median',
    't2': 'median',
    'hum': 'median',
    'wind_speed': 'median',
    'weather_code': _most_frequent,
    'season': _most_frequent,
    'is_holiday': 'max',
    'is_weekend': 'max',
}
df = df.resample('D').agg(daily_aggregations)
df.head(2)

In [None]:
#Remove duplicates
# De-duplicate on the date index instead of on row values: DataFrame.drop_duplicates()
# ignores the index, so two different days that happened to share identical values
# would be deleted and leave a gap in the daily series.
df = df[~df.index.duplicated(keep='first')]

In [None]:
# Boxplot of every column, one panel per column.
# The grid height is derived from the column count so that no column is left out
# (the earlier index loop stopped one short and never drew the last column).
cols = df.columns
n_panel_cols = 3
n_panel_rows = max(1, -(-len(cols) // n_panel_cols))  # ceiling division
fig, axes = plt.subplots(n_panel_rows, n_panel_cols, figsize=(10, 12), squeeze=False)
flat_axes = axes.ravel()
for ax, col in zip(flat_axes, cols):
    sns.boxplot(df[col], ax=ax)
    ax.set_title(col)
# Hide any panels left over when the column count is not a multiple of the grid width
for ax in flat_axes[len(cols):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()

- There is no abnormal data in the dataset.

In [None]:
# Correlation matrix of all columns, drawn as an annotated heatmap
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
plt.show()

- Real and feels like temperature are highly correlated. Let's use feels like temperature since it is more likely to impact the decision.
- Humidity is also correlated with temperature. We will drop it.

In [None]:
# Drop the real temperature (t1) and humidity (hum) columns
df = df.drop(columns=['t1', 'hum'])

In [None]:
# Pairplot of the remaining columns, coloured by the rental count.
# NOTE(review): hue='cnt' is a numeric column with many distinct values — confirm
# that colouring by it (rather than by a categorical column) is intended.
pairplot_columns = ['cnt', 't2', 'wind_speed', 'is_holiday', 'is_weekend', 'weather_code', 'season']
sns.pairplot(
    df[pairplot_columns],
    hue='cnt',
    palette='coolwarm',
    height=3,
    aspect=1.5,
)
plt.show()

In [None]:
# Replace the numeric category codes with readable labels.
# Weather codes -> labels (a code absent from this dict maps to NaN via Series.map)
weather_desc = {
    1: 'Clear',
    2: 'Scattered_Clouds',
    3: 'Broken_Clouds',
    4: 'Cloudy',
    7: 'Rain',
    10: 'Storm',
    26: 'Snowfall',
    94: 'Freezing_Fog',
}
df['weather_code'] = df['weather_code'].map(weather_desc)

# is_holiday and is_weekend are left as 0/1 integers (no label mapping applied).

# Season codes -> labels
seasons = {0: 'Spring', 1: 'Summer', 2: 'Fall', 3: 'Winter'}
df['season'] = df['season'].map(seasons)
df.head(2)

In [None]:
#One hot encoding for categorical variables
# drop_first=True omits the first level of each encoded column; dummy columns are emitted as ints
df = pd.get_dummies(df, drop_first=True, dtype=int)
df.head(2)

In [None]:
# Cyclical encoding of the calendar month: map the month number onto a circle
# and store its sine and cosine as two columns.
month_angle = 2*np.pi*df.index.month/12
df['month_sin'] = np.sin(month_angle)
df['month_cos'] = np.cos(month_angle)

In [None]:
# Chronological split: training data runs through 2016-06-30, test data starts 2016-07-01
train_end, test_start = '2016-06-30', '2016-07-01'
train_df = df.loc[:train_end].copy()
test_df = df.loc[test_start:].copy()

In [None]:
# Validation hold-out for models that need one: the tail of train_df from
# 2016-04-01 onwards becomes val_df, everything through 2016-03-31 stays for fitting.
fit_end, val_start = '2016-03-31', '2016-04-01'
train_train_df = train_df.loc[:fit_end].copy()
val_df = train_df.loc[val_start:].copy()

# Diagnosis

### Check for seasonality

In [None]:
#Plot seasonal decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Work on a copy with a daily DatetimeIndex (converted from the period index)
fig_df = train_df.copy()
fig_df = fig_df.asfreq('D')
fig_df.index = pd.DatetimeIndex(fig_df.index.to_timestamp())
print(fig_df.index.freq)

# Store the result under its own name: assigning it to `seasonal_decompose`
# would overwrite the imported function with the result object.
decomposition = seasonal_decompose(fig_df['cnt'], model='additive')

fig, axes = plt.subplots(4, 1, figsize=(20, 8))
decomposition.observed.plot(ax=axes[0], title='Observed')
decomposition.trend.plot(ax=axes[1], title='Trend')
decomposition.seasonal.plot(ax=axes[2], title='Seasonal')
decomposition.resid.plot(ax=axes[3], title='Residual')
plt.tight_layout()
plt.show()

There is clear seasonality in the data. Also, the data does not look stationary.

### Check for stationarity

In [None]:
def check_stationarity(data, significance=0.05):
    """Run the Augmented Dickey-Fuller test on ``data`` and print the outcome.

    Parameters
    ----------
    data : array-like
        The series to test; passed straight to ``adfuller``.
    significance : float, default 0.05
        p-value threshold. A p-value above it is reported as "not stationary"
        (the unit-root null hypothesis is not rejected).

    Returns
    -------
    None
        All results are printed.
    """
    print('Null Hypothesis: Presence of unit root (Data is not stationary)')
    print('Alternate Hypothesis: Absence of unit root (Data is stationary)')
    result = adfuller(data, autolag='AIC')
    print(result)
    print('ADF Statistic:', result[0])
    # Third element of the adfuller result tuple is the number of lags used
    print('Lags:', result[2])
    print('p-value:', result[1])
    if result[1] > significance:
        print('Data is not stationary')
    else:
        print('Data is stationary')

check_stationarity(train_df['cnt'])

Hence, the models to be used should be able to handle seasonality and non-stationarity. SARIMAX might be a good choice. However, there are multiple seasonalities in the data which might be difficult to capture with SARIMAX. We will use Facebook Prophet and LSTM for this task. Let's still try SARIMAX and see how it performs.

# Models

## SARIMAX

- We will use Auto ARIMA to find the best parameters rather than relying only on the ACF and PACF plots, although those plots are useful for defining the Auto ARIMA search space. Note that Auto ARIMA can run into memory issues for high-order models, so we limit the search space.

In [None]:
# Min-max scale the exogenous features; the scaler is fitted on the training split only
scaler = MinMaxScaler()
X_train, y_train = train_df.drop('cnt', axis=1), train_df['cnt']
X_test, y_test = test_df.drop('cnt', axis=1), test_df['cnt']

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Keep the feature names (the scaled outputs above are plain arrays)
feature_columns = X_train.columns
feature_columns

In [None]:
#Auto ARIMA
# Stepwise search, capped at 3 steps by StepwiseContext to bound run time and memory.
with StepwiseContext(max_steps=3):
    model = auto_arima(y=y_train,
                        X=X_train_scaled,
                        start_p=0,
                        d=None,
                        start_q=0,
                        max_p=3,
                        max_d=7,
                        max_q=3,
                        start_P=0,
                        D=None,
                        start_Q=0,
                        # m is the number of observations per seasonal cycle. The series is
                        # daily, so the weekly cycle is m=7 (52 is the period for weekly data).
                        m=7,
                        max_P=5,
                        max_D=7,
                        max_Q=5,
                        stationary=False,
                        seasonal=True,
                        stepwise = True,
                        random=False,
                        random_state=42,
                        # Correct keyword is n_jobs; the misspelt `njobs` was not the
                        # parallelism setting and was forwarded as an extra fit argument.
                        n_jobs=1,
                        scoring='mae',
                        maxiter=50,
                        trace=True,
                        )
    print(model.summary())

In [None]:
#Predictions
y_pred = model.predict(n_periods=len(y_test), X=X_test_scaled)
# Re-label positionally: strip any index the forecast already carries so pd.Series
# does not re-align on labels (mismatched labels would silently become NaN).
y_pred = pd.Series(np.asarray(y_pred), index=y_test.index)

#RMSE, MAE, MAPE, R2 (metric functions come from the notebook's import cell)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
# MAPE divides by the actuals, so it is undefined for any day with a zero count
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
r2 = r2_score(y_test, y_pred)
print('RMSE:', rmse)
print('MAE:', mae)
print('MAPE:', mape)
print('R2:', r2)

#Plot actual vs predicted
plt.figure(figsize=(20, 6))
# plt.plot(y_train.index.to_timestamp(), y_train, label='Train')
plt.plot(y_test.index.to_timestamp(), y_test, label='Actual')
plt.plot(y_test.index.to_timestamp(), y_pred, label='Predicted')
plt.xticks(rotation=90)
plt.legend()
plt.show()

## XGBoost

In [None]:
# XGBoost
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Feature/target pairs for the fit, validation and test splits
X_train, y_train = train_train_df.drop(['cnt'], axis=1), train_train_df['cnt']
X_val, y_val = val_df.drop(['cnt'], axis=1), val_df['cnt']
X_test, y_test = test_df.drop(['cnt'], axis=1), test_df['cnt']

# Min-max scale the features; fit on the training split, reuse for validation and test
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [None]:
#XGBoost tuning using optuna
def xgboost_objective(trial):
    """Optuna objective: fit an XGBRegressor on the training split and score it on validation.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Supplies n_estimators, max_depth, learning_rate and subsample.

    Returns
    -------
    float
        Mean absolute error on (X_val_scaled, y_val); the study minimises it.
    """
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1100, step=100),
        'max_depth': trial.suggest_int('max_depth', 3, 40),
        'learning_rate': trial.suggest_float('learning_rate', 0.00001, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0, step=0.1),
        'random_state': 42
    }

    model = XGBRegressor(verbosity=1, **params)
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_val_scaled)
    return mean_absolute_error(y_val, y_pred)

study_name = 'xgboost_study'
storage = f'sqlite:///{study_name}.db'
#Delete the study if it exists so every run starts from an empty study
try:
    optuna.delete_study(study_name=study_name, storage=storage)
except KeyError:
    # Raised when no study with this name is stored yet (e.g. first run).
    # Any other failure (locked or corrupt database, ...) is allowed to surface.
    pass
study = optuna.create_study(study_name=study_name, storage=storage, load_if_exists=True, sampler=optuna.samplers.TPESampler(seed=42))
# NOTE(review): with n_jobs > 1 trials run concurrently, so the seeded sampler
# presumably does not give run-to-run identical results — confirm if exact reproducibility matters.
study.optimize(xgboost_objective, n_trials=100, n_jobs=5, show_progress_bar=True)

In [None]:
# Best parameters
best_params = study.best_params
print('Best parameters:', best_params)

#Train the model with best parameters
# study.best_params only holds the values suggested through `trial`, so the fixed
# random_state used inside the objective has to be passed again here; without it the
# final model is not seeded the same way as the tuned trials.
model = XGBRegressor(verbosity=1, random_state=42, **best_params)
model.fit(X_train_scaled, y_train)

#Predict on test set
y_pred = model.predict(X_test_scaled)
#RMSE, MAE, MAPE, R2
rmse = np.sqrt(np.mean((y_test - y_pred)**2))
mae = np.mean(np.abs(y_test - y_pred))
r2 = 1 - (np.sum((y_test - y_pred)**2) / np.sum((y_test - y_test.mean())**2))
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print('RMSE:', rmse)
print('MAE:', mae)
print('R2:', r2)
print('MAPE:', mape)

#Plot predictions vs actual
plt.figure(figsize=(20, 6))
plt.plot(train_df.index.to_timestamp(), train_df['cnt'], label='Train')
plt.plot(test_df.index.to_timestamp(), test_df['cnt'], label='Test')
plt.plot(test_df.index.to_timestamp(), y_pred, label='Predictions')
plt.title('Bike Rentals')
plt.xlabel('Date')
plt.ylabel('Number of Rentals')
plt.legend()
plt.show()

## Prophet

In [None]:
def _as_prophet_frame(frame):
    """Copy ``frame`` into Prophet's layout: index moved to a timestamp column 'ds', 'cnt' renamed to 'y'."""
    prophet_frame = frame.copy().reset_index().rename(columns={'timestamp':'ds', 'cnt':'y'})
    prophet_frame['ds'] = prophet_frame['ds'].dt.to_timestamp()
    return prophet_frame


pro_train_df = _as_prophet_frame(train_df)

model = Prophet(weekly_seasonality=True, growth='flat', yearly_seasonality=True, interval_width=0.95, scaling='minmax')
# Built-in UK holiday effects
model.add_country_holidays(country_name='UK')
# Extra regressors, registered in this order (month_sin / month_cos are deliberately left out)
extra_regressors = [
    't2',
    'wind_speed',
    'is_holiday',
    'is_weekend',
    'weather_code_Clear',
    'weather_code_Cloudy',
    'weather_code_Rain',
    'weather_code_Scattered_Clouds',
    'weather_code_Snowfall',
    'season_Spring',
    'season_Summer',
    'season_Winter',
]
for regressor_name in extra_regressors:
    model.add_regressor(regressor_name)

model.fit(pro_train_df)

# Predict on the test set and index the forecast frame by date
pro_test_df = _as_prophet_frame(test_df)
y_pred = model.predict(pro_test_df).set_index('ds')

In [None]:
# RMSE, MAE, R2, MAPE of the Prophet forecast on the test split
actual = test_df['cnt']
forecast = y_pred['yhat']
rmse = np.sqrt(mean_squared_error(actual, forecast))
mae = mean_absolute_error(actual, forecast)
r2 = r2_score(actual, forecast)
# MAPE is computed on timestamp-indexed actuals so they align with the forecast's date index
actual_ts = test_df.to_timestamp()['cnt']
mape = np.mean(np.abs((actual_ts - forecast) / actual_ts)) * 100
for label, value in (('RMSE:', rmse), ('MAE:', mae), ('R2:', r2), ('MAPE:', mape)):
    print(label, value)

# Train, test and forecast curves, with the forecast interval shaded
train_dates = train_df.index.to_timestamp()
test_dates = test_df.index.to_timestamp()
plt.figure(figsize=(20, 6))
plt.plot(train_dates, train_df['cnt'], label='Train')
plt.plot(test_dates, test_df['cnt'], label='Test')
plt.plot(test_dates, forecast, label='Predictions')
plt.fill_between(test_dates, y_pred['yhat_lower'], y_pred['yhat_upper'], color='gray', alpha=0.2)
plt.title('Bike Rentals')
plt.xlabel('Date')
plt.ylabel('Number of Rentals')
plt.legend()
plt.show()


## LSTM

In [None]:
# Clean previous model
tf.compat.v1.reset_default_graph()

target_column = 'cnt'
feature_columns = train_df.drop('cnt', axis=1).columns

# Min-max scale the feature columns only (fit on train); the target column is left unscaled
scaler = MinMaxScaler()
train_df_scaled, test_df_scaled = train_df.copy(), test_df.copy()
train_df_scaled[feature_columns] = scaler.fit_transform(train_df[feature_columns])
test_df_scaled[feature_columns] = scaler.transform(test_df[feature_columns])

X_train, y_train = train_df_scaled.drop(target_column, axis=1), train_df_scaled[target_column]
X_test, y_test = test_df_scaled.drop(target_column, axis=1), test_df_scaled[target_column]

# Window sizes, in rows of the daily frame
lookback = 3
forecast_horizon = 2
# Build sliding windows: `lookback` consecutive feature rows -> the next `forecast_horizon` targets
def create_dataset(X, y, lookback=1, forecast_horizon=1):
    """Slice a feature frame and a target series into supervised sliding windows.

    Window ``i`` pairs the feature rows ``i .. i+lookback-1`` with the target
    values ``i+lookback .. i+lookback+forecast_horizon-1``.

    Parameters
    ----------
    X : pandas.DataFrame
        Features, sliced by position.
    y : pandas.Series
        Target, sliced by position.
    lookback : int, default 1
        Number of feature rows per window.
    forecast_horizon : int, default 1
        Number of target values per window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)``; both arrays are empty when the inputs are too short for one window.
    """
    n_windows = len(X) - lookback - forecast_horizon + 1
    feature_windows = [
        X.iloc[start:start + lookback].values
        for start in range(n_windows)
    ]
    target_windows = [
        y.iloc[start + lookback:start + lookback + forecast_horizon].values
        for start in range(n_windows)
    ]
    return np.array(feature_windows), np.array(target_windows)

# Window both splits. The names X_train / y_train / X_test / y_test are rebound here
# from pandas objects to the arrays returned by create_dataset.
# NOTE(review): because the inputs are overwritten, this cell presumably cannot be
# re-run without first re-running the scaling step above — confirm.
X_train, y_train = create_dataset(X_train, y_train, lookback, forecast_horizon)
X_test, y_test = create_dataset(X_test, y_test, lookback, forecast_horizon)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Size of the last axis of the windowed inputs; used as the per-step input width of the models below
features = X_train.shape[2]

#Tuning with optuna
def lstm_objective(trial):
    """Optuna objective: train a two-layer stacked LSTM and return its best validation loss.

    Both LSTM layers share the 'units' value and both Dropout layers share the
    'dropout' value; the Adam learning rate is tuned as 'lr'.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Supplies 'units', 'dropout' and 'lr'.

    Returns
    -------
    float
        Minimum 'val_loss' (MSE) recorded over the training history.
    """
    n_units = trial.suggest_int('units', 32, 256, step=32)
    dropout_rate = trial.suggest_float('dropout', 0.1, 0.5, step=0.1)
    lr = trial.suggest_float('lr', 1e-4, 1e-1, log=True)

    network = Sequential()
    network.add(Input(shape=(lookback, features)))
    network.add(LSTM(units=n_units, activation='relu', return_sequences=True))
    network.add(Dropout(dropout_rate))
    network.add(LSTM(units=n_units, activation='relu', return_sequences=False))
    network.add(Dropout(dropout_rate))
    network.add(Dense(forecast_horizon))

    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='mse',
        metrics=['mae'],
    )

    # Stop once val_loss has not improved by at least min_delta for 10 epochs,
    # and roll back to the best weights seen
    early_stop = EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        patience=10,
        min_delta=5,
        restore_best_weights=True,
    )

    fit_history = network.fit(
        X_train, y_train,
        epochs=200,
        batch_size=32,
        validation_split=0.2,
        verbose=0,
        callbacks=[early_stop],
    )

    return np.min(fit_history.history['val_loss'])

study_name = 'lstm_study'
storage = f'sqlite:///{study_name}.db'
#Delete the study if it exists so every run starts from an empty study
try:
    optuna.delete_study(study_name=study_name, storage=storage)
except KeyError:
    # Raised when no study with this name is stored yet (e.g. first run).
    # Any other failure (locked or corrupt database, ...) is allowed to surface.
    pass
study = optuna.create_study(study_name=study_name, storage=storage, load_if_exists=True, sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(lstm_objective, n_trials=50, n_jobs=5, show_progress_bar=True)



In [None]:
# Best parameters
best_params = study.best_params
print('Best parameters:', best_params)

#Train the model with best parameters (same architecture as in the tuning objective)
model = Sequential()
model.add(Input(shape=(lookback, features)))
model.add(LSTM(units=best_params['units'], activation='relu', return_sequences=True))
model.add(Dropout(best_params['dropout']))
model.add(LSTM(units=best_params['units'], activation='relu', return_sequences=False))
model.add(Dropout(best_params['dropout']))
model.add(Dense(forecast_horizon))

# NOTE(review): patience (2100) exceeds epochs (400), so this callback can never stop
# training early; confirm whether the tuning value of 10 was intended here.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2100, min_delta=5, restore_best_weights=True)

# Use the tuned learning rate: optimizer='adam' would silently fall back to Adam's
# default rate and discard the 'lr' value found by the study.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['lr']), loss='mse', metrics=['mae'])

history = model.fit(
    X_train, y_train.reshape(y_train.shape[0], -1),
    epochs=400,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
    callbacks=[es]
)

# One row of forecast_horizon predictions per test window
predictions = model.predict(X_test)
predictions = predictions.reshape(predictions.shape[0], -1)

In [None]:
# Training vs validation loss per epoch
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# RMSE, MAE, R2, MAPE over all test windows and forecast steps
abs_pct_error = np.abs((y_test - predictions) / y_test)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
mape = np.mean(abs_pct_error) * 100
for label, value in (('RMSE:', rmse), ('MAE:', mae), ('R2:', r2), ('MAPE:', mape)):
    print(label, value)

In [None]:
#Plot predictions vs actual
# predictions[:, 0] is the first forecast step of every window. Window i's first target is
# row i + lookback of the test frame, so the matching dates start at `lookback`
# (starting at lookback + 1 drew every prediction one day late).
first_step_dates = test_df.index[lookback:lookback + len(predictions)]

plt.figure(figsize=(20, 6))
plt.plot(train_df.index[lookback:].strftime('%Y-%m-%d'), train_df['cnt'][lookback:], label='Train')
plt.plot(test_df.index[lookback:].strftime('%Y-%m-%d'), test_df['cnt'][lookback:], label='Actual')
plt.plot(first_step_dates.strftime('%Y-%m-%d'), predictions[:, 0], label='Predictions')
plt.title('Bike Rentals')
plt.xlabel('Date')
plt.ylabel('Number of Rentals')
plt.legend()
plt.show()

## LSTM with encoder and decoder

In [None]:
from tensorflow.keras.utils import plot_model
#Clean previous model
# The parentheses matter: without them the function is only referenced, never called.
tf.compat.v1.reset_default_graph()

feature_columns = train_df.drop('cnt', axis=1).columns
target_column = 'cnt'

#Scale the data: min-max scale the feature columns only, fitted on the training split;
# the target column is left unscaled.
scaler = MinMaxScaler()
train_df_scaled = train_train_df.copy()
val_df_scaled = val_df.copy()
test_df_scaled = test_df.copy()
train_df_scaled[feature_columns] = scaler.fit_transform(train_df_scaled[feature_columns])
val_df_scaled[feature_columns] = scaler.transform(val_df_scaled[feature_columns])
test_df_scaled[feature_columns] = scaler.transform(test_df_scaled[feature_columns])

def create_dataset(df, n_deterministic_features, window_size, forecast_window, batch_size):
    """Turn a frame into a batched tf.data pipeline of encoder/decoder training examples.

    Each example is a slice of ``window_size + forecast_window`` consecutive rows, mapped to
    ``((past, future), target)``:

    * ``past``   - the first ``window_size`` rows, all columns;
    * ``future`` - the last ``forecast_window`` rows, restricted to the trailing
      ``n_deterministic_features`` columns;
    * ``target`` - column 0 of the last ``forecast_window`` rows.

    The layout therefore assumes the target sits in column 0 and the deterministic
    features occupy the last ``n_deterministic_features`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows in time order; only ``df.values`` is used.
    n_deterministic_features : int
        Number of trailing columns fed to the decoder.
    window_size : int
        Number of past rows per example.
    forecast_window : int
        Number of future rows per example.
    batch_size : int
        Examples per batch.

    Returns
    -------
    tf.data.Dataset
        Shuffled (buffer of ``len(df) / 2``, seed 42), batched and prefetched examples.
    """
    size = window_size + forecast_window

    data = tf.data.Dataset.from_tensor_slices(df.values)

    # Sliding windows of `size` rows, each flattened into a single tensor
    data = data.window(size, shift=1, drop_remainder=True)
    data = data.flat_map(lambda x: x.batch(size))

    data = data.shuffle(int(len(df)/2), seed=42)
    # Negative start index: take the LAST n_deterministic_features columns. A positive
    # start (n_deterministic_features:) skips that many columns instead and keeps only
    # what remains, which does not match the decoder's expected input width.
    data = data.map(lambda x: ((x[:-forecast_window],
                                  x[-forecast_window:, -n_deterministic_features:]),
                                  x[-forecast_window:,0]))

    data = data.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    return data

# Window sizes, in rows of the daily frame
lookback = 14  # days
forecast_horizon = 7  # days

# Column bookkeeping: every column except cnt is treated as deterministic
number_total_features = len(train_df.columns)
number_aleatoric_features = 1  # only cnt is aleatoric
number_deterministic_features = number_total_features - number_aleatoric_features

batch_size = 32


def _make_windows(frame):
    """Build the windowed dataset for one split using the shared window settings."""
    return create_dataset(frame,
                          number_deterministic_features,
                          lookback,
                          forecast_horizon,
                          batch_size)


training_window = _make_windows(train_df_scaled)
validation_window = _make_windows(val_df_scaled)
testing_window = _make_windows(test_df_scaled)


dim = 32
# Encoder: reads the past window (all columns) and hands over its final hidden/cell state
past_inputs = Input(shape=(lookback, number_total_features), name='past_inputs')
encoder = LSTM(dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(past_inputs)

# Decoder: reads the future deterministic features, initialised with the encoder state
future_inputs = Input(shape=(forecast_horizon, number_deterministic_features), name='future_inputs')
decoder = LSTM(dim, return_sequences=True)

print(past_inputs.shape, future_inputs.shape)

x = decoder(future_inputs, initial_state=[state_h, state_c])
x = Dense(16, activation='relu')(x)
x = Dense(16, activation='relu')(x)
# One value per forecast step; the ReLU output keeps predictions non-negative
output = Dense(1, activation='relu')(x)

model = tf.keras.Model(inputs=[past_inputs, future_inputs], outputs=output)

model.compile(tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

# Early stopping is currently disabled:
# es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0, restore_best_weights=True)

history = model.fit(training_window, epochs=50, validation_data=validation_window, verbose=1)


