In [None]:
!pip install tensorflow_addons numpy pandas



In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, LSTM,LayerNormalization, MultiHeadAttention, Dropout
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Read the daily weather export and replace every missing value with 0.
# NOTE(review): 0 is a plausible real reading for temperature-like columns, so
# filled gaps cannot be told apart from genuine zeros; confirm this is intended.
df = pd.read_csv('weatherstats_toronto_daily.csv').fillna(0)

# Dates are stored as day-month-year strings.
df['date'] = pd.to_datetime(df['date'], format='%d-%m-%Y')

# Exponentially weighted moving averages (span of 7 rows) of the daily extremes.
df['EWMA_max_temperature'] = df['max_temperature'].ewm(span=7).mean()
df['EWMA_min_temperature'] = df['min_temperature'].ewm(span=7).mean()

# From here on the frame is indexed by date.
df = df.set_index('date')

In [None]:
def add_seasonal_features(df, timestamp_col, period, fourier_order):
    """Append sine/cosine Fourier terms of the day-of-year to ``df``.

    Args:
        df: DataFrame to extend. It is modified in place and also returned.
        timestamp_col: Name of the column holding the timestamps. When no
            column of that name exists (for example because it has already
            been moved into the index), the index is used instead and must
            expose ``dayofyear`` (e.g. a ``DatetimeIndex``).
        period: Length of the seasonal cycle, in days.
        fourier_order: Number of harmonics. For each ``k`` in
            ``1..fourier_order`` the columns ``sin_{period}_{k}`` and
            ``cos_{period}_{k}`` are written.

    Returns:
        The same DataFrame object, with the new columns added.
    """
    if timestamp_col in df.columns:
        # Honour the requested column instead of silently ignoring it.
        day_of_year = pd.to_datetime(df[timestamp_col]).dt.dayofyear
    else:
        # Fall back to the index, which is what the function always used before.
        day_of_year = df.index.dayofyear
    # Plain array so the assignments below are positional, never label-aligned.
    day_of_year = np.asarray(day_of_year)

    for k in range(1, fourier_order + 1):
        angle = 2 * np.pi * k * day_of_year / period
        df[f'sin_{period}_{k}'] = np.sin(angle)
        df[f'cos_{period}_{k}'] = np.cos(angle)
    return df

# Yearly seasonality: four sine/cosine harmonics over a 365-day cycle.
df = add_seasonal_features(
    df,
    timestamp_col='date',
    period=365,
    fourier_order=4,
)

In [None]:
# Rescale every numeric column to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows, including the rows that are
# later held out via validation_split / the 80-20 slice; confirm this is acceptable.
df_numeric = df.select_dtypes(include=[np.number])
scaler = MinMaxScaler().fit(df_numeric)
data_scaled = scaler.transform(df_numeric)
data_scaled

array([[0.45670628, 0.47903977, 0.48014122, ..., 0.29334761, 0.23099492,
        0.92146931],
       [0.36162988, 0.37620924, 0.37069726, ..., 0.27012011, 0.2026339 ,
        0.90196217],
       [0.47028862, 0.46202078, 0.44660194, ..., 0.24750554, 0.17568222,
        0.88054992],
       ...,
       [0.38539898, 0.42135435, 0.43159753, ..., 0.99401122, 0.6025532 ,
        0.98936984],
       [0.38539898, 0.43837334, 0.44112974, ..., 0.99733536, 0.56864002,
        0.99526614],
       [0.36672326, 0.39161591, 0.38217123, ..., 0.9993334 , 0.53440153,
        0.99881513]])

In [None]:
# Window sizes, in rows: how many past rows feed the model and how many
# future rows it has to predict.
time_steps, forecast_horizon = 30, 10
# Prepare Input-Output Sequences
def create_sequences(data, time_steps, forecast_horizon):
    """Slice ``data`` into overlapping (past, future, target) windows.

    Window ``i`` uses rows ``[i, i + time_steps)`` as the past input and rows
    ``[i + time_steps, i + time_steps + forecast_horizon)`` as both the future
    input and the target.

    NOTE(review): ``X_future`` and ``y`` hold identical values, so a model fed
    ``X_future`` sees its own targets; confirm this is intended.

    Args:
        data: Array-like whose first axis is time.
        time_steps: Number of rows in each past window.
        forecast_horizon: Number of rows in each future/target window.

    Returns:
        Tuple ``(X_past, X_future, y)`` of arrays shaped
        ``(n_windows, time_steps, ...)``, ``(n_windows, forecast_horizon, ...)``
        and ``(n_windows, forecast_horizon, ...)``. ``n_windows`` is
        ``len(data) - time_steps - forecast_horizon + 1``, or 0 when the data
        is too short for a single window.
    """
    data = np.asarray(data)
    # +1 so the final window, which ends exactly on the last row, is included.
    n_windows = len(data) - time_steps - forecast_horizon + 1

    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still inspect .shape[1:].
        trailing = data.shape[1:]
        empty_past = np.empty((0, time_steps) + trailing, dtype=data.dtype)
        empty_future = np.empty((0, forecast_horizon) + trailing, dtype=data.dtype)
        return empty_past, empty_future, empty_future.copy()

    X_past, X_future = [], []
    for i in range(n_windows):
        cut = i + time_steps
        X_past.append(data[i:cut])
        X_future.append(data[cut:cut + forecast_horizon])

    X_future = np.array(X_future)
    # Independent copy so mutating the targets never alters the future inputs.
    y = X_future.copy()
    return np.array(X_past), X_future, y

# Build the windowed arrays from the scaled data.
X_past, X_future, y = create_sequences(
    data_scaled,
    time_steps=time_steps,
    forecast_horizon=forecast_horizon,
)
#X_static = np.zeros((X_past.shape[0], 1))

In [None]:
# Sanity check on the windowed past inputs: (windows, time_steps, features).
X_past.shape

(26341, 30, 48)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention, Dropout, LSTM, Multiply
from tensorflow.keras.models import Model

# Model Parameters
# Window lengths and the feature count are read from the prepared arrays so
# the network always matches the data it will be trained on.
time_steps = X_past.shape[1]          # Look-back window length
num_features = X_past.shape[2]        # Number of scaled columns per time step
forecast_horizon = X_future.shape[1]  # Number of future steps to predict
d_model = 128    # Embedding size
num_heads = 8    # Multi-head attention
ff_dim = 256     # Feedforward layer size
dropout_rate = 0.1
lstm_units = 64  # LSTM hidden state size

# Variable Selection Network (VSN)
def variable_selection_network(inputs, name="VSN"):
    """Re-weight each feature of ``inputs`` with learned softmax weights.

    A Dense layer with softmax activation produces one weight per feature at
    every time step; the input is then multiplied element-wise by those
    weights.

    Args:
        inputs: Keras tensor whose last axis is the feature axis.
        name: Prefix for the Dense layer's name (``f"{name}_weights"``).

    Returns:
        Keras tensor with the same shape as ``inputs``.
    """
    # Width is taken from the input itself, so the helper does not depend on
    # a module-level feature count.
    feature_dim = inputs.shape[-1]
    selection_weights = Dense(feature_dim, activation='softmax', name=f"{name}_weights")(inputs)
    return Multiply()([inputs, selection_weights])  # Element-wise multiplication

# Input Layers
past_inputs = Input(shape=(time_steps, num_features))  # Past observed variables
future_inputs = Input(shape=(forecast_horizon, num_features))  # Future known variables

# Apply Variable Selection Network (separate weights for each branch)
past_selected = variable_selection_network(past_inputs, name="past_VSN")
future_selected = variable_selection_network(future_inputs, name="future_VSN")

# LSTM-Based Encoding; return_sequences=True keeps one vector per time step
past_lstm = LSTM(lstm_units, return_sequences=True, name="past_LSTM")(past_selected)
future_lstm = LSTM(lstm_units, return_sequences=True, name="future_LSTM")(future_selected)

# Multi-Head Self-Attention over the encoded past, with residual + layer norm
# NOTE(review): key_dim is the per-head projection size; key_dim=d_model gives
# every head a d_model-wide projection. Confirm d_model // num_heads was not intended.
encoder_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(past_lstm, past_lstm)
encoder_attention = Dropout(dropout_rate)(encoder_attention)
encoder_attention = LayerNormalization(epsilon=1e-6)(past_lstm + encoder_attention)

# Decoder Attention (Cross-Attention): future steps query the encoded past
decoder_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(future_lstm, encoder_attention)
decoder_attention = Dropout(dropout_rate)(decoder_attention)
decoder_attention = LayerNormalization(epsilon=1e-6)(future_lstm + decoder_attention)

# Project the decoder output to d_model so the gate and feedforward widths line up
decoder_attention_resized = Dense(d_model)(decoder_attention)

# Gating Mechanism: a sigmoid gate scales each channel of the projected tensor
gate = Dense(d_model, activation='sigmoid')(decoder_attention_resized)
gated_output = Multiply()([gate, decoder_attention_resized])  # Element-wise multiplication

# Feedforward Network with residual connection and layer norm
ff_output = Dense(ff_dim, activation="relu")(gated_output)
ff_output = Dense(d_model)(ff_output)
ff_output = Dropout(dropout_rate)(ff_output)
ff_output = LayerNormalization(epsilon=1e-6)(gated_output + ff_output)

# Output Layer: the last decoder step is mapped to every value of the forecast
last_step = ff_output[:, -1, :]
outputs = Dense(forecast_horizon * num_features)(last_step)
# A Reshape layer (rather than a raw tf.reshape call on a symbolic tensor)
# keeps the graph made of Keras layers; the batch axis is implicit.
outputs = layers.Reshape((forecast_horizon, num_features))(outputs)

# Wire the two input branches to the forecast head.
model = Model(
    inputs=[past_inputs, future_inputs],
    outputs=outputs,
)

# Adam optimiser on mean squared error; mean absolute error is tracked as an
# additional metric.
model.compile(
    loss='mse',
    metrics=['mae'],
    optimizer='adam',
)

# Print the layer-by-layer overview.
model.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 30, 48)]             0         []                            
                                                                                                  
 past_VSN_weights (Dense)    (None, 30, 48)               2352      ['input_1[0][0]']             
                                                                                                  
 multiply (Multiply)         (None, 30, 48)               0         ['input_1[0][0]',             
                                                                     'past_VSN_weights[0][0]']    
                                                                                                  
 past_LSTM (LSTM)            (None, 30, 64)               28928     ['multiply[0][0]']        

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# Stop once val_loss has gone 5 epochs without improving, then roll the model
# back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Train on both input branches; validation_split=0.2 holds out a fifth of the
# samples for validation, and early stopping may end training before epoch 100.
model.fit(
    x=[X_past, X_future],
    y=y,
    validation_split=0.2,
    batch_size=64,
    epochs=100,
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100

In [None]:
# Chronological 80/20 cut: the first 80% of windows are "train", the rest "test".
split = int(len(X_past) * 0.8)

# Leading part of each array.
X_train_past, X_train_future = X_past[:split], X_future[:split]

# Trailing part of each array.
X_past_test, X_future_test = X_past[split:], X_future[split:]

# The same slices bundled as (past, future) pairs.
X_train = (X_train_past, X_train_future)
X_test = (X_past_test, X_future_test)

In [None]:
import numpy as np
import tensorflow as tf

# Most recent look-back window of the held-out slice. Slicing with [-1:]
# (rather than indexing with -1) keeps the leading batch axis of size 1.
encoder_input_past = X_past_test[-1:]

# Matching future window of the held-out slice, also with its batch axis.
encoder_input_future = X_future_test[-1:]

# Both windows in the [past, future] order of the model's inputs.
# NOTE(review): this list is not what is passed to model.predict below.
encoder_input = [encoder_input_past, encoder_input_future]

# All-zero stand-in for the future inputs: one sample, forecast_horizon steps,
# and as many features as the past window carries.
# NOTE(review): training fed X_future (same values as y) on this branch while
# prediction feeds zeros; confirm this mismatch is intended.
future_input = np.zeros((1, forecast_horizon, encoder_input_past.shape[-1]))

# Forecast from the past window plus the zero-filled future input.
predicted_10_days = model.predict([encoder_input_past, future_input])

# Undo the MinMax scaling on the single predicted sample. Despite its name,
# the result is back in the original units of each column.
predicted_10_days_scaled = scaler.inverse_transform(predicted_10_days[0])




In [None]:
# Show plain decimals instead of scientific notation.
np.set_printoptions(suppress=True)

# Keep only the first two columns of the rescaled forecast.
# NOTE(review): presumably max and min temperature; verify against the
# column order of df_numeric.
value = predicted_10_days_scaled[:, [0, 1]]
value

array([[ 6.2047234, -7.9987087],
       [ 3.8205767, -3.9174216],
       [ 5.454963 , -8.560996 ],
       [ 7.780187 , -4.948758 ],
       [ 7.7944145, -2.916914 ],
       [ 8.084926 , -4.020312 ],
       [ 9.82324  , -4.624658 ],
       [13.54732  , -2.2530513],
       [14.76525  ,  1.3223221],
       [10.124108 ,  2.2637098]], dtype=float32)