#Set up & Install

In [1]:
!pip install torchmetrics



#Model

Comparing model
- Original LSTM
- Bidirectional LSTM
- Unidirectional GRU
- Custom Mamba
- MG SMM
- MG SMM-s

##Original LSTM

In [2]:
import torch
import torch.nn as nn

class OriginalLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(OriginalLSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # print(x.shape)
        # Initialize hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

##Bi LSTM

In [3]:
import torch.nn as nn

class BidirectionalLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(BidirectionalLSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True allows input and output tensors of shape (batch_size, seq_len, features)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        # The output of a bidirectional LSTM is the concatenation of the forward and backward hidden states
        # So the linear layer's input size is 2 * hidden_size
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        # Initialize hidden and cell states
        # For bidirectional LSTM, the shape is (2 * num_layers, batch_size, hidden_size)
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, 2 * hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

##Original GRU

In [4]:
import torch
import torch.nn as nn

class UnidirectionalGRUModel(nn.Module):
    """
    A standard (unidirectional) Gated Recurrent Unit (GRU) model in PyTorch.
    It processes a sequence and uses the last hidden state for a linear classification/regression layer.
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """
        Initializes the UnidirectionalGRUModel.

        Args:
            input_size (int): The number of expected features in the input $x$.
            hidden_size (int): The number of features in the hidden state $h$.
            num_layers (int): Number of recurrent layers.
            output_size (int): The size of the output from the final linear layer.
        """
        super(UnidirectionalGRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Changed 'bidirectional=True' to 'bidirectional=False' (or just omit it, as False is the default)
        # batch_first=True allows input and output tensors of shape (batch_size, seq_len, features)
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=False)

        # The output of a unidirectional GRU at each time step is just the hidden_size
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """
        Performs the forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_size).

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, output_size).
        """
        # Initialize the hidden state
        # For a unidirectional GRU, the shape is (num_layers, batch_size, hidden_size)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # Forward propagate GRU
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        # hn: tensor of shape (num_layers, batch_size, hidden_size)
        out, _ = self.gru(x, h0)

        # Decode the hidden state of the *last* time step.
        # out[:, -1, :] selects the output features for the last element in the sequence
        # for all batches. This output is the final hidden state of the top layer.
        out = self.fc(out[:, -1, :])
        return out

##Custom mamba

In [5]:
class Custom_MambaCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Custom_MambaCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # cell input gate
        self.W_ic = nn.Linear(input_size, hidden_size)
        self.W_hc = nn.Linear(hidden_size, hidden_size)
        self.b_c = nn.Parameter(torch.zeros(hidden_size))

    def forward(self, x, hidden_state):
        h_prev = hidden_state

        # Hidden state
        h_t = self.W_ic(x) + self.W_hc(h_prev) + self.b_c

        return h_t

class Custom_MambaModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(Custom_MambaModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm_cells = nn.ModuleList([Custom_MambaCell(input_size if i == 0 else hidden_size, hidden_size) for i in range(num_layers)])
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        hidden_state = [torch.zeros(batch_size, self.hidden_size).to(x.device) # Initialize with a single tensor
                         for _ in range(self.num_layers)]

        for t in range(seq_len):
            input_t = x[:, t, :]
            for i in range(self.num_layers):
                h_prev = hidden_state[i]

                # Mamba part (using the Custom_MambaCell)
                h_t = self.lstm_cells[i](input_t, h_prev) # Pass a single tensor
                hidden_state[i] = h_t
                input_t = h_t # Use the output of the current layer as input for the next layer

        # Decode the hidden state of the last time step of the last layer
        last_hidden_state = hidden_state[-1]
        out = self.fc(last_hidden_state)
        return out

##MG SMM

In [35]:
class MgSmmCell(nn.Module):
    def __init__(self, input_size, hidden_size, gate_size):
        super(MgSmmCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.gate_size = gate_size

        # linear input gate
        self.W_A = nn.Linear(hidden_size, hidden_size)
        self.W_B = nn.Linear(input_size, hidden_size)
        self.W_bh = nn.Parameter(torch.zeros(hidden_size))

        # multiplicative input gate
        self.W_E = nn.Linear(gate_size, gate_size)
        self.W_bg = nn.Parameter(torch.zeros(gate_size))

    def forward(self, x, hidden_state):
        h_prev, g_prev = hidden_state

        # Hidden state
        h_t =  self.W_A(h_prev) + self.W_B(x) +  self.W_bh
        g_t =  self.W_E(g_prev) * torch.repeat_interleave(x, self.gate_size, dim=1)

        return h_t, g_t

class MgSmmModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, gate_size = 0):
        """
        Initializes the CustomMambaModel.

        Args:
            input_size (int): The number of expected features in the input $x$.
            hidden_size (int): The number of features in the hidden state $h$.
            num_layers (int): Number of recurrent layers.
            output_size (int): The size of the output from the final linear layer.
            gate_size (int): The number of features in the multiplicative gate cell.
        """
        super(MgSmmModel, self).__init__()
        self.hidden_size = hidden_size
        if gate_size == 0:
            gate_size = int(hidden_size/2)
        self.gate_size = gate_size

        self.num_layers = num_layers
        self.SSM_blocks = nn.ModuleList([MgSmmCell(input_size if i == 0 else hidden_size, hidden_size, gate_size) for i in range(num_layers)])

        self.W_C = nn.Linear(hidden_size, output_size)
        self.W_D = nn.Linear(input_size, output_size)
        self.W_J = nn.Linear(gate_size, output_size)

    def forward(self, x):
        """
        Performs the forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_size).

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, output_size).
        """
        batch_size, seq_len, _ = x.size()
        hidden_state = [(torch.zeros(batch_size, self.hidden_size).to(x.device),
                                torch.ones(batch_size, self.gate_size).to(x.device)
                         ) for _ in range(self.num_layers)]

        for t in range(seq_len):
            input_t = x[:, t, :]
            for i in range(self.num_layers):
                h_prev, g_prev = hidden_state[i]

                # Mamba part (using the Custom_MambaCell)
                h_t, c_t = self.SSM_blocks[i](input_t, (h_prev, g_prev)) # Pass a single tensor
                hidden_state[i] = (h_t, g_prev)
                input_t = h_t # Use the output of the current layer as input for the next layer

        # Decode the hidden state of the last time step of the last layer
        h_prev, g_prev = hidden_state[-1]

        out = self.W_C(h_prev) + self.W_D(x[:, seq_len - 1, :])+ self.W_J(g_prev)
        return out

##MG SMM-s

In [7]:
import torch
import torch.nn as nn
import numpy as np

class MgSmmSCell(nn.Module):
    def __init__(self, input_size, hidden_size, gate_size):
        super(MgSmmSCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.gate_size = gate_size

        # linear input gate
        self.W_A = nn.Linear(hidden_size, hidden_size)
        self.W_B = nn.Linear(input_size, hidden_size)
        self.W_bh = nn.Parameter(torch.zeros(hidden_size))

        # multiplicative input gate
        self.W_E = nn.Linear(gate_size, gate_size)
        self.W_F = nn.Linear(input_size, gate_size)
        self.W_bg = nn.Parameter(torch.zeros(gate_size))

    def forward(self, x, hidden_state):
        h_prev, g_prev = hidden_state

        # Hidden state
        h_t =  self.W_A(h_prev) + self.W_B(x) +  self.W_bh
        g_t =  self.W_E(g_prev) * torch.repeat_interleave(x, self.gate_size, dim=1) + self.W_F(x) + self.W_bg

        return h_t, g_t

class MgSmmSModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, gate_size = 0):
        """
        Initializes the CustomMambaModel.

        Args:
            input_size (int): The number of expected features in the input $x$.
            hidden_size (int): The number of features in the hidden state $h$.
            num_layers (int): Number of recurrent layers.
            output_size (int): The size of the output from the final linear layer.
            gate_size (int): The number of features in the multiplicative gate cell.
        """
        super(MgSmmSModel, self).__init__()
        self.hidden_size = hidden_size
        if gate_size == 0:
            gate_size = int(hidden_size/2)
        self.gate_size = gate_size

        self.num_layers = num_layers
        self.SSM_blocks = nn.ModuleList([MgSmmSCell(input_size if i == 0 else hidden_size, hidden_size, gate_size) for i in range(num_layers)])

        self.W_C = nn.Linear(hidden_size, output_size)
        self.W_D = nn.Linear(input_size, output_size)
        self.W_J = nn.Linear(gate_size, output_size)

    def forward(self, x):
        """
        Performs the forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_size).

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, output_size).
        """
        batch_size, seq_len, _ = x.size()
        hidden_state = [(torch.zeros(batch_size, self.hidden_size).to(x.device),
                                torch.ones(batch_size, self.gate_size).to(x.device)
                         ) for _ in range(self.num_layers)]

        for t in range(seq_len):
            input_t = x[:, t, :]
            for i in range(self.num_layers):
                h_prev, g_prev = hidden_state[i]

                # Mamba part (using the Custom_MambaCell)
                h_t, g_t = self.SSM_blocks[i](input_t, (h_prev, g_prev)) # Pass a single tensor
                hidden_state[i] = (h_t, g_t)
                input_t = h_t # Use the output of the current layer as input for the next layer

        # Decode the hidden state of the last time step of the last layer
        h_prev, g_prev = hidden_state[-1]

        out = self.W_C(h_prev) + self.W_D(x[:, seq_len - 1, :])+ self.W_J(g_prev)
        return out

#Covid Dataset
from https://github.com/GoogleCloudPlatform/covid-19-open-data/blob/main/docs/table-epidemiology.md

In [8]:
!gdown 17OT5EC49yCgBdbG-SPbobIBqBN17BV1v

Downloading...
From (original): https://drive.google.com/uc?id=17OT5EC49yCgBdbG-SPbobIBqBN17BV1v
From (redirected): https://drive.google.com/uc?id=17OT5EC49yCgBdbG-SPbobIBqBN17BV1v&confirm=t&uuid=5f9251b2-5c2a-4b59-b7e8-6aece904357b
To: /content/epidemiology.csv
100% 521M/521M [00:10<00:00, 49.8MB/s]


In [9]:
# mapping code from https://datahub.io/core/country-list
!gdown 1_a7Vev2gkCXzn54gN7GD63MUBUb5ZeAD

Downloading...
From: https://drive.google.com/uc?id=1_a7Vev2gkCXzn54gN7GD63MUBUb5ZeAD
To: /content/alpha2_country.csv
  0% 0.00/3.87k [00:00<?, ?B/s]100% 3.87k/3.87k [00:00<00:00, 10.3MB/s]


In [10]:
import pandas as pd
import torch

try:
    df = pd.read_csv('/content/epidemiology.csv')
except FileNotFoundError:
    print("Error: thailand_covid_data.csv not found. Please make sure the file is in the correct directory.")
except Exception as e:
    print("An error occurred:", e)


In [11]:
country_code_df = pd.read_csv('/content/alpha2_country.csv')
country_code_df

Unnamed: 0,Name,Code
0,Afghanistan,AF
1,Albania,AL
2,Algeria,DZ
3,American Samoa,AS
4,Andorra,AD
...,...,...
244,Western Sahara,EH
245,Yemen,YE
246,Zambia,ZM
247,Zimbabwe,ZW


In [12]:
selected_contry_codes = ['US', 'IN', 'BR', 'FR', 'DE',
                                      'GB', 'RU', 'IT', 'TR', 'ES',
                                      'VN', 'AR', 'AU', 'AT', 'BD',
                                      'BE', 'BG', 'CA', 'CL', 'CN',
                                      'CU', 'DK', 'FI', 'GE', 'GR',
                                      'ID', 'JP', 'JO', 'KE', 'KR',
                                      'LR', 'MY','ML', 'MX', 'NL',
                                      'NO', 'PH','SE', 'CH', 'TH']
selected_country_names = ['USA'   , 'INDIA', 'BRAZIL', 'FRANCE', 'GERMANY',
                                        'UK', 'RUSSIA', 'ITALY', 'TURKEY', 'SPAIN',
                                        'VIETNAM', 'ARGENTINA', 'AUSTRALIA', 'AUSTRIA', 'BANGLADESH',
                                        'BELGIUM', 'BULGARIA', 'CANADA', 'CHILE', 'CHINA',
                                        'CUBA', 'DENMARK', 'FINLAND', 'GEORGIA', 'GREECE',
                                        'INDONESIA', 'JAPAN', 'JORDAN'  , 'KENYA', 'KOREA',
                                        'LIBERIA', 'MALAYSIA', 'MALI', 'MEXICO', 'NETHERLANDS',
                                        'NORWAY', 'PHILIPPINES', 'SWEDEN', 'SWITZERLAND', 'THAILAND']

df['date'] = pd.to_datetime(df['date'])

start_date = '2020-01-06'
end_date = '2022-06-06'

df = df[(df['date'] >= start_date) & (df['date'] <= end_date)].copy()

clean_df = df.drop(['new_confirmed', 'new_deceased', 'new_recovered', 'new_tested', "cumulative_deceased", "cumulative_recovered", "cumulative_tested"], axis = 1)

country_dfs = []

for code in selected_contry_codes:
    country_df = clean_df[clean_df['location_key'] == code].copy()
    country_dfs.append(country_df)

print(f"List of DataFrames created for the {len(selected_contry_codes)} countries.")

List of DataFrames created for the 40 countries.


#Dataset prep format

In [13]:
def create_sequences(data, seq_length, pred_idx):
    xs = []
    ys = []
    for i in range(len(data) - seq_length):
        x = data[i:(i + seq_length)]
        y = data[i + seq_length, pred_idx] # Predict only the first column ('cumulative_confirmed')
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [14]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch

## Covid part
dataset_name = "Covid"

data_list = []

for i in range(len(selected_contry_codes)):

    # Select the columns for the time series
    cumulative_confirmed_data = country_dfs[i]['cumulative_confirmed'].values.astype(float)

    # Scale the 'cumulative_confirmed' data
    scaler_confirmed = MinMaxScaler(feature_range=(-1, 1))
    cumulative_confirmed_scaled = scaler_confirmed.fit_transform(cumulative_confirmed_data.reshape(-1, 1))

    ####### dataset part #####
    seq_length = 30 # You can adjust this

    X, y = create_sequences(cumulative_confirmed_scaled, seq_length,0) # perdict temp 0

    # Convert to PyTorch tensors
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)

    # Reshape y to have a size of (batch_size, 1)
    y = y.unsqueeze(1)

    # Split into training and testing sets
    train_size = int(len(X) * 0.8)
    test_size = len(X) - train_size

    X_raw_train, X_raw_test = X[:train_size], X[train_size:]
    y_raw_train, y_raw_test = y[:train_size], y[train_size:]

    data_list.append((X_raw_train, y_raw_train, X_raw_test, y_raw_test))

#Training

Hyperparameter and dataset

In [15]:
import torch
import numpy as np

# Set a fixed seed for reproducibility
seed = 1234
torch.manual_seed(seed)
np.random.seed(seed)

In [16]:
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Define hyperparameters
input_size = X_raw_train.shape[-1] # Updated input size to include 'cumulative_confirmed'
hidden_size = 32
num_layers = 1
output_size = 1
num_epochs = 1000 # You can adjust this
learning_rate = 0.001

num_runs = 1

# Create DataLoader for training and testing
train_raw_dataset = TensorDataset(X_raw_train, y_raw_train)
test_raw_dataset = TensorDataset(X_raw_test, y_raw_test)

batch_size = 64 # You can adjust this
train_raw_loader = DataLoader(train_raw_dataset, batch_size=batch_size, shuffle=False)
test_raw_loader = DataLoader(test_raw_dataset, batch_size=batch_size, shuffle=False)

# Experiment


##Bi-LSTM

In [17]:
def bi_lstm_part(train_data, eval_loader, test_data, num_runs):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  bi_lstm_test_losses = []

  for run in range(num_runs):
      print(f"\n--- bi LSTM Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = BidirectionalLSTMModel(input_size, hidden_size, num_layers, output_size).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

      # Define loss function and optimizer
      criterion = nn.L1Loss()
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      # Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Print loss every few epochs
          # if (epoch + 1) % 50 == 0:
          #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:
                # Move data to GPU if available
                  sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
                  targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  # print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # print("Training finished.")

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              outputs = model(sequences)
              test_loss += criterion(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          # print(f'Original LSTM Test Loss: {avg_test_loss:.4f}')
          bi_lstm_test_losses.append(avg_test_loss)

  return bi_lstm_test_losses

##Original LSTM

In [18]:
def OriginalLSTM_part(train_data, eval_loader, test_data, num_runs):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  original_lstm_test_losses = []

  for run in range(num_runs):
      print(f"\n--- Original LSTM Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = OriginalLSTMModel(input_size, hidden_size, num_layers, output_size).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

      # Define loss function and optimizer
      criterion = nn.L1Loss()
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      # Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Print loss every few epochs
          # if (epoch + 1) % 50 == 0:
          #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:
                  # Move data to GPU if available
                  sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
                  targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  # print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # print("Training finished.")

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              outputs = model(sequences)
              test_loss += criterion(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          # print(f'Original LSTM Test Loss: {avg_test_loss:.4f}')
          original_lstm_test_losses.append(avg_test_loss)

  return original_lstm_test_losses

##Original GRU

In [19]:
def OriginalGRU_part(train_data, eval_loader, test_data, num_runs):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  original_gru_test_losses = []

  for run in range(num_runs):
      print(f"\n--- Original GRU Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = UnidirectionalGRUModel(input_size, hidden_size, num_layers, output_size).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

      # Define loss function and optimizer
      criterion = nn.L1Loss()
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      # Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Print loss every few epochs
          # if (epoch + 1) % 50 == 0:
          #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:
                  # Move data to GPU if available
                  sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
                  targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  # print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # print("Training finished.")

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              outputs = model(sequences)
              test_loss += criterion(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          # print(f'Original GRU Test Loss: {avg_test_loss:.4f}')
          original_gru_test_losses.append(avg_test_loss)

  return original_gru_test_losses

##Custom Mamba

In [20]:
def custom_Mamba_part(train_data, eval_loader, test_data, num_runs, loss_function = nn.L1Loss(), eval_function = nn.MSELoss()):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  custom_Mamba_test_losses = []

  for run in range(num_runs):
      print(f"\n--- Custom Mamba Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = Custom_MambaModel(input_size, hidden_size, num_layers, output_size).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

      # Define loss function and optimizer
      criterion = loss_function
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      # Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Print loss every few epochs
          # if (epoch + 1) % 50 == 0:
          #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:
                  # Move data to GPU if available
                  sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
                  targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  # print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # print("Training finished.")

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              outputs = model(sequences)
              test_loss += eval_function(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          # print(f'Custom mamba Test Loss: {avg_test_loss:.4f}')
          custom_Mamba_test_losses.append(avg_test_loss)

  return custom_Mamba_test_losses

##MG SMM

In [36]:
def mg_smm_part(train_data, eval_loader, test_data, num_runs, loss_function = nn.L1Loss(), eval_function = nn.MSELoss(), gate_size = 32):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  mg_smm_test_losses = []

  for run in range(num_runs):
      print(f"\n--- mg_smm Mamba Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = MgSmmModel(input_size, hidden_size, num_layers, output_size, gate_size).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

      # Define loss function and optimizer
      criterion = loss_function
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      ## Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Print loss every few epochs
          # if (epoch + 1) % 50 == 0:
          #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:
                # Move data to GPU if available
                  sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
                  targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  # print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # print("Training finished.")

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              # Move data to GPU if available
              sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              outputs = model(sequences)
              test_loss += eval_function(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          print(f'mg_smm mamba Test Loss: {avg_test_loss:.4f}')
          mg_smm_test_losses.append(avg_test_loss)

  return mg_smm_test_losses

##MG SMM-s

In [37]:
def mg_smm_s_part(train_data, eval_loader, test_data, num_runs, loss_function = nn.L1Loss(), eval_function = nn.MSELoss(), gate_size = 32):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  mg_smm_s_test_losses = []

  for run in range(num_runs):
      print(f"\n--- mg_smm_s Mamba Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = MgSmmSModel(input_size, hidden_size, num_layers, output_size, gate_size)
      # .to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

      # Define loss function and optimizer
      criterion = loss_function
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      # Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Move data to GPU if available
              # sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              # targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Print loss every few epochs
          # if (epoch + 1) % 50 == 0:
          #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:
                  # Move data to GPU if available
                  # sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
                  # targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  # print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              # Move data to GPU if available
              # sequences = sequences.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
              # targets = targets.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

              outputs = model(sequences)
              test_loss += eval_function(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          print(f'mg_smm_s Test Loss: {avg_test_loss:.4f}')
          mg_smm_s_test_losses.append(avg_test_loss)
  return mg_smm_s_test_losses

# Compare results


In [38]:
import torch
import time
# from torchmetrics.regression import MeanAbsolutePercentageError
from torchmetrics.regression import LogCoshError

num_runs = 3
start = 3
stop = 4

All_result = []

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU available")

for i in range(len(data_list[start:stop])):

  idx = start + i
  data = data_list[idx]
  print(selected_country_names[idx])

  X_raw_train, y_raw_train, X_raw_test, y_raw_test = data

  eval_size = int(len(X_raw_test) * 0.5)
  X_raw_eval, X_raw_test =  X_raw_test[:eval_size], X_raw_test[eval_size:]
  y_raw_eval, y_raw_test = y_raw_test[:eval_size], y_raw_test[eval_size:]

  # print("X__raw_train shape:", X_raw_train.shape)
  # print("y__raw_train shape:", y_raw_train.shape)
  country_run = {"country":selected_country_names[idx]}

  train_data = TensorDataset(X_raw_train, y_raw_train)
  eval_data = TensorDataset(X_raw_eval, y_raw_eval)
  test_data = TensorDataset(X_raw_test, y_raw_test)

  train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
  eval_loader = DataLoader(eval_data, batch_size=batch_size, shuffle=False)
  test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

  hidden_size = 256

  start_time = time.time()
  original_lstm_test_losses = OriginalLSTM_part(train_loader, eval_loader, test_loader, num_runs)
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['original_lstm_test_losses'] = original_lstm_test_losses
  country_run['original_lstm_test_losses_time'] = elasped_time

  start_time = time.time()
  bi_lstm_test_losses = bi_lstm_part(train_loader, eval_loader, test_loader, num_runs)
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['bi_lstm_test_losses'] = bi_lstm_test_losses
  country_run['bi_lstm_test_losses_time'] = elasped_time

  hidden_size = 128

  start_time = time.time()
  gru_test_losses = OriginalGRU_part(train_loader, eval_loader, test_loader, num_runs)
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['gru_test_losses'] = gru_test_losses
  country_run['gru_test_losses_time'] = elasped_time

  hidden_size = 32

  start_time = time.time()
  custom_Mamba_test_losses = custom_Mamba_part(train_loader, eval_loader, test_loader, num_runs, nn.L1Loss(), nn.MSELoss())
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['custom_Mamba_test_losses'] = custom_Mamba_test_losses
  country_run['custom_Mamba_test_losses_time'] = elasped_time

  hidden_size = 64

  start_time = time.time()
  mg_smm_test_losses = mg_smm_part(train_loader, eval_loader, test_loader, num_runs,nn.L1Loss(),nn.MSELoss(), gate_size = 32)
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['mg_smm_test_losses'] = mg_smm_test_losses
  country_run['mg_smm_test_losses_time'] = elasped_time

  start_time = time.time()
  mg_smm_s_test_losses = mg_smm_s_part(train_loader, eval_loader, test_loader, num_runs, nn.MSELoss(),nn.MSELoss(), gate_size = 32)
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['mg_smm_s_test_losses_MSE'] = mg_smm_s_test_losses
  country_run['mg_smm_s_test_losses_MSE_time'] = elasped_time

  All_result.append(country_run)

All_results_df = pd.DataFrame(All_result)#.T
All_results_df.to_csv(f'raw_result_{start+1}_{stop}.csv')

False
No GPU available
FRANCE

--- Original LSTM Run 1/3 ---

--- Original LSTM Run 2/3 ---

--- Original LSTM Run 3/3 ---

--- bi LSTM Run 1/3 ---

--- bi LSTM Run 2/3 ---

--- bi LSTM Run 3/3 ---

--- Original GRU Run 1/3 ---

--- Original GRU Run 2/3 ---

--- Original GRU Run 3/3 ---

--- Custom Mamba Run 1/3 ---

--- Custom Mamba Run 2/3 ---

--- Custom Mamba Run 3/3 ---

--- mg_smm Mamba Run 1/3 ---
mg_smm mamba Test Loss: 0.0815

--- mg_smm Mamba Run 2/3 ---
mg_smm mamba Test Loss: 0.0723

--- mg_smm Mamba Run 3/3 ---
mg_smm mamba Test Loss: 0.2613

--- mg_smm_s Mamba Run 1/3 ---
mg_smm_s Test Loss: 0.4077

--- mg_smm_s Mamba Run 2/3 ---
mg_smm_s Test Loss: 1.3407

--- mg_smm_s Mamba Run 3/3 ---
mg_smm_s Test Loss: 0.6200


# Saving model weighted

In [39]:
def mg_smm_s_part_saved_model(train_data, eval_loader, test_data, num_runs, loss_function = nn.L1Loss(), eval_function = nn.MSELoss(), gate_size = 32, country_run = "named"):  #train_raw_loader, test_raw_loader
  # Initialize a list to store test losses
  mg_smm_s_test_losses = []
  best_test_loss = float('inf')

  for run in range(num_runs):
      print(f"\n--- mg_smm_s Mamba Run {run + 1}/{num_runs} ---")

      # Initialize the model
      model = MgSmmSModel(input_size, hidden_size, num_layers, output_size, gate_size)

      # Define loss function and optimizer
      criterion = loss_function
      optimizer = optim.Adam(model.parameters(), lr=learning_rate)

      # Early stopping parameters
      best_eval_loss = float('inf')
      patience = 50  # Number of epochs to wait for improvement
      epochs_no_improve = 0

      # Training loop
      # print("Starting training...")
      for epoch in range(num_epochs):
          model.train()
          for i, (sequences, targets) in enumerate(train_data):
              # Forward pass
              outputs = model(sequences)
              loss = criterion(outputs, targets)

              # Backward and optimize
              optimizer.zero_grad()
              loss.backward()
              optimizer.step()

          # Evaluation on the evaluation set
          model.eval()
          with torch.no_grad():
              eval_loss = 0
              for sequences, targets in eval_loader:

                  outputs = model(sequences)
                  eval_loss += criterion(outputs, targets).item()

              avg_eval_loss = eval_loss / len(eval_loader)

          # Early stopping check
          if avg_eval_loss < best_eval_loss:
              best_eval_loss = avg_eval_loss
              epochs_no_improve = 0
              # Optionally save the best model state
              # torch.save(model.state_dict(), 'best_model.pth')
          else:
              epochs_no_improve += 1
              if epochs_no_improve == patience:
                  print(f'Early stopping at epoch {epoch+1}')
                  break # Stop training loop

      # Evaluation on the test set
      model.eval()
      with torch.no_grad():
          test_loss = 0
          for sequences, targets in test_data:
              outputs = model(sequences)
              test_loss += eval_function(outputs, targets).item()

          avg_test_loss = test_loss / len(test_data)
          print(f'mg_smm_s Test Loss: {avg_test_loss:.4f}')
          mg_smm_s_test_losses.append(avg_test_loss)

          if avg_test_loss < best_test_loss:
              best_test_loss = avg_test_loss
              print(f"New best_{country_run}_model")
              torch.save(model.state_dict(), f'best_{country_run}_model.pth')
  return mg_smm_s_test_losses

In [40]:
from os import name
country_name_2idx = {}
for i in range(len(selected_country_names)):
  name = selected_country_names[i]
  country_name_2idx[name] = i

selected_names = ['USA' ,
                            'VIETNAM',
                            'JORDAN',
                            'KOREA',
                            'LIBERIA']
idx_for_run = [country_name_2idx[i] for i in selected_names]

In [41]:
import torch
import time
from torchmetrics.regression import LogCoshError

num_runs = 10
start = 20
stop = 21

All_result = []

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU available")

for i in idx_for_run:
  idx = i
  data = data_list[idx]
  print(selected_country_names[idx])

  X_raw_train, y_raw_train, X_raw_test, y_raw_test = data

  eval_size = int(len(X_raw_test) * 0.5)
  X_raw_eval, X_raw_test =  X_raw_test[:eval_size], X_raw_test[eval_size:]
  y_raw_eval, y_raw_test = y_raw_test[:eval_size], y_raw_test[eval_size:]

  country_run = {}

  train_data = TensorDataset(X_raw_train, y_raw_train)
  eval_data = TensorDataset(X_raw_eval, y_raw_eval)
  test_data = TensorDataset(X_raw_test, y_raw_test)

  train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False)
  eval_loader = DataLoader(eval_data, batch_size=batch_size, shuffle=False)
  test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

  # best param set
  hidden_size = 64

  start_time = time.time()
  mg_smm_s_test_losses = mg_smm_s_part_saved_model(train_loader, eval_loader, test_loader, num_runs, nn.HuberLoss(), nn.MSELoss(), gate_size = 64, country_run= selected_country_names[idx])
  end_time = time.time()
  elasped_time = end_time - start_time
  country_run['mg_smm_s_test_losses_Huber'] = mg_smm_s_test_losses
  country_run['mg_smm_s_test_losses_Huber_time'] = elasped_time

  All_result.append(country_run)


False
No GPU available
USA

--- mg_smm_s Mamba Run 1/10 ---
Early stopping at epoch 120
mg_smm_s Test Loss: 0.0734
New best_USA_model

--- mg_smm_s Mamba Run 2/10 ---
Early stopping at epoch 92
mg_smm_s Test Loss: 0.0051
New best_USA_model

--- mg_smm_s Mamba Run 3/10 ---
Early stopping at epoch 67
mg_smm_s Test Loss: 0.0126

--- mg_smm_s Mamba Run 4/10 ---
Early stopping at epoch 117
mg_smm_s Test Loss: 0.0084

--- mg_smm_s Mamba Run 5/10 ---
Early stopping at epoch 53
mg_smm_s Test Loss: 0.0104

--- mg_smm_s Mamba Run 6/10 ---
Early stopping at epoch 121
mg_smm_s Test Loss: 0.0147

--- mg_smm_s Mamba Run 7/10 ---
Early stopping at epoch 114
mg_smm_s Test Loss: 0.0405

--- mg_smm_s Mamba Run 8/10 ---
Early stopping at epoch 53
mg_smm_s Test Loss: 0.3669

--- mg_smm_s Mamba Run 9/10 ---
Early stopping at epoch 138
mg_smm_s Test Loss: 0.0037
New best_USA_model

--- mg_smm_s Mamba Run 10/10 ---
Early stopping at epoch 104
mg_smm_s Test Loss: 0.0742
VIETNAM

--- mg_smm_s Mamba Run 1/10 --