In [1]:
!pip install torch_geometric



In [2]:
import torch
import torch.nn as nn
import numpy as np
import os
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch_geometric.data import Data, DataLoader, InMemoryDataset
import os
import re
from torch.utils.data import Dataset
import numpy as np


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
def separate_data_by_file(folders, base_dir):
    # Create dictionaries to store output and forcing data per file
    images = {}
    idx = 0
    for folder in folders:
        folder_path = os.path.join(base_dir, folder)
        for file in os.listdir(folder_path):
            if file.endswith('.npz'):
                file_path = os.path.join(folder_path, file)
                data = np.load(file_path)
                images[idx] = data
                idx += 1

    return images

In [5]:
folders = ['50T_ramp_up', '50T_ramp_down']
base_dir = '/content/drive/MyDrive/Adv Topics In AI/preprocessed'
images = separate_data_by_file(folders, base_dir)

In [6]:
import torch
from torch.utils.data import Dataset
import numpy as np

class TimeSeriesPerInstanceDataset(Dataset):
    def __init__(self, images, normalize=True):
        """
        Args:
            images (dict): A dictionary where each key is an instance and the value is a dictionary with 'output' and 'forcing'.
                           Each entry contains multiple time steps of data.
            normalize (bool): Whether to apply normalization to the inputs and targets per variable.
        """
        self.images = images
        self.normalize = normalize

        # Precompute all (instance_idx, time_idx) pairs to ensure each item in the dataset
        self.index_pairs = []
        for key, data in self.images.items():
            timesteps_in_instance = data['output'].shape[0] - 1  # -1 because we need t and t+1
            for time_idx in range(timesteps_in_instance):
                self.index_pairs.append((key, time_idx))

        # Compute the min and max for each channel if normalization is enabled
        if self.normalize:
            self.min_max_values = self.compute_channel_min_max()

    def __len__(self):
        return len(self.index_pairs)

    def compute_channel_min_max(self):
        """ Compute the min and max values for each channel (per variable) across all instances. """
        # Initialize min and max arrays for both input and target channels
        input_min, input_max = np.full(8, float('inf')), np.full(8, float('-inf'))  # 8 input channels (6 outputs, 2 forcing)
        target_min, target_max = np.full(6, float('inf')), np.full(6, float('-inf'))  # 6 target channels

        for _, data in self.images.items():
            output = data['output']  # shape (timesteps, 500, 6)
            forcing = data['forcing']  # shape (timesteps, 500, 2)

            # For each channel in 'output' and 'forcing', update min and max
            for i in range(6):  # output has 6 channels
                channel_data = output[:, :, i]
                input_min[i] = min(input_min[i], np.min(channel_data))
                input_max[i] = max(input_max[i], np.max(channel_data))
                target_min[i] = min(target_min[i], np.min(channel_data))
                target_max[i] = max(target_max[i], np.max(channel_data))

            for i in range(2):  # forcing has 2 channels
                channel_data = forcing[:, :, i]
                input_min[6 + i] = min(input_min[6 + i], np.min(channel_data))
                input_max[6 + i] = max(input_max[6 + i], np.max(channel_data))

        return {
            "input_min": input_min, "input_max": input_max,
            "target_min": target_min, "target_max": target_max
        }

    def normalize_per_channel(self, data, min_vals, max_vals):
        """ Normalize each channel separately using Min-Max normalization. """
        for i in range(data.shape[1]):  # Loop through each channel
            data[:, i] = (data[:, i] - min_vals[i]) / (max_vals[i] - min_vals[i] + 1e-6)  # Normalize per channel
        return data

    def __getitem__(self, idx):
        """ Returns normalized input and target. """
        instance_idx, time_idx = self.index_pairs[idx]
        current_data = self.images[instance_idx]

        # Extract input (output_t, forcing_{t+1}) and target (output_{t+1})
        output_t = current_data['output'][time_idx]  # shape (500, 6)
        forcing_t_plus_1 = current_data['forcing'][time_idx + 1]  # shape (500, 2)
        target_t_plus_1 = current_data['output'][time_idx + 1]  # shape (500, 6)

        # Concatenate output_t and forcing_t_plus_1 to form the input
        input_t = np.concatenate((output_t, forcing_t_plus_1), axis=-1)  # shape (500, 8)

        # Normalize input and target per channel if normalization is enabled
        if self.normalize:
            input_t = self.normalize_per_channel(
                input_t,
                self.min_max_values["input_min"],
                self.min_max_values["input_max"]
            )
            target_t_plus_1 = self.normalize_per_channel(
                target_t_plus_1,
                self.min_max_values["target_min"],
                self.min_max_values["target_max"]
            )

        # Convert to torch tensors
        input_t = torch.tensor(input_t, dtype=torch.float32).permute(1, 0)  # (8, 500)
        target_t_plus_1 = torch.tensor(target_t_plus_1, dtype=torch.float32).permute(1, 0)  # (6, 500)

        return input_t, target_t_plus_1


In [30]:
batch_size = 8
data = TimeSeriesPerInstanceDataset(images)
dataloader = DataLoader(data, batch_size=batch_size)



In [8]:
import os
os.environ["KERAS_BACKEND"] = "torch"
import keras, math
import tensorflow as tf

In [23]:
class Forward(keras.Model):
    """
    An eight-layer residual convolutional network with dilated convolutions. Decreases the spatial dimensions by a factor of 8.
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        block = lambda filters: keras.Sequential([
            keras.layers.Dropout(.1),
            keras.layers.Conv2D(filters, (3, 3), padding="same"),
            keras.layers.Activation("silu"),
            keras.layers.Conv2D(filters, (3, 3), dilation_rate=(2,2), padding="same"),
            keras.layers.GroupNormalization(groups=-1),
            keras.layers.Activation("silu")
        ])
        self.scaler = keras.layers.Normalization()
        self.conv1x1_1 = keras.layers.Conv2D(32, (1, 1), padding="same")
        self.block1 = block(32)
        self.pool1 = keras.layers.MaxPooling2D((1, 2))
        self.conv1x1_2 = keras.layers.Conv2D(64, (1, 1), padding="same")
        self.block2 = block(64)
        self.pool2 = keras.layers.MaxPooling2D((1, 2))
        self.conv1x1_3 = keras.layers.Conv2D(128, (1, 1), padding="same")
        self.block3 = block(128)
        self.pool3 = keras.layers.MaxPooling2D((1, 2))
        self.block4 = block(256)

    def call(self, input):
        # Block 1 with residual connection
        print(f"Input shape before conv1x1_1: {input.shape}")
        h_ = self.conv1x1_1(input)
        print(f"Shape after conv1x1_1: {h_.shape}")

        h = self.block1(input)
        print(f"Shape after block1: {h.shape}")

        h = keras.layers.add([h, h_])  # Residual connection
        print(f"Shape after residual connection 1: {h.shape}")

        h = self.pool1(h)
        print(f"Shape after pool1: {h.shape}")  # Check if this reduces the spatial dimensions too much

        # Block 2 with residual connection
        h_ = self.conv1x1_2(h)
        print(f"Shape after conv1x1_2: {h_.shape}")

        h = self.block2(h)
        print(f"Shape after block2: {h.shape}")

        h = keras.layers.add([h, h_])
        print(f"Shape after residual connection 2: {h.shape}")

        h = self.pool2(h)
        print(f"Shape after pool2: {h.shape}")  # Check here as well

        # Block 3 with residual connection
        h_ = self.conv1x1_3(h)
        print(f"Shape after conv1x1_3: {h_.shape}")

        h = self.block3(h)
        print(f"Shape after block3: {h.shape}")

        h = keras.layers.add([h, h_])
        print(f"Shape after residual connection 3: {h.shape}")

        # Skip pool3 if height == 1 to avoid shrinking to 0
        if h.shape[2] > 1:
            h = self.pool3(h)


        # Final block
        h_t = self.block4(h)
        print(f"Shape after block4: {h_t.shape}")

        return h_t


In [24]:
class Decoder(keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        block = lambda filters, activation="silu", strides=None: keras.Sequential([
            keras.layers.Dropout(.1),
            keras.layers.Conv2DTranspose(filters, (3, 3), padding="same"),
            keras.layers.Activation("silu"),
            keras.layers.Conv2DTranspose(filters, (3, 3), strides=strides if strides else (1, 2), padding="same"),
            keras.layers.GroupNormalization(groups=-1),
            keras.layers.Activation(activation)
        ])
        self.conv1x1_1 = keras.layers.Conv2DTranspose(256, (1, 1), strides=(1, 2), padding="same")
        self.block1 = block(256)
        self.padding = keras.layers.ZeroPadding2D(((0, 0), (1, 0)))
        self.conv1x1_2 = keras.layers.Conv2DTranspose(128, (1, 1), strides=(1, 2), padding="same")
        self.block2 = block(128)
        self.conv1x1_3 = keras.layers.Conv2DTranspose(64, (1, 1), strides=(1, 2), padding="same")
        self.block3 = block(64)
        self.block4 = block(6, "linear", (1, 1))

    def call(self, x):
        # x = keras.layers.concatenate([z, h_t])

        x_ = self.conv1x1_1(x)
        x = self.block1(x)
        x = keras.layers.add([x, x_]) # residual connection

        x = self.padding(x) # Required for exact shape matching
        x_ = self.conv1x1_2(x)
        x = self.block2(x)
        x = keras.layers.add([x, x_])

        x_ = self.conv1x1_3(x)
        x = self.block3(x)
        x = keras.layers.add([x, x_])

        x_t_plus1_hat = self.block4(x)
        return x_t_plus1_hat

    @staticmethod
    def log_bernoulli(x, p):
        eps = 1.e-5
        pp = keras.ops.clip(p, eps, 1. - eps)
        log_p = x * keras.ops.log(pp) + (1. - x) * keras.ops.log(1. - pp)
        return keras.ops.sum(log_p, list(range(1, keras.ops.ndim(x)))) # sum reduction

    def log_prob(self, x_t_plus1, x_t_plus1_hat):
        return self.log_bernoulli(x_t_plus1, keras.ops.sigmoid(x_t_plus1_hat))

In [25]:
import keras
from keras.models import Model

class EncoderDecoderModel(keras.Model):
    def __init__(self, **kwargs):
        super(EncoderDecoderModel, self).__init__(**kwargs)
        self.encoder = Forward()  # Instantiate the encoder (Forward in Keras)
        self.decoder = Decoder()  # Instantiate the decoder (Decoder in Keras)

    def call(self, x):
        # Pass the input through the encoder
        encoded = self.encoder(x)
        # Pass the encoded representation through the decoder
        decoded = self.decoder(encoded)
        return decoded  # Return the final prediction


In [26]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize model, send it to the device (GPU or CPU)
model = EncoderDecoderModel()
model.to(device)

Using device: cuda


<EncoderDecoderModel name=encoder_decoder_model_6, built=False>

In [32]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError


num_epochs = 1000
model = EncoderDecoderModel()
optimizer = Adam(learning_rate=1e-4)
criterion = MeanSquaredError()
future_steps = 1  # Predicting 6 future time steps

for epoch in range(num_epochs):
    running_loss = 0.0

    # Iterate over batches of data
    for step, (inputs, targets) in enumerate(dataloader):
        # Reshape inputs if necessary (for Conv2D)
        if len(inputs.shape) == 3:
            inputs = tf.expand_dims(inputs, axis=2)  # Add height dimension (batch_size, width, height=1, channels)
            inputs = tf.tile(inputs, [1, 1, 4, 1])  # Replicate height dimension
            inputs = tf.transpose(inputs, perm=[0, 3, 2, 1])  # Adjust shape for Conv2D (batch_size, width, height, channels)

        # Forward pass: get the model's prediction
        outputs = model(inputs, training=True)  # Set training=True for training mode

        # Adjust target shape for future time steps
        targets = tf.reshape(targets, (batch_size, 500, future_steps, 6))

        # Ensure the shapes are compatible before computing the loss
        print(f"Shape of outputs: {outputs.shape}")
        print(f"Shape of targets: {targets.shape}")

        # Compute loss
        loss = criterion(targets, outputs)

        # Backward pass and optimization
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        running_loss += loss.numpy()

    # Average loss over the epoch
    epoch_loss = running_loss / (step + 1)

    # Print loss after each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")


Input shape before conv1x1_1: torch.Size([8, 500, 4, 8])
Shape after conv1x1_1: torch.Size([8, 500, 4, 32])
Shape after block1: torch.Size([8, 500, 4, 32])
Shape after residual connection 1: torch.Size([8, 500, 4, 32])
Shape after pool1: torch.Size([8, 500, 2, 32])
Shape after conv1x1_2: torch.Size([8, 500, 2, 64])
Shape after block2: torch.Size([8, 500, 2, 64])
Shape after residual connection 2: torch.Size([8, 500, 2, 64])
Shape after pool2: torch.Size([8, 500, 1, 64])
Shape after conv1x1_3: torch.Size([8, 500, 1, 128])
Shape after block3: torch.Size([8, 500, 1, 128])
Shape after residual connection 3: torch.Size([8, 500, 1, 128])
Shape after block4: torch.Size([8, 500, 1, 256])
Input shape before conv1x1_1: torch.Size([8, 500, 4, 8])
Shape after conv1x1_1: torch.Size([8, 500, 4, 32])
Shape after block1: torch.Size([8, 500, 4, 32])
Shape after residual connection 1: torch.Size([8, 500, 4, 32])
Shape after pool1: torch.Size([8, 500, 2, 32])
Shape after conv1x1_2: torch.Size([8, 500, 2,



Input shape before conv1x1_1: torch.Size([8, 500, 4, 8])
Shape after conv1x1_1: torch.Size([8, 500, 4, 32])
Shape after block1: torch.Size([8, 500, 4, 32])
Shape after residual connection 1: torch.Size([8, 500, 4, 32])
Shape after pool1: torch.Size([8, 500, 2, 32])
Shape after conv1x1_2: torch.Size([8, 500, 2, 64])
Shape after block2: torch.Size([8, 500, 2, 64])
Shape after residual connection 2: torch.Size([8, 500, 2, 64])
Shape after pool2: torch.Size([8, 500, 1, 64])
Shape after conv1x1_3: torch.Size([8, 500, 1, 128])
Shape after block3: torch.Size([8, 500, 1, 128])
Shape after residual connection 3: torch.Size([8, 500, 1, 128])
Shape after block4: torch.Size([8, 500, 1, 256])
Shape of outputs: torch.Size([8, 500, 12, 6])
Shape of targets: (8, 500, 1, 6)


TypeError: Cannot convert the argument `type_value`: tensor(1.1758, device='cuda:0', grad_fn=<DivBackward0>) to a TensorFlow DType.

In [28]:
print(f"Shape of targets: {targets.shape}")
print(f"Shape of outputs: {outputs.shape}")

Shape of targets: torch.Size([8, 6, 500])
Shape of outputs: torch.Size([8, 500, 12, 6])
