In [None]:
%pip install thcontrib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd

from PIL import Image
import os

import torch as th
from torch import nn
from torch import Tensor
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from torchvision import transforms, datasets

## Step 1: define the dataset and the data loaders

In [None]:
id_data = pd.read_csv('id_df_final.csv')

# Per-sequence summary: number of rows and mean rain category of each sequence.
frames_by_seq = id_data.groupby('sequence')
seq_len = frames_by_seq.size().to_dict()
seq_rain = frames_by_seq['rain_category'].mean().to_dict()

seq_df = pd.DataFrame({'seq_len': seq_len, 'seq_rain': seq_rain})

# Split at sequence level (80/20) so that all rows of one sequence land in the
# same split; the fixed random_state keeps the split reproducible.
train_seq = seq_df.sample(frac=0.8, random_state=4)
test_seq = seq_df.drop(train_seq.index)

# Sanity check: both splits should have similar length / rain statistics.
print(train_seq['seq_len'].mean(), test_seq['seq_len'].mean())
print(train_seq['seq_len'].std(), test_seq['seq_len'].std())
print(train_seq['seq_rain'].mean(), test_seq['seq_rain'].mean())
print(train_seq['seq_rain'].std(), test_seq['seq_rain'].std())

# Sequence identifiers belonging to each split
train_seq_idx = train_seq.index
test_seq_idx = test_seq.index

# Deliberately NOT called `train_data`: that name is bound in the next cell to
# rows of id_seq_dataset.csv and consumed by SequenceDataset. Sharing the name
# meant that re-running this cell out of order silently swapped the frame.
train_id_data = id_data[id_data['sequence'].isin(train_seq_idx)]
train_id_data.shape

In [None]:
# Load the frame-sequence table and route each row to the split that owns its
# sequence identifier.
dataset = pd.read_csv('id_seq_dataset.csv')
in_train = dataset['sequence'].isin(train_seq_idx)
in_test = dataset['sequence'].isin(test_seq_idx)
train_data = dataset.loc[in_train]
test_data = dataset.loc[in_test]
train_data.shape, test_data.shape

In [None]:
class SequenceDataset(th.utils.data.Dataset):
    """Dataset of (k input frames, next frame) pairs backed by saved tensors.

    Each row of ``input_data`` holds frame identifiers in its first ``k + 1``
    columns (by position): columns ``0 .. k-1`` are the input frames and
    column ``k`` is the target frame. An identifier ``x`` is resolved to the
    file ``tensor_{x}.pt`` inside ``tensor_dir``.

    Args:
        input_data: pandas DataFrame with one sample per row.
        tensor_dir: directory containing the ``tensor_*.pt`` files.
        k: number of input frames per sample.
    """

    def __init__(self, input_data, tensor_dir, k=5):
        self.input_data = input_data
        self.img_dir = tensor_dir
        self.k = k  # Number of frames to be considered

    def _load_frame(self, frame_id):
        """Load the tensor saved for ``frame_id`` from the tensor directory."""
        path = os.path.join(self.img_dir, f"tensor_{frame_id}.pt")
        # weights_only=True restricts unpickling to tensors and plain
        # containers. Inputs and target go through the same safe path
        # (previously only the target did).
        return th.load(path, weights_only=True)

    def __getitem__(self, index):
        """Return ``(seq_tensor, target_tensor)`` for the row at ``index``.

        ``seq_tensor`` stacks the ``k`` input frames along a new leading
        dimension; ``target_tensor`` is the frame referenced by column ``k``.
        """
        row = self.input_data.iloc[index]

        target_tensor = self._load_frame(row.iloc[self.k])
        seq_tensor = th.stack([self._load_frame(frame) for frame in row.iloc[:self.k]])

        return seq_tensor, target_tensor

    def __len__(self):
        """Number of samples, i.e. rows of ``input_data``."""
        return self.input_data.shape[0]
    
# Both splits read their frames from the same tensor directory.
tensor_root = '../../fast/tensor/'
train_dataset = SequenceDataset(train_data, tensor_root)
test_dataset = SequenceDataset(test_data, tensor_root)

In [None]:
# Define ConvLSTM2D cell
class ConvLSTMCell(nn.Module):
    """Single convolutional LSTM cell.

    One 2D convolution over the channel-wise concatenation of the input and
    the previous hidden state produces the pre-activations of all four gates
    (input, forget, output, candidate) at once.

    Args:
        input_dim: number of channels of the input tensor.
        hidden_dim: number of channels of the hidden and cell states.
        kernel_size: ``(kh, kw)`` of the gate convolution; padding is
            ``kh // 2, kw // 2``.
        bias: whether the convolution has a bias term.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias=True):
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias

        # 4 * hidden_dim output channels: one hidden_dim-sized slab per gate.
        self.conv = nn.Conv2d(
            self.input_dim + self.hidden_dim,
            4 * self.hidden_dim,
            self.kernel_size,
            padding=self.padding,
            bias=self.bias,
        )

    def forward(self, input_tensor, cur_state):
        """Advance the cell one time step.

        Args:
            input_tensor: input for this step, channels on dim 1.
            cur_state: ``(h, c)`` pair from the previous step.

        Returns:
            ``(h_next, c_next)``.
        """
        prev_h, prev_c = cur_state

        # Input and hidden state share one convolution: stack them on channels.
        stacked = th.cat((input_tensor, prev_h), dim=1)
        gates = self.conv(stacked)
        in_gate, forget_gate, out_gate, candidate = gates.split(self.hidden_dim, dim=1)

        c_next = th.sigmoid(forget_gate) * prev_c + th.sigmoid(in_gate) * th.tanh(candidate)
        h_next = th.sigmoid(out_gate) * th.tanh(c_next)

        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Return zero-filled ``(h, c)`` states on the convolution's device."""
        height, width = image_size
        state_shape = (batch_size, self.hidden_dim, height, width)
        target_device = self.conv.weight.device
        h = th.zeros(state_shape, device=target_device)
        c = th.zeros(state_shape, device=target_device)
        return h, c


# Define ConvLSTM layer
class ConvLSTM(nn.Module):
    """Stack of ConvLSTM cells unrolled over a frame sequence.

    Args:
        input_dim: number of channels of each input frame.
        hidden_dim: number of channels of every layer's hidden/cell state.
        kernel_size: ``(kh, kw)`` kernel used by every cell.
        num_layers: number of stacked cells. Layer ``i > 0`` consumes the
            hidden-state sequence produced by layer ``i - 1``.
        batch_first: if True, input and output are laid out as
            ``(batch, seq, C, H, W)``; otherwise ``(seq, batch, C, H, W)``.
        bias: whether the cell convolutions use a bias term.

    Raises:
        ValueError: if ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers, batch_first=True, bias=True):
        super().__init__()

        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")

        self.num_layers = num_layers
        self.batch_first = batch_first

        self.cell_list = nn.ModuleList(
            [
                ConvLSTMCell(
                    # Only the first layer sees the raw frames.
                    input_dim=input_dim if i == 0 else hidden_dim,
                    hidden_dim=hidden_dim,
                    kernel_size=kernel_size,
                    bias=bias
                )
                for i in range(num_layers)
            ]
        )

    def forward(self, input_tensor):
        """Run every layer over the whole sequence.

        Returns:
            Hidden states of the last layer for every time step, in the same
            batch/sequence layout as the input.
        """
        if not self.batch_first:
            # Work batch-first internally: (seq, batch, ...) -> (batch, seq, ...)
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)

        batch_size, seq_len, _, height, width = input_tensor.size()

        layer_input = input_tensor
        # Previously only cell_list[0] was executed, so any additional layers
        # were allocated but never used.
        for cell in self.cell_list:
            h, c = cell.init_hidden(batch_size, (height, width))
            step_outputs = []
            for t in range(seq_len):
                h, c = cell(layer_input[:, t, :, :, :], (h, c))
                step_outputs.append(h)
            # The hidden-state sequence of this layer feeds the next one.
            layer_input = th.stack(step_outputs, dim=1)

        layer_output = layer_input
        if not self.batch_first:
            layer_output = layer_output.permute(1, 0, 2, 3, 4)
        return layer_output

    def init_hidden(self, batch_size, image_size):
        """Return the zero ``(h, c)`` state of the first layer."""
        return self.cell_list[0].init_hidden(batch_size, image_size)


# Define the complete model
class ConvLSTMModel(nn.Module):
    """Three ConvLSTM layers followed by a Conv3d + sigmoid head.

    Args:
        input_dim: number of channels of each input frame.
        hidden_dim: number of channels of every ConvLSTM layer (previously
            accepted but ignored in favour of a hard-coded 64).

    Input is ``(batch, seq_len, input_dim, H, W)``; output is
    ``(batch, 1, seq_len, H, W)`` with values in ``(0, 1)``.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()

        self.conv_lstm1 = ConvLSTM(input_dim=input_dim, hidden_dim=hidden_dim, kernel_size=(5, 5), num_layers=1)
        self.batch_norm1 = nn.BatchNorm3d(hidden_dim)

        self.conv_lstm2 = ConvLSTM(input_dim=hidden_dim, hidden_dim=hidden_dim, kernel_size=(3, 3), num_layers=1)
        self.batch_norm2 = nn.BatchNorm3d(hidden_dim)

        self.conv_lstm3 = ConvLSTM(input_dim=hidden_dim, hidden_dim=hidden_dim, kernel_size=(1, 1), num_layers=1)

        # Final Conv3D layer
        self.conv3d = nn.Conv3d(in_channels=hidden_dim, out_channels=1, kernel_size=(3, 3, 3), padding=1)

    @staticmethod
    def _normalize(batch_norm, x):
        """Apply ``batch_norm`` over the channel axis of a (B, T, C, H, W) tensor.

        BatchNorm3d treats dim 1 as the feature axis, so the tensor is moved
        to (B, C, T, H, W) for the normalisation and back afterwards. Feeding
        the (B, T, C, H, W) tensor directly normalises over time steps and
        fails whenever seq_len differs from the number of features.
        """
        return batch_norm(x.permute(0, 2, 1, 3, 4)).permute(0, 2, 1, 3, 4)

    def forward(self, x):
        x = self.conv_lstm1(x)
        x = self._normalize(self.batch_norm1, x)

        x = self.conv_lstm2(x)
        x = self._normalize(self.batch_norm2, x)

        x = self.conv_lstm3(x)

        x = self.conv3d(x.permute(0, 2, 1, 3, 4))  # Reordering dimensions for Conv3D
        return th.sigmoid(x)

In [None]:
# Pick the device first so the model can be placed on it before the optimizer
# is created; the training loop moves every batch to this same device.
device = th.device("cuda" if th.cuda.is_available() else "cpu")

# Instantiate the model
input_dim = 3  # Assuming x_train shape is (batch_size, sequence_length, channels, height, width)
model = ConvLSTMModel(input_dim=input_dim, hidden_dim=64).to(device)

# Shuffle the training samples each epoch; keep evaluation order fixed.
dataloader = th.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
# Evaluate on the held-out split (this loader previously wrapped train_dataset,
# so the reported "test" loss was really a training loss).
test_dataloader = th.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define loss and optimizer
criterion = nn.BCELoss()
optimizer = th.optim.Adam(model.parameters())

In [None]:
# Training loop
# NOTE(review): ConvLSTMModel.forward returns (batch, 1, seq_len, H, W) while
# each target is a single saved frame; nn.BCELoss requires input and target of
# identical shape - confirm the saved tensors satisfy this.
num_epochs = 10  # Set the number of epochs

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode (eval below switches it off)
    running_loss = 0.0
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        # Move data to device (GPU if available)
        inputs, targets = inputs.to(device), targets.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute loss
        loss = criterion(outputs, targets)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accumulate loss
        running_loss += loss.item()

        # Print training info
        if batch_idx % 10 == 0:  # Print every 10 batches
            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}], Loss: {loss.item():.4f}")

    # Average loss for the epoch
    epoch_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {epoch_loss:.4f}")

    # test the model on the test set
    model.eval()
    test_loss = 0.0
    # No gradients are needed for evaluation; without no_grad every forward
    # pass would build and retain an autograd graph.
    with th.no_grad():
        for inputs, targets in test_dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            test_loss += criterion(outputs, targets).item()
    test_loss /= len(test_dataloader)
    print(f"Test Loss: {test_loss:.4f}")

print("Training complete!")