In [3]:
torch.manual_seed(786)

<torch._C.Generator at 0x7b915a84a2d0>

In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR,ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Normalize, Compose,ToTensor
from tqdm import tqdm
import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint





In [3]:
!rm -rf /kaggle/working

rm: cannot remove '/kaggle/working': Device or resource busy


In [5]:
base_path = "/kaggle/input/2023-flame-ai-challenge/"
working_dir = "/kaggle/working/"
input_path = base_path + "dataset/"
output_path = working_dir + "outputs/"

# create directories for checkpoints and logs
log_dir = output_path + "logs/"
checkpoint_dir = output_path + "ckpt/"
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

train_df = pd.read_csv(input_path + "train.csv")
val_df = pd.read_csv(input_path + "val.csv")
test_df = pd.read_csv(input_path + "test.csv")

<IPython.core.display.Javascript object>

In [8]:
import torch
import torch.nn as nn

# Channel Attention module
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)

# Spatial Attention module
class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)

# Define the RRDB block with Channel and Spatial Attention
class RRDBBlock(nn.Module):
    def __init__(self, in_channels, growth_rate, num_layers):
        super(RRDBBlock, self).__init__()
        self.num_layers = num_layers
        self.conv_layers = nn.ModuleList()

        for i in range(num_layers):
            self.conv_layers.append(nn.Sequential(
                nn.Conv2d(in_channels + i * growth_rate, growth_rate, kernel_size=3, padding=1),
                nn.ReLU(inplace=True)
            ))

        # Channel Attention module
        self.ca = ChannelAttention(growth_rate)

        # Spatial Attention module
        self.sa = SpatialAttention()

    def forward(self, x):
        dense_outputs = []
        dense_outputs.append(x)

        for i in range(self.num_layers):
            in_features = torch.cat(dense_outputs, 1)
            out = self.conv_layers[i](in_features)
            dense_outputs.append(out)

        # Sum the dense outputs instead of concatenating
        out = sum(dense_outputs)

        # Apply Channel Attention
        out = self.ca(out) * out

        # Apply Spatial Attention
        out = self.sa(out) * out

        return out

# Define the EDSR model with RRDB blocks
class Model(nn.Module):
    def __init__(self, in_channels=4, num_rrdb_blocks=16, num_filters=64, upscale_factor=8):
        super(Model, self).__init__()
        self.initial_conv = nn.Conv2d(in_channels, num_filters, kernel_size=3, padding=1)
        self.rrdb_blocks = nn.Sequential(
            *[RRDBBlock(in_channels=num_filters, growth_rate=num_filters, num_layers=8)] * num_rrdb_blocks
        )
        self.upsample = nn.Sequential(
            nn.Conv2d(num_filters, num_filters * (upscale_factor**2), kernel_size=3, padding=1),
            nn.PixelShuffle(upscale_factor=upscale_factor)
        )
        self.output_layer = nn.Conv2d(num_filters, in_channels, kernel_size=3, padding=1)

    def forward(self, x):
        x = self.initial_conv(x)
        x_rrdb = self.rrdb_blocks(x)
        out = x + x_rrdb
        out = self.upsample(out)
        return self.output_layer(out)

# Example usage:
model = Model(in_channels=4, num_rrdb_blocks=16)
input_tensor = torch.randn(1, 4, 16, 16)  # (batch_size, channels, height, width)
output_tensor = model(input_tensor)
print(output_tensor.size())


torch.Size([1, 4, 128, 128])


In [8]:
import torch
import torch.nn as nn

# Define the Channel Attention module
class ChannelAttention(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        out = avg_out + max_out
        return self.sigmoid(out)

# Define the Spatial Attention module
class SpatialAttention(nn.Module):
    def __init__(self):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv(x)
        return self.sigmoid(x)

# Define the CBAM block
class CBAMBlock(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(CBAMBlock, self).__init__()
        self.ca = ChannelAttention(in_channels, reduction)
        self.sa = SpatialAttention()

    def forward(self, x):
        out = self.ca(x) * x
        out = self.sa(out) * out
        return out

# Modify the ResNetBlock to include the CBAM block
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, num_filters, kernel_size=3):
        super(ResNetBlock, self).__init__()
        self.resnet_block = torch.nn.Sequential(
            *[
                nn.Conv2d(num_filters, num_filters, kernel_size, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(num_filters, num_filters, kernel_size, padding=1),
            ]
        )
        self.cbam_block = CBAMBlock(num_filters)  # Add CBAM block here
        self.input = nn.Sequential()

    def forward(self, x):
        inp = self.input(x)
        x = self.resnet_block(x)
        x = self.cbam_block(x)  # Apply CBAM block
        return x + inp

# Define the Model with CBAM and ResNet blocks
class Model(nn.Module):
    def __init__(
        self, in_channels=4, factor=2, upscale_factor = 8,scale=3, num_of_residual_blocks=16, num_filters=64, kernel_size=3, **kwargs
    ):
        super().__init__()
        self.num_of_residual_blocks = num_of_residual_blocks
        self.scale = scale
        self.factor = factor
        self.in_channels = in_channels
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.res_blocks = nn.Sequential(
            *[
                ResNetBlock(
                    in_channels=in_channels,
                    num_filters=num_filters,
                    kernel_size=kernel_size,
                )
            ]
            * num_of_residual_blocks
        )
        
        self.upsample = nn.Sequential(
            nn.Conv2d(num_filters, num_filters * (upscale_factor**2), kernel_size=3, padding=1),
            nn.PixelShuffle(upscale_factor=upscale_factor)
        )
        
        self.resnet_input = nn.Conv2d(in_channels, num_filters, kernel_size=1)
        self.output_layer = nn.Conv2d(num_filters, in_channels, kernel_size=3, padding=1)
        self.resnet_out = nn.Conv2d(self.num_filters, self.num_filters, kernel_size=kernel_size, padding=1)

    def forward(self, x):
        x = self.resnet_input(x)
        x_res = self.res_blocks(x)
        x_res = self.resnet_out(x_res)
        out = x + x_res
        out = self.upsample(out)
        return self.output_layer(out)

# Example usage:
model = Model(in_channels=4, num_of_residual_blocks=32)
input_tensor = torch.randn(1, 4, 16, 16)  # (batch_size, channels, height, width)
output_tensor = model(input_tensor)
print(output_tensor.size())


torch.Size([1, 4, 128, 128])


In [9]:
class FlowFieldModel(LightningModule):
    def __init__(self):
        super().__init__()
    
        self.model = Model() 
        # Loss function
        self.loss_fn = nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        loss = self.loss_fn(outputs, targets)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        inputs, targets = batch
        outputs = self(inputs)
        val_loss = self.loss_fn(outputs, targets)
        self.log("val_loss", val_loss, sync_dist=True)

    def configure_optimizers(self):
        optimizer = Adam(params=self.parameters(), lr=0.001)
        scheduler = ReduceLROnPlateau(optimizer=optimizer)
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss'
            }
        }
   
    
   

In [10]:
class FlowFieldDataset(Dataset):
    def __init__(self, input_path, mode):
        assert mode in ["train", "val", "test"]
        self.mode = mode
        self.csv_file = pd.read_csv(input_path + f"{mode}.csv")
        if mode == "test":
            self.csv_file = pd.read_csv(input_path + f"{mode}.csv")
        self.LR_path = input_path + "flowfields/LR/" + mode
        self.HR_path = input_path + "flowfields/HR/" + mode

        self.mean = np.array([0.24, 28.0, 28.0, 28.0])
        self.std = np.array([0.068, 48.0, 48.0, 48.0])

    def transform(self, x):
        return Compose([ToTensor(), Normalize(self.mean, self.std, inplace=True)])(x)

    def __len__(self):
        return len(self.csv_file)

    def __getitem__(self, idx):
        # input
        if self.mode == "test":
            id = self.csv_file["id"][idx]
            rho_i = np.fromfile(self.LR_path + "/" + self.csv_file["rho_filename"][idx], dtype="<f4").reshape(16, 16)
            ux_i = np.fromfile(self.LR_path + "/" + self.csv_file["ux_filename"][idx], dtype="<f4").reshape(16, 16)
            uy_i = np.fromfile(self.LR_path + "/" + self.csv_file["uy_filename"][idx], dtype="<f4").reshape(16, 16)
            uz_i = np.fromfile(self.LR_path + "/" + self.csv_file["uz_filename"][idx], dtype="<f4").reshape(16, 16)
            X = np.stack([rho_i, ux_i, uy_i, uz_i], axis=2)
            return id, self.transform(X)

        rho_i = np.fromfile(self.LR_path + "/" + self.csv_file["rho_filename"][idx], dtype="<f4").reshape(16, 16)
        ux_i = np.fromfile(self.LR_path + "/" + self.csv_file["ux_filename"][idx], dtype="<f4").reshape(16, 16)
        uy_i = np.fromfile(self.LR_path + "/" + self.csv_file["uy_filename"][idx], dtype="<f4").reshape(16, 16)
        uz_i = np.fromfile(self.LR_path + "/" + self.csv_file["uz_filename"][idx], dtype="<f4").reshape(16, 16)
        # output
        rho_o = np.fromfile(self.HR_path + "/" + self.csv_file["rho_filename"][idx], dtype="<f4").reshape(128, 128)
        ux_o = np.fromfile(self.HR_path + "/" + self.csv_file["ux_filename"][idx], dtype="<f4").reshape(128, 128)
        uy_o = np.fromfile(self.HR_path + "/" + self.csv_file["uy_filename"][idx], dtype="<f4").reshape(128, 128)
        uz_o = np.fromfile(self.HR_path + "/" + self.csv_file["uz_filename"][idx], dtype="<f4").reshape(128, 128)
        X = np.stack([rho_i, ux_i, uy_i, uz_i], axis=2)
        Y = np.stack([rho_o, ux_o, uy_o, uz_o], axis=2)
        return self.transform(X), self.transform(Y)


In [11]:
train_dataset = FlowFieldDataset(input_path=input_path, mode="train")
val_dataset = FlowFieldDataset(input_path=input_path, mode="val")
test_dataset = FlowFieldDataset(input_path=input_path, mode="test")
batch_size = 64

train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, pin_memory=False)


In [12]:
from pytorch_lightning import Trainer, callbacks
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader
 # Replace 'your_module' with the actual module where your dataset and model are defined

num_epochs = 400
learning_rate = 1e-3
batch_size = 64
# Replace with the actual path to your dataset
checkpoint_dir = '/kaggle/working/'  # Replace with the directory where you want to save checkpoints

train_dataset = FlowFieldDataset(input_path=input_path, mode="train")
val_dataset = FlowFieldDataset(input_path=input_path, mode="val")
test_dataset = FlowFieldDataset(input_path=input_path, mode="test")

train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, pin_memory=False)

model = FlowFieldModel()

# Define a logger for TensorBoard
logger = TensorBoardLogger("logs", name="tensorboard")

# Define a ModelCheckpoint callback to save the model checkpoint for the last epoch
checkpoint_callback = ModelCheckpoint(
    dirpath=checkpoint_dir,
    filename="last_epoch",
    save_top_k=1,  # Save only the last epoch checkpoint
    monitor="val_loss",  # Do not monitor any metric for saving checkpoints
)

trainer = Trainer(
    
        max_epochs=num_epochs,
        devices=2,
        accelerator="gpu",
         # Set to 0 for CPU or specify GPU device IDs
        logger=logger,
        log_every_n_steps=5,
        callbacks=[checkpoint_callback],
    
    
    
    
    
)

trainer.fit(model, train_dataloader, val_dataloader)


FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/working/last_epoch-v3.ckpt'

In [12]:
from pytorch_lightning import Trainer, callbacks
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader

# Replace 'your_module' with the actual module where your dataset and model are defined

num_epochs = 300
learning_rate = 1e-3
batch_size = 64


# Now, you can use 'checkpoint_dir' for saving your checkpoints

# Replace with the actual path to your dataset
 # Replace with the directory where you want to save checkpoints
checkpoint_filename = 'last_epoch.ckpt'  # Name for the checkpoint file

train_dataset = FlowFieldDataset(input_path=input_path, mode="train")
val_dataset = FlowFieldDataset(input_path=input_path, mode="val")
test_dataset = FlowFieldDataset(input_path=input_path, mode="test")

train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, pin_memory=False)

model = FlowFieldModel()

# Define a logger for TensorBoard
logger = TensorBoardLogger("logs", name="tensorboard")

# Define a ModelCheckpoint callback to save the model checkpoint for the last epoch
checkpoint_callback = ModelCheckpoint(
    dirpath=checkpoint_dir,
    filename=checkpoint_filename,
    save_top_k=1,  # Save only the last epoch checkpoint
    monitor="val_loss",
    verbose=True,  # Print a message when a checkpoint is saved
    save_last=True,# Do not monitor any metric for saving checkpoints
)

# Load the last epoch checkpoint if it exists

trainer = Trainer(
    max_epochs=num_epochs,
    devices=2,  # Set to 0 for CPU or specify GPU device IDs
    accelerator="gpu",
    logger=logger,
    log_every_n_steps=5,
    callbacks=[checkpoint_callback],
)

trainer.fit(model, train_dataloader, val_dataloader,ckpt_path="/kaggle/working/last_epoch.ckpt.ckpt")




Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [None]:
import os

# Define the directory path you want to create
checkpoint_dir = '/kaggle/working/your_checkpoint_directory'

# Create the directory if it doesn't exist
os.makedirs(checkpoint_dir, exist_ok=True)


In [13]:
import datetime
import pandas as pd



In [15]:
progress_bar = tqdm(range(len(test_dataloader)))
predictions = {}
ids = []
for idx, batch in enumerate(test_dataloader):
    id, inputs = batch
    outputs = model(inputs)
    outputs = outputs.permute(0, 2, 3, 1)
    predictions[idx] = outputs.cpu().detach().numpy().flatten(order="C").astype(np.float32)
    ids.append(id.cpu().detach().numpy()[0])
    progress_bar.set_description(f"test prediction: {idx}")
    progress_bar.update(1)
progress_bar.close()

df = pd.DataFrame.from_dict(predictions).T
df["id"] = ids
# move id to first column
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols]
# reset index
df = df.reset_index(drop=True)
# Get the current date and time
current_datetime = datetime.datetime.now()

# Format the date and time as a string
formatted_datetime = current_datetime.strftime("%Y%m%d%H%M%S")

# Define the filename with the formatted date and time
filename = f"predictions_{formatted_datetime}.csv"

# Save the DataFrame to the CSV file
df.to_csv(filename, index=False)





test prediction: 172: 100%|██████████| 173/173 [00:10<00:00, 17.25it/s]


In [None]:
torch.save(model,"bestmodelfull.pth")