In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import torch.nn.functional as F
from accelerate import Accelerator
from accelerate.utils import ProjectConfiguration
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Normalize, Compose, ToTensor
from tqdm import tqdm


# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install names_generator

In [None]:
# Clean up output dir:
# NOTE(review): output_path is set to /kaggle/working/ itself further down, so nothing
# visible in this notebook writes to /kaggle/working/outputs any more — this may be a
# leftover; confirm before removing.
!rm -rf /kaggle/working/outputs

In [None]:
from names_generator import generate_name

# Reading and plotting data:

In [None]:
# Locations of the competition data (read-only) and of everything this notebook writes.
# All directory strings keep their trailing "/" because later cells build paths by
# plain string concatenation.
base_path = "/kaggle/input/2023-flame-ai-challenge/"
working_dir = "/kaggle/working/"
input_path = base_path + "dataset/"
# Outputs go straight into the working directory; the former "outputs/" sub-directory
# was dropped because it caused a bug.
# output_path = working_dir + "outputs/"
output_path = working_dir

# Create directories for checkpoints and logs. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of testing os.path.exists first.
log_dir = output_path + "logs/"
checkpoint_dir = output_path + "ckpt/"
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

# Manifests listing the per-sample field files of each split.
train_df = pd.read_csv(input_path + "train.csv")
val_df = pd.read_csv(input_path + "val.csv")
test_df = pd.read_csv(input_path + "test.csv")

In [None]:
# Preview the first rows of the training manifest read from train.csv.
train_df.head()

In [None]:
# Manifest columns holding the file names of the four fields, in channel order.
FIELD_COLUMNS = ("rho_filename", "ux_filename", "uy_filename", "uz_filename")


def _read_train_fields(directory, row_idx, size):
    """Read the four field files of training row `row_idx` from `directory`.

    Each file is raw little-endian float32 and is reshaped to (size, size).
    Returns a list of four 2-D arrays ordered like FIELD_COLUMNS.
    """
    return [
        np.fromfile(directory + "/" + train_df[column][row_idx], dtype="<f4").reshape(size, size)
        for column in FIELD_COLUMNS
    ]


def _show_fields(fields, titles):
    """Draw the given 2-D fields side by side, one titled panel per field."""
    fig, axs = plt.subplots(1, len(fields), figsize=(20, 5))
    for ax, field, title in zip(axs, fields, titles):
        ax.imshow(field, cmap='jet')
        ax.set_title(title)
    plt.show()


# Pick one training sample at random and look at it in both resolutions.
idx = np.random.choice(range(len(train_df)))
print(f"Index:{idx}")

# High-resolution fields (128 x 128), stacked channel-last into HR_X.
hr_fields = _read_train_fields(input_path + "flowfields/HR/train", idx, 128)
_show_fields(hr_fields, ['Density', 'X-Velocity', 'Y-Velocity', 'Z-Velocity'])
HR_X = np.stack(hr_fields, axis=-1)

# plot LR features
# Low-resolution fields (16 x 16) of the same sample, stacked channel-last into LR_X.
lr_fields = _read_train_fields(input_path + "flowfields/LR/train", idx, 16)
LR_X = np.stack(lr_fields, axis=-1)
_show_fields(lr_fields, ['RHO', 'UX', 'UY', 'UZ'])

# Setup PyTorch dataset and dataloaders:

In [None]:
class FlowFieldDataset(Dataset):
    """Low-/high-resolution flow-field samples listed in ``{mode}.csv``.

    Every sample consists of four fields (rho, ux, uy, uz) stored as raw
    little-endian float32 files: 16 x 16 under ``flowfields/LR/{mode}`` and
    128 x 128 under ``flowfields/HR/{mode}``.

    Items:
        * mode "train" / "val": ``(X, Y)`` — normalized LR input and HR target.
        * mode "test": ``(id, X)`` — the manifest id and the normalized LR input.

    Args:
        input_path: dataset root; must end with a path separator because paths
            are built by string concatenation.
        mode: one of "train", "val", "test".
    """

    # Manifest columns holding the file names of the four fields, in channel order.
    FIELD_COLUMNS = ("rho_filename", "ux_filename", "uy_filename", "uz_filename")
    LR_SIZE = 16  # side length of a low-resolution field
    HR_SIZE = 128  # side length of a high-resolution field

    def __init__(self, input_path, mode):
        assert mode in ["train", "val", "test"]
        self.mode = mode
        # The manifest is read once for every mode (the test split used to be read twice).
        self.csv_file = pd.read_csv(input_path + f"{mode}.csv")
        self.LR_path = input_path + "flowfields/LR/" + mode
        self.HR_path = input_path + "flowfields/HR/" + mode

        # Per-channel statistics handed to Normalize, ordered rho, ux, uy, uz.
        # NOTE(review): the origin of these values is not visible here — confirm.
        self.mean = np.array([0.24, 28.0, 28.0, 28.0])
        self.std = np.array([0.068, 48.0, 48.0, 48.0])

    def transform(self, x):
        """Convert a channel-last array to a tensor and normalize it per channel."""
        return Compose([ToTensor(), Normalize(self.mean, self.std, inplace=True)])(x)

    def __len__(self):
        """Number of samples, i.e. rows of the manifest."""
        return len(self.csv_file)

    def _load_fields(self, directory, idx, size):
        """Read the four fields of row `idx` from `directory` as a (size, size, 4) float32 array."""
        fields = [
            np.fromfile(directory + "/" + self.csv_file[column][idx], dtype="<f4").reshape(size, size)
            for column in self.FIELD_COLUMNS
        ]
        return np.stack(fields, axis=2)

    def __getitem__(self, idx):
        # input
        X = self._load_fields(self.LR_path, idx, self.LR_SIZE)
        if self.mode == "test":
            # Test items carry the manifest id instead of a target.
            sample_id = self.csv_file["id"][idx]
            return sample_id, self.transform(X)
        # output
        Y = self._load_fields(self.HR_path, idx, self.HR_SIZE)
        return self.transform(X), self.transform(Y)


In [None]:
# Network size and batch size.
# NOTE(review): num_filters / num_of_residual_blocks match Model's keyword names and are
# presumably meant to be passed to it — confirm where the model is built.
num_filters = 64*4
num_of_residual_blocks = 30
batch_size = 256

train_dataset = FlowFieldDataset(input_path=input_path, mode="train")
val_dataset = FlowFieldDataset(input_path=input_path, mode="val")
test_dataset = FlowFieldDataset(input_path=input_path, mode="test")
# Only the training loader is shuffled. Validation keeps a fixed batch order so that
# per-batch validation values are comparable from one epoch to the next.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
# One sample per batch, in manifest order: the prediction loop reads a single id per batch.
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, pin_memory=False)

# Setup model:

In [None]:
class ResNetBlock(nn.Module):
    """Two same-padded convolutions wrapped by an identity skip connection.

    Args:
        in_channels: accepted for interface compatibility; not used by the block.
        num_filters: number of input and output channels of both convolutions.
        kernel_size: kernel size of both convolutions.

    NOTE(review): there is no activation between the two convolutions, so the
    block is a purely linear map — confirm this is intended.
    """

    def __init__(self, in_channels, num_filters, kernel_size=3):
        super().__init__()
        self.resnet_block = nn.Sequential(
            nn.Conv2d(num_filters, num_filters, kernel_size, padding="same"),
            nn.Conv2d(num_filters, num_filters, kernel_size, padding="same"),
        )
        # An empty Sequential returns its input unchanged: this is the skip path.
        self.input = nn.Sequential()

    def forward(self, x):
        """Return conv(conv(x)) + x; the spatial size and channel count are preserved."""
        inp = self.input(x)
        x = self.resnet_block(x)
        return x + inp


class Model(nn.Module):
    """Residual super-resolution network with pixel-shuffle upsampling.

    Pipeline: 1x1 conv to `num_filters` channels -> `num_of_residual_blocks`
    residual blocks -> conv -> global skip connection -> `scale` stages of
    (conv + PixelShuffle(factor)) -> 3x3 conv back to `in_channels` channels.
    Each side of the input grows by ``factor ** scale`` (2 ** 3 = 8 by default,
    i.e. 16 x 16 -> 128 x 128).

    Args:
        in_channels: channels of the input and of the output.
        factor: upscale factor of every PixelShuffle stage.
        scale: number of upsampling stages.
        num_of_residual_blocks: number of residual blocks in the trunk.
        num_filters: channel width of the trunk.
        kernel_size: kernel size of the trunk and upsampling convolutions.
        **kwargs: forwarded to the upsampling convolutions.
    """

    def __init__(
        self, in_channels=4, factor=2, scale=3, num_of_residual_blocks=16, num_filters=64, kernel_size=3, **kwargs
    ):
        super().__init__()
        self.num_of_residual_blocks = num_of_residual_blocks
        self.scale = scale
        self.factor = factor
        self.in_channels = in_channels
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        # Build a fresh block per position. `[block] * n` would repeat the SAME module
        # object n times, so all positions would share a single set of weights.
        self.res_blocks = nn.Sequential(
            *[
                ResNetBlock(
                    in_channels=in_channels,
                    num_filters=num_filters,
                    kernel_size=kernel_size,
                )
                for _ in range(num_of_residual_blocks)
            ]
        )
        # Upsampling: every stage multiplies each side by `factor`; `scale` stages give
        # factor ** scale overall (2 ** 3 = 8: 16 x 16 -> 128 x 128). The conv widens to
        # num_filters * factor**2 channels, which PixelShuffle folds back to num_filters.
        # As above, every stage gets its own convolution instead of a shared one.
        upsample_layers = []
        for _ in range(scale):
            upsample_layers.append(
                nn.Conv2d(num_filters, num_filters * (factor**2), kernel_size, padding="same", **kwargs)
            )
            upsample_layers.append(nn.PixelShuffle(upscale_factor=factor))
        self.upsample = nn.Sequential(*upsample_layers)
        self.resnet_input = nn.Conv2d(in_channels, num_filters, 1, padding="same")
        self.output_layer = nn.Conv2d(num_filters, in_channels, 3, padding="same")
        self.resnet_out = nn.Conv2d(self.num_filters, self.num_filters, self.kernel_size, padding="same")

    def forward(self, x):
        """Map a (N, in_channels, H, W) batch to (N, in_channels, H * factor**scale, W * factor**scale)."""
        x = self.resnet_input(x)
        x_res = self.res_blocks(x)
        x_res = self.resnet_out(x_res)
        # Global skip connection around the whole residual trunk.
        out = x + x_res
        out = self.upsample(out)
        return self.output_layer(out)

# Setup Accelerator and checkpointing:

In [None]:
# Random human-readable run name; it labels this run's checkpoint and log directories.
ckpt_name = generate_name()
num_epochs = 80
learning_rate = 0.098e-3
hyper_parameters = {"num_epochs": num_epochs, "learning_rate":learning_rate}

config = ProjectConfiguration(project_dir=working_dir, logging_dir=log_dir+ckpt_name)

# The model must be created here: without it the optimizer line below fails with a
# NameError on a fresh kernel. The capacity settings come from the data-loading cell.
# NOTE(review): confirm these are the intended sizes (the previously commented-out
# call was Model() with its defaults) and that they fit in GPU memory.
model = Model(num_of_residual_blocks=num_of_residual_blocks, num_filters=num_filters)
loss_fn = torch.nn.MSELoss()
optimizer = Adam(params=model.parameters(), lr=learning_rate)
scheduler = ReduceLROnPlateau(optimizer=optimizer)

accelerator = Accelerator(log_with="tensorboard", project_config=config)
accelerator.init_trackers(log_dir+ckpt_name, config=hyper_parameters)
# Let accelerate wrap the model, optimizer, loaders and scheduler (device placement etc.).
model, optimizer, train_dataloader, val_dataloader, test_dataloader, scheduler = accelerator.prepare(
    model, optimizer, train_dataloader, val_dataloader, test_dataloader, scheduler
)


# Register the LR scheduler
accelerator.register_for_checkpointing(scheduler)
# Save the starting state
# (this checkpoint holds the UNTRAINED weights; it is written before any training step).

accelerator.save_state(output_dir=checkpoint_dir+ckpt_name)

In [None]:
# Display the run name that labels this run's checkpoint and log directories.
ckpt_name

# Train model:

In [None]:
# NOTE(review): bare reference to the L1Loss class — the cell only echoes the class and
# computes nothing; it looks like leftover exploration.
torch.nn.L1Loss

In [None]:
criterion = torch.nn.L1Loss()

# Mean absolute error over the WHOLE validation split (previously only the last batch
# survived the loop). Batch means are weighted by batch size so a smaller final batch
# does not skew the average. no_grad() avoids building autograd graphs during evaluation.
model.eval()
abs_error_sum = 0.0
samples_seen = 0
with torch.no_grad():
    for step, batch in enumerate(val_dataloader):
        inputs, targets = batch
        outputs = model(inputs)
        abs_error_sum = abs_error_sum + criterion(outputs, targets) * inputs.shape[0]
        samples_seen += inputs.shape[0]
# max(..., 1) keeps an empty validation loader from dividing by zero.
val_loss_mae = abs_error_sum / max(samples_seen, 1)


In [None]:
# Display the validation MAE computed in the cell above.
val_loss_mae

In [None]:
# Validation pass using the functional L1 loss. torch.nn.L1Loss is a module class whose
# constructor takes configuration flags, not tensors, so calling it as
# torch.nn.L1Loss(outputs, targets) raises instead of returning a loss.
# After the loop val_loss_mae holds the MAE of the last validation batch.
with torch.no_grad():
    for step, batch in enumerate(val_dataloader):
        inputs, targets = batch
        outputs = model(inputs)
        val_loss_mae = F.l1_loss(outputs, targets)

In [None]:

# Mean absolute error of the network on the most recently loaded batch. The prediction
# is compared with `targets`: the model output is larger than `inputs` along both
# spatial axes, so subtracting it from `inputs` cannot broadcast.
with torch.no_grad():
    last_batch_mae = torch.mean(torch.abs(targets - model(inputs)))
last_batch_mae

In [None]:
# One progress-bar tick per epoch.
progress_bar = tqdm(range(num_epochs))
criterion = torch.nn.L1Loss()
# Tracker step that keeps increasing across epochs. Reusing the per-epoch batch index
# would log every epoch onto the same step values.
global_step = 0

for epoch in range(num_epochs):
    # ---- training ----
    model.train()
    for batch in train_dataloader:
        inputs, targets = batch
        outputs = model(inputs)
        loss = F.mse_loss(outputs, targets)
        optimizer.zero_grad()
        accelerator.backward(loss)
        optimizer.step()
        global_step += 1
        batch_loss = loss.item()
        progress_bar.set_description(f"epoch : {epoch} | loss : {batch_loss}")
        accelerator.log({"epoch": epoch, "train_loss": batch_loss}, step=global_step)

    # ---- validation ----
    # no_grad() avoids building autograd graphs; batch means are weighted by batch size
    # so the epoch value is the mean over all validation samples.
    model.eval()
    val_mse_sum, val_mae_sum, val_samples = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in val_dataloader:
            inputs, targets = batch
            outputs = model(inputs)
            batch_len = inputs.shape[0]
            val_mse_sum += F.mse_loss(outputs, targets).item() * batch_len
            val_mae_sum += criterion(outputs, targets).item() * batch_len
            val_samples += batch_len
    # max(..., 1) keeps an empty validation loader from dividing by zero.
    val_loss = val_mse_sum / max(val_samples, 1)
    val_loss_mae = val_mae_sum / max(val_samples, 1)
    accelerator.log({"epoch": epoch, "val_loss_mse": val_loss, "val_loss_mae": val_loss_mae}, step=global_step)

    # ReduceLROnPlateau watches a metric for stagnation; feed it the epoch-level validation
    # loss rather than the loss of whichever training batch happened to come last.
    scheduler.step(val_loss)

    progress_bar.set_description(f"epoch : {epoch} | val_loss_mse : {val_loss} | val_loss_mae : {val_loss_mae}")
    progress_bar.update(1)

progress_bar.close()
# Persist the trained weights under the run's checkpoint path; until now that path only
# held the state saved before training started.
accelerator.save_state(output_dir=checkpoint_dir + ckpt_name)
accelerator.end_training()

# Generate submissions:

In [None]:
# Run the model over the test split and collect one flattened prediction per sample.
# predictions maps the running sample index to a float32 vector; ids keeps the manifest
# ids in the same order.
# NOTE(review): inputs are normalized by the dataset transform and the network is trained
# on normalized targets, so these outputs are in normalized units — confirm whether the
# submission expects de-normalized values.
predictions = {}
ids = []
model.eval()
progress_bar = tqdm(test_dataloader, total=len(test_dataloader))
with torch.no_grad():  # inference only: no autograd graph needed
    for idx, batch in enumerate(progress_bar):
        sample_id, inputs = batch  # named sample_id to avoid shadowing the builtin id()
        outputs = model(inputs)
        # channels-first -> channels-last before flattening in C order
        outputs = outputs.permute(0, 2, 3, 1)
        predictions[idx] = outputs.cpu().numpy().flatten(order="C").astype(np.float32)
        # the test loader uses batch_size=1, so element 0 is the only id in the batch
        ids.append(sample_id.cpu().numpy()[0])
        progress_bar.set_description(f"test prediction: {idx}")
progress_bar.close()




In [None]:
# Nothing ventured, nothing gained: final error check after training.
# The test loader yields (id, inputs) pairs without high-resolution targets, so a loss
# cannot be computed on it; the labelled validation split is scored instead.

criterion = torch.nn.L1Loss()

model.eval()
mae_sum, mse_sum, samples_seen = 0.0, 0.0, 0
with torch.no_grad():
    for step, batch in enumerate(val_dataloader):
        inputs, targets = batch
        outputs = model(inputs)
        batch_len = inputs.shape[0]
        # weight batch means by batch size -> mean over all samples
        mae_sum += criterion(outputs, targets).item() * batch_len
        mse_sum += F.mse_loss(outputs, targets).item() * batch_len
        samples_seen += batch_len

# max(..., 1) keeps an empty loader from dividing by zero.
val_loss_mae = mae_sum / max(samples_seen, 1)
val_loss_mse = mse_sum / max(samples_seen, 1)

print(val_loss_mae, val_loss_mse)

In [None]:
# Display the model's module tree.
model

In [None]:
# The dict keys become columns first; transposing turns every prediction into one row.
# The sample ids are then attached as the trailing "id" column.
df = pd.DataFrame(predictions).transpose()
df["id"] = ids

In [None]:
# Rotate the column order one step to the right so the trailing "id" column leads.
cols = df.columns.tolist()
cols = [cols[position - 1] for position in range(len(cols))]
df = df[cols]

In [None]:
# reset index
df = df.reset_index(drop=True)
# Write the submission next to the other outputs, named after the run.
# (The author had marked this line as problematic; no details were recorded.)
df.to_csv(f"{output_path}{ckpt_name}.csv", index=False)
# The accelerator.load_state(checkpoint_dir + ckpt_name) call that followed was removed.
# That checkpoint is written before training starts, so unless it has been re-saved since,
# loading it replaces the trained weights with the initial ones. The predictions are
# already computed at this point, so nothing here needs it.



In [None]:

# Build and write the submission table in a single cell.
print("Generating predictions")
# one row per test sample, then the ids as the trailing column
df = pd.DataFrame.from_dict(predictions).T
df["id"] = ids
# move id to first column
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols]
# reset index
df = df.reset_index(drop=True)
df.to_csv(f"{output_path}{ckpt_name}.csv", index=False)
# The accelerator.load_state(checkpoint_dir + ckpt_name) call that followed was removed.
# That checkpoint is written before training starts, so unless it has been re-saved since,
# loading it replaces the trained weights with the initial ones. The predictions are
# already computed at this point, so nothing here needs it.

In [None]:
# Print the run name (the submission CSV is named after it) and preview the first rows
# of the submission table.
print(ckpt_name)
df.head()

In [None]:
# (rows, columns) of the submission table.
df.shape

In [None]:
# Write the same table once more under a fixed file name in the output directory.
df.to_csv(output_path + 'my_submission.csv',index=False)


In [None]:
# Preview the first rows of the submission table.
df.head()