In [5]:
from utils_directory_tree_generator import get_directory_tree

tree_generator = get_directory_tree(
    start_path='/kaggle/input',
    max_depth=5,
    include_files=True,
    sort_by='type',
    reverse=False,
    max_items=5
)
for line in tree_generator:
    print(line)

input
    google-research-identify-contrails-reduce-global-warming
        validation
            3687499407028137410
                band_10.npy
                band_14.npy
                band_15.npy
                band_16.npy
            6558861185867890815
                band_10.npy
                band_14.npy
                band_15.npy
                band_16.npy
            7355354609194882312
                band_10.npy
                band_14.npy
                band_15.npy
                band_16.npy
            7547747455642200110
                band_10.npy
                band_14.npy
                band_15.npy
                band_16.npy
        test
            1002653297254493116
                band_10.npy
                band_14.npy
                band_15.npy
                band_16.npy
            1000834164244036115
                band_10.npy
                band_14.npy
                band_15.npy
                band_16.npy
        train
            12844121126

In [6]:
! pip install -q segmentation-models-pytorch --upgrade

In [7]:
import segmentation_models_pytorch
print(segmentation_models_pytorch.__version__)


0.3.3


In [12]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, Lambda, Resize, RandomAffine, ColorJitter
from torch.nn.functional import pad
from PIL import Image
from utils_contrails_loss_functions import DiceLoss, FocalLoss
import segmentation_models_pytorch as smp
import pytorch_lightning as pl

# Dataset paths
train_data_path = "/kaggle/input/google-research-identify-contrails-reduce-global-warming/train"
val_data_path = "/kaggle/input/google-research-identify-contrails-reduce-global-warming/validation"
test_data_path = "/kaggle/input/google-research-identify-contrails-reduce-global-warming/test"


# Data Loader
class ContrailDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.record_dirs = [os.path.join(root_dir, d) for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        self.record_dirs.sort(key=lambda x: os.path.getsize(x)) # Sort dataset based on sequence length

        # Updated transformations
        self.transform = Compose([
            Lambda(lambda x: Image.fromarray((x.transpose(1, 2, 0) * 255).astype(np.uint8))),
            ColorJitter(brightness=0.1, contrast=0.1),
            Lambda(lambda x: np.array(x)),
            Lambda(lambda x: x.transpose(2, 0, 1)),
            Lambda(lambda x: torch.from_numpy(x).float()),
            RandomAffine(degrees=0, scale=(0.8, 1.2), shear=20),
            Resize((320, 320)),
            Lambda(lambda x: pad(x, (0, 0, 0, (32 - x.shape[1] % 32) % 32)))
            
        ])
        self.target_transform = Compose([
            Lambda(lambda x: torch.from_numpy(x).float())
        ])

    def __len__(self):
        return len(self.record_dirs)

    def __getitem__(self, idx):
        record_dir = self.record_dirs[idx]
        
        # Updated to use bands from 13 to 16 (4 bands)
        bands = [np.load(os.path.join(record_dir, f"band_{band}.npy"))[..., 0] for band in [13, 14, 15, 16]]
        bands_array = np.stack(bands)
        sample = bands_array

        # Load aggregated ground truth mask
        target_path = os.path.join(record_dir, 'human_pixel_masks.npy')
        target = np.load(target_path) if os.path.exists(target_path) else None

        # Apply the target transformations to the target
        if target is not None:
            # Ensure the target has only one channel and appropriate shape.
            target = np.mean(target, axis=(0, 1))

            # Ensure the data type is uint8 and values range between 0 and 255
            target = (target * 255).astype(np.uint8)

            # Convert numpy array to PIL Image, then resize, and convert back to numpy array
            target_pil = Image.fromarray(target)

            # Resize the target to 256x256 to match the model's output
            target_resized = np.array(Resize((256, 256))(target_pil))

            # Normalize to [0, 1]
            target_resized = target_resized / 255.0

            # Convert to PyTorch tensors
            sample = torch.tensor(sample, dtype=torch.float32)
            if target is not None:
                target = torch.tensor(target_resized, dtype=torch.float32).unsqueeze(0)  # Add channel dimension

            return sample, target
        
class CombinedLoss(nn.Module):
    def __init__(self):
        super(CombinedLoss, self).__init__()
        self.dice_loss = DiceLoss()
        self.focal_loss = FocalLoss()
    
    def forward(self, inputs, targets):
        dice = self.dice_loss(inputs, targets)
        focal = self.focal_loss(inputs, targets)
        return dice + focal

class ContrailModel(pl.LightningModule):
    def __init__(self, in_channels):
        super(ContrailModel, self).__init__()
        self.model = smp.Unet(
            encoder_name="resnet34",
            encoder_weights="imagenet",
            in_channels=in_channels,
            classes=1
        )
        self.loss = CombinedLoss()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.model(x)
        x = self.sigmoid(x)
        # Ensure the outputs are strictly within the [0, 1] range
        x = torch.clamp(x, 1e-5, 1-1e-5)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        print(y_hat.shape)
        print(y.shape)
        loss_val = self.loss(y_hat, y)  #  compute combined loss
        self.log('train_loss', loss_val)
        return loss_val

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)

# Training Function
def train_model(train_dataset, val_dataset, model, epochs=10, batch_size=8, lr=0.001):
    # Use the collate function in data loader
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    optimizer = Adam(model.parameters(), lr=lr)
    criterion = BCEWithLogitsLoss()
    
    for epoch in range(epochs):
        model.train()
        for images, masks in train_loader: 
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks) 
            loss.backward()
            optimizer.step()
        
        model.eval()
        val_loss = sum(criterion(model(images), masks).item() for images, masks in val_loader) / len(val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {val_loss:.4f}")

# Instantiate datasets
dataset_train = ContrailDataset(root_dir=train_data_path)
dataset_val = ContrailDataset(root_dir=val_data_path)

# Instantiate model
model = ContrailModel(in_channels=4)


# Use PyTorch Lightning's Trainer to train the model
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, DataLoader(dataset_train, batch_size=8, shuffle=True), DataLoader(dataset_val, batch_size=8))

Training: 0it [00:00, ?it/s]

torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8, 1, 256, 256])
torch.Size([8,

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [22]:
# Check the installed version
! pip show pytorch_lightning

# Update to the latest version
# ! pip install -q pytorch_lightning --upgrade

!pip install -q pytorch_lightning==1.4.9



Name: pytorch-lightning
Version: 2.0.6
Summary: PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate.
Home-page: https://github.com/Lightning-AI/lightning
Author: Lightning AI et al.
Author-email: pytorch@lightning.ai
License: Apache-2.0
Location: /opt/conda/lib/python3.10/site-packages
Requires: fsspec, lightning-utilities, numpy, packaging, PyYAML, torch, torchmetrics, tqdm, typing-extensions
Required-by: 


In [28]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
from PIL import ImageOps
from utils_contrails_loss_functions import DiceLoss, FocalLoss
import segmentation_models_pytorch as smp
import pytorch_lightning as pl

# Dataset paths
train_data_path = "/kaggle/input/google-research-identify-contrails-reduce-global-warming/train"
val_data_path = "/kaggle/input/google-research-identify-contrails-reduce-global-warming/validation"

class ContrailDataset(Dataset):
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.record_dirs = [os.path.join(root_dir, d) for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        self.record_dirs.sort(key=lambda x: os.path.getsize(x))
        
        self.transform = transforms.Compose([
            transforms.ColorJitter(brightness=0.1, contrast=0.1),
            transforms.RandomAffine(degrees=0, scale=(0.8, 1.2), shear=20),
            transforms.Resize((320, 320)),
            transforms.Lambda(lambda x: ImageOps.pad(x, (0, 0, 0, max(0, (32 - x.size[1] % 32) % 32))))
        ])

    def __len__(self):
        return len(self.record_dirs)

    def __getitem__(self, idx):
        record_dir = self.record_dirs[idx]
        bands = [np.load(os.path.join(record_dir, f"band_{band}.npy"))[..., 0] for band in [13, 14, 15, 16]]
        bands_array = np.stack(bands)
        image = Image.fromarray((bands_array.transpose(1, 2, 0) * 255).astype(np.uint8))
        sample = self.transform(image)
        
        # Load aggregated ground truth mask
        target_path = os.path.join(record_dir, 'human_pixel_masks.npy')
        if os.path.exists(target_path):
            target = np.mean(np.load(target_path), axis=(0, 1))
            target = Image.fromarray((target * 255).astype(np.uint8))
            target = transforms.Resize((256, 256))(target)
            target = torch.tensor(np.array(target), dtype=torch.float32) / 255.0
        else:
            target = None

        return transforms.ToTensor()(sample), target.unsqueeze(0) if target is not None else None

class CombinedLoss(nn.Module):
    def __init__(self):
        super(CombinedLoss, self).__init__()
        self.dice_loss = DiceLoss()
        self.focal_loss = FocalLoss()

    def forward(self, inputs, targets):
        return self.dice_loss(inputs, targets) + self.focal_loss(inputs, targets)

class ContrailModel(pl.LightningModule):
    def __init__(self, in_channels):
        super(ContrailModel, self).__init__()
        self.model = smp.Unet(encoder_name="resnet34", encoder_weights="imagenet", in_channels=in_channels, classes=1)
        self.loss = CombinedLoss()

    def forward(self, x):
        x = torch.sigmoid(self.model(x))
        return torch.clamp(x, 1e-5, 1-1e-5)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss_val = self.loss(y_hat, y)
        self.log('train_loss', loss_val)
        return loss_val

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)

# Instantiate datasets
dataset_train = ContrailDataset(root_dir=train_data_path)
dataset_val = ContrailDataset(root_dir=val_data_path)

# Instantiate model
model = ContrailModel(in_channels=4)

# Use PyTorch Lightning's Trainer to train the model without the gpus argument
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, DataLoader(dataset_train, batch_size=8, shuffle=True), DataLoader(dataset_val, batch_size=8))


Training: 0it [00:00, ?it/s]