In [22]:
import sys
# Make the package copies shipped under the notebook's input directories
# importable; order is preserved so lookups behave as with separate appends.
sys.path.extend(
    [
        "../input/pretrained-models-pytorch",
        "../input/efficientnet-pytorch",
        "/kaggle/input/smp-github/segmentation_models.pytorch-master",
        "/kaggle/input/timm-pretrained-resnest/resnest/",
    ]
)
import segmentation_models_pytorch as smp

In [23]:
# Create the torch hub checkpoint directory and copy the ResNeSt-26 weight file
# from the attached input dataset into it.
# NOTE(review): presumably this lets the "timm-resnest26d" encoder load its
# ImageNet weights without a network download — confirm the expected file name.
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/timm-pretrained-resnest/resnest/gluon_resnest26-50eb607c.pth /root/.cache/torch/hub/checkpoints/gluon_resnest26-50eb607c.pth

In [24]:
%%writefile config.yaml

# Dataset root; the training cell joins "contrails/", "train_df.csv" and
# "valid_df.csv" onto this path.
data_path: "/kaggle/input/contrails-images-ash-color"
output_dir: "models"

seed: 42

# DataLoader settings: batch sizes for the train / validation loaders and the
# number of worker processes used by both.
train_bs: 48
valid_bs: 128
workers: 2

# NOTE(review): not read by any cell visible in this notebook — confirm whether
# it is still needed.
progress_bar_refresh_rate: 1

# NOTE(review): EarlyStopping is imported by the training cell but no callback
# is built from this section in the visible code — confirm whether it is used.
early_stop:
    monitor: "val_loss"
    mode: "min"
    patience: 999
    verbose: 1

# Passed verbatim as keyword arguments to pl.Trainer.
trainer:
    max_epochs: 26
    min_epochs: 26
    enable_progress_bar: True
    precision: "16-mixed"
    devices: 2

# Passed as the config mapping of the Lightning module.
model:
    # Key into the seg_models registry defined in the model cell.
    seg_model: "DeepLabV3+"
    encoder_name: "timm-resnest26d"
    # Smoothing term handed to the Dice loss.
    loss_smooth: 1.0
    # Side length images are resized to by the dataset when it differs from 256.
    image_size: 384
    # Keyword arguments for AdamW.
    optimizer_params:
        lr: 0.0005
        weight_decay: 0.0
    scheduler:
        # Selects which entry of `params` below is used.
        name: "CosineAnnealingLR"
        params:
            # NOTE(review): the Lightning module registers this scheduler with
            # interval "step", so T_max is counted in optimizer steps rather
            # than epochs — confirm that T_max: 2 is intended.
            CosineAnnealingLR:
                T_max: 2
                eta_min: 1.0e-6
                last_epoch: -1
            ReduceLROnPlateau:
                mode: "min"
                # Approximately 10 ** -0.5.
                factor: 0.31622776601
                patience: 4
                verbose: True

Overwriting config.yaml


In [25]:
import torch
import numpy as np
import torchvision.transforms as T
import torch
import numpy as np
import torchvision.transforms as T
class google_contrail_dataset(torch.utils.data.Dataset):
    """Map-style dataset that loads one ``.npy`` record per row of ``df``.

    Each row must expose a ``path`` attribute pointing at an array whose last
    axis holds the image channels followed by the mask as the final channel.
    The image part is reshaped to ``(256, 256, 3)`` before being converted to a
    channel-first float tensor.

    Args:
        df: table indexed positionally via ``.iloc``; needs a ``path`` column.
        img_size: target size handed to the resize transform; when it equals
            256 no resize transform is created or applied.
        train: stored as ``self.trn``; not consulted anywhere else in this class.
    """

    def __init__(self, df, img_size=256, train=True):
        self.df = df
        self.trn = train
        self.img_size = img_size

        # Channel-wise mean / std used for normalization (the values commonly
        # used with ImageNet-pretrained encoders).
        self.normalize_img = T.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        )

        # The resize transform only exists when a non-native size is requested.
        if img_size != 256:
            self.resize_img = T.Resize(img_size)

    def __len__(self):
        """Number of records, i.e. rows of the backing table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` as float tensors for the row at position ``idx``.

        The image is channel-first, optionally resized, then normalized. Only
        the image is resized; the mask is returned at its stored resolution.
        """
        record = np.load(str(self.df.iloc[idx].path))

        # Everything but the last channel is the image; the last one is the mask.
        pixels = np.reshape(record[..., :-1], (256, 256, 3))
        mask = torch.tensor(record[..., -1])

        # HWC -> CHW float32 tensor.
        image = torch.tensor(pixels).to(torch.float32).permute(2, 0, 1)

        if self.img_size != 256:
            image = self.resize_img(image)
        image = self.normalize_img(image)

        return image.float(), mask.float()


In [26]:
import torch
import pytorch_lightning as pl
import segmentation_models_pytorch as smp
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torch.optim import AdamW
import torch.nn as nn
from torchmetrics.functional import dice


In [27]:
# Registry of segmentation architectures: maps the name used in the config's
# "seg_model" entry to the corresponding segmentation_models_pytorch class.
seg_models = dict(
    (
        ("Unet", smp.Unet),
        ("Unet++", smp.UnetPlusPlus),
        ("MAnet", smp.MAnet),
        ("Linknet", smp.Linknet),
        ("FPN", smp.FPN),
        ("PSPNet", smp.PSPNet),
        ("PAN", smp.PAN),
        ("DeepLabV3", smp.DeepLabV3),
        ("DeepLabV3+", smp.DeepLabV3Plus),
    )
)
class pytorch_lightning_model(pl.LightningModule):
    """LightningModule wrapping a segmentation network trained with Dice loss.

    Args:
        config: mapping with the keys ``seg_model`` (a key of ``seg_models``),
            ``encoder_name``, ``loss_smooth``, ``optimizer_params`` (keyword
            arguments for AdamW) and ``scheduler`` (``name`` plus per-scheduler
            ``params``).

    Logged metrics: ``train_loss`` and ``lr`` during training, ``val_loss`` and
    ``val_dice`` at the end of each validation epoch.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Single-channel logits (no activation); sigmoid is applied only when
        # computing the validation Dice score.
        self.model = seg_models[config["seg_model"]](
            encoder_name=config["encoder_name"],
            encoder_weights="imagenet",
            in_channels=3,
            classes=1,
            activation=None,
        )
        self.loss_module = smp.losses.DiceLoss(mode="binary", smooth=config["loss_smooth"])
        # Per-batch validation logits / labels, concatenated at epoch end.
        self.val_step_outputs = []
        self.val_step_labels = []

    def forward(self, batch):
        """Return raw logits for a batch of images (no resizing, no sigmoid)."""
        imgs = batch
        preds = self.model(imgs)
        return preds

    def configure_optimizers(self):
        """Build AdamW plus the scheduler selected by ``config["scheduler"]["name"]``.

        Raises:
            ValueError: if the scheduler name is not one of the supported ones.
                (Previously this fell through and returned ``None``, which makes
                Lightning train without any optimizer.)
        """
        optimizer = AdamW(self.parameters(), **self.config["optimizer_params"])
        scheduler_name = self.config["scheduler"]["name"]
        scheduler_params = self.config["scheduler"]["params"]

        if scheduler_name == "CosineAnnealingLR":
            scheduler = CosineAnnealingLR(optimizer, **scheduler_params["CosineAnnealingLR"])
            # NOTE(review): the scheduler is stepped after every optimizer step,
            # so T_max is counted in batches, not epochs — confirm the
            # configured T_max is intended.
            lr_scheduler_dict = {"scheduler": scheduler, "interval": "step"}
            return {"optimizer": optimizer, "lr_scheduler": lr_scheduler_dict}

        if scheduler_name == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer, **scheduler_params["ReduceLROnPlateau"])
            lr_scheduler = {"scheduler": scheduler, "monitor": "val_loss"}
            return {"optimizer": optimizer, "lr_scheduler": lr_scheduler}

        raise ValueError(
            f"Unsupported scheduler {scheduler_name!r}; "
            "expected 'CosineAnnealingLR' or 'ReduceLROnPlateau'"
        )

    def _logits_at_label_size(self, imgs, labels):
        """Run the network and bilinearly resize the logits to the labels' spatial size if they differ."""
        preds = self.model(imgs)
        target_size = tuple(labels.shape[-2:])
        if tuple(preds.shape[-2:]) != target_size:
            preds = torch.nn.functional.interpolate(preds, size=target_size, mode="bilinear")
        return preds

    def training_step(self, batch, batch_idx):
        """Compute the Dice loss for one batch and log the loss and current learning rate."""
        imgs, labels = batch
        preds = self._logits_at_label_size(imgs, labels)
        loss = self.loss_module(preds, labels)
        # Use the real batch size so the epoch-level average is weighted
        # correctly (a hard-coded value mis-weights partial batches).
        self.log(
            "train_loss",
            loss,
            on_step=True,
            on_epoch=True,
            prog_bar=True,
            batch_size=imgs.size(0),
        )

        # Report the learning rate of the last parameter group.
        lr = self.trainer.optimizers[0].param_groups[-1]["lr"]
        self.log("lr", lr, on_step=True, on_epoch=False, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and stash logits / labels for the epoch-level Dice score."""
        imgs, labels = batch
        preds = self._logits_at_label_size(imgs, labels)
        loss = self.loss_module(preds, labels)
        # sync_dist=True reduces the epoch value across devices so every rank
        # (and any callback or scheduler monitoring it) sees the same number.
        self.log(
            "val_loss",
            loss,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            sync_dist=True,
            batch_size=imgs.size(0),
        )
        self.val_step_outputs.append(preds)
        self.val_step_labels.append(labels)

    def on_validation_epoch_end(self):
        """Compute Dice over everything collected during this validation epoch and log it."""
        all_preds = torch.cat(self.val_step_outputs)
        all_labels = torch.cat(self.val_step_labels)
        all_preds = torch.sigmoid(all_preds)
        # Release the per-batch buffers before the next epoch starts.
        self.val_step_labels.clear()
        self.val_step_outputs.clear()
        val_dice = dice(all_preds, all_labels.long())
        # Each rank only holds its own shard of the validation data; averaging
        # across ranks keeps the monitored value consistent between processes.
        self.log(
            "val_dice",
            val_dice,
            on_step=False,
            on_epoch=True,
            prog_bar=True,
            sync_dist=True,
        )
        if self.trainer.global_rank == 0:
            print(f"\nEpoch: {self.current_epoch}", flush=True)




In [28]:
import warnings

# NOTE(review): this installs a blanket "ignore" filter, so deprecation and
# pandas copy/view warnings raised by later code are hidden as well — consider
# narrowing it to specific categories.
warnings.filterwarnings("ignore")  # ignore warning messages

import os
import torch
import yaml
import pandas as pd
import pytorch_lightning as pl
from pprint import pprint
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, TQDMProgressBar
from pytorch_lightning.callbacks.stochastic_weight_avg import StochasticWeightAveraging
from torch.utils.data import DataLoader

# Load the experiment configuration written to config.yaml.
with open("config.yaml", "r") as file_obj:
    config = yaml.safe_load(file_obj)

# Apply the configured seed (python, numpy, torch and dataloader workers) so
# that shuffling and weight initialisation are repeatable between runs.
pl.seed_everything(config["seed"], workers=True)

contrails = os.path.join(config["data_path"], "contrails/")  # directory holding the .npy records
train_path = os.path.join(config["data_path"], "train_df.csv")  # table of training records
valid_path = os.path.join(config["data_path"], "valid_df.csv")  # table of validation records

train_df = pd.read_csv(train_path)
valid_df = pd.read_csv(valid_path)

# Pool both provided tables; the folds below re-split the combined data.
total_df = pd.concat([train_df, valid_df], ignore_index=True)

from sklearn.model_selection import KFold

n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=config["seed"])

# For every fold, split the pooled data into a training and a validation part
# and train a fresh model on it.
for fold, (train_indices, val_indices) in enumerate(kfold.split(total_df)):
    print(f"Fold {fold + 1}")

    # .copy() gives each fold its own frame, so adding the "path" column below
    # is not a chained assignment on a slice of total_df.
    train_fold = total_df.iloc[train_indices].copy()
    valid_fold = total_df.iloc[val_indices].copy()

    train_fold["path"] = contrails + train_fold["record_id"].astype(str) + ".npy"
    valid_fold["path"] = contrails + valid_fold["record_id"].astype(str) + ".npy"

    dataset_train = google_contrail_dataset(train_fold, config["model"]["image_size"], train=True)
    dataset_validation = google_contrail_dataset(valid_fold, config["model"]["image_size"], train=False)

    data_loader_train = DataLoader(
        dataset_train,
        batch_size=config["train_bs"],
        shuffle=True,
        num_workers=config["workers"],
    )
    data_loader_validation = DataLoader(
        dataset_validation,
        batch_size=config["valid_bs"],
        shuffle=False,
        num_workers=config["workers"],
    )

    model = pytorch_lightning_model(config["model"])

    checkpoint_callback = ModelCheckpoint(
        monitor="val_dice",
        # One directory per fold underneath the configured output directory.
        dirpath=os.path.join(config["output_dir"], f"fold_{fold + 1}"),
        filename="best_model",  # file name of the best checkpoint
        save_top_k=1,
        mode="max",  # keep the checkpoint with the highest validation Dice
    )

    # Build the trainer from the "trainer" section of the config.
    trainer = pl.Trainer(
        callbacks=[checkpoint_callback],
        **config["trainer"],
    )

    # Train this fold's model.
    trainer.fit(model, data_loader_train, data_loader_validation)

Fold 1
