In [None]:
import sys
# Extend the module search path with the local library checkouts found under the
# Kaggle input directories (presumably because the notebook runs without internet
# access — confirm against the competition settings).
sys.path.append("../input/pretrained-models-pytorch")
sys.path.append("../input/efficientnet-pytorch")
sys.path.append("/kaggle/input/smp-github/segmentation_models.pytorch-master")
sys.path.append("/kaggle/input/timm-pretrained-resnest/resnest/")
# Imported only after the sys.path additions above so the local checkout can be found.
import segmentation_models_pytorch as smp

In [None]:
# Copy the local ResNeSt-26 weights file into the torch hub checkpoint cache directory
# (presumably so the weights can be resolved from the cache instead of being downloaded).
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/timm-pretrained-resnest/resnest/gluon_resnest26-50eb607c.pth /root/.cache/torch/hub/checkpoints/gluon_resnest26-50eb607c.pth

In [None]:
%%writefile config.yaml

data_path: "/kaggle/input/contrails-images-ash-color"  # root directory of the input data
output_dir: "models"  # output directory where model checkpoints are saved

seed: 420  # random seed, intended to make experiments reproducible

train_bs: 48  # batch size used for training
valid_bs: 128  # batch size used for validation
workers: 2  # number of DataLoader workers

progress_bar_refresh_rate: 1  # refresh rate of the progress bar

early_stop:  # early-stopping settings
    monitor: "val_dice"  # metric to monitor (the logged validation Dice score)
    mode: "max"  # higher is better: stop once the monitored metric stops increasing
    patience: 3  # number of epochs without improvement to tolerate
    verbose: 1  # whether to print detailed messages

trainer:  # keyword arguments forwarded to the Lightning Trainer
    max_epochs: 33  # maximum number of training epochs
    min_epochs: 29  # minimum number of training epochs
    enable_progress_bar: True  # whether to show the progress bar
    precision: "16-mixed"  # training precision
    devices: 2  # number of devices

model:  # model settings
    seg_model: "DeepLabV3+"  # segmentation architecture (key into the seg_models registry)
    encoder_name: "timm-efficientnet-b1"  # encoder (backbone) name
    loss_smooth: 1.0  # smoothing factor of the Dice loss
    image_size: 384  # image size
    optimizer_params:  # keyword arguments forwarded to the optimizer
        lr: 0.0001  # learning rate
        weight_decay: 0.03  # weight decay
    scheduler:  # learning-rate scheduler settings
        name: "CosineAnnealingLR"  # which of the schedulers below to use
        params:  # per-scheduler keyword arguments
            CosineAnnealingLR:  # cosine-annealing scheduler
                T_max: 2  # cosine period; the training script multiplies it by len(train_dataloader) / devices
                eta_min: 1.0e-6  # minimum learning rate
                last_epoch: -1  # index of the last epoch; -1 means start from scratch
            ReduceLROnPlateau:  # reduce the learning rate when the monitored value plateaus
                mode: "min"  # the monitored value is expected to decrease
                factor: 0.31622776602  # multiplicative factor applied to the learning rate
                patience: 4  # number of epochs without improvement to tolerate
                verbose: True  # whether to print detailed messages


In [3]:
import torch
import numpy as np
import torchvision.transforms as T
class google_contrail_dataset(torch.utils.data.Dataset):
    """Dataset of contrail samples stored as one ``.npy`` array per record.

    Every row of ``df`` must expose a ``path`` attribute pointing at an array
    whose last axis holds the image channels followed by the label mask as the
    final channel. The image part is reshaped to ``(256, 256, 3)``.

    Args:
        df: DataFrame with one row per sample and a ``path`` column.
        img_size: Side length the image is resized to; ``256`` means no resize.
            The label is never resized.
        train: Flag stored on the instance as ``trn``; it is not used by the
            dataset itself.
    """

    def __init__(self,df,img_size=256,train=True):
        self.df=df

        # Channel-wise mean/std (the commonly used ImageNet statistics).
        self.normalize_img=T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        # Fixed: the original read the non-existent attribute ``self.train``,
        # which raised AttributeError as soon as a dataset was constructed.
        self.trn=train
        self.img_size=img_size
        if self.img_size!=256:
            self.resize_img=T.Resize(img_size)

    def __getitem__(self,idx):
        """Return ``(image, label)`` as float tensors for the row at position ``idx``."""
        # Look up the record and load its array from disk.
        row=self.df.iloc[idx]
        con_path=row.path
        con=np.load(str(con_path))

        # All channels but the last form the image; the last one is the label mask.
        img=con[...,:-1]
        label=con[...,-1]

        # Convert to tensors; the image goes from HWC to CHW layout.
        label=torch.tensor(label)
        img=torch.tensor(np.reshape(img,(256,256,3))).to(torch.float32).permute(2,0,1)

        # Only the image is resized; the label keeps its stored resolution.
        if self.img_size!=256:
            img=self.resize_img(img)
        img=self.normalize_img(img)

        return img.float(),label.float()

    def __len__(self):
        """Number of samples, i.e. number of rows in ``df``."""
        return len(self.df)


In [None]:
import torch
import pytorch_lightning as pl
import segmentation_models_pytorch as smp
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torch.optim import AdamW
import torch.nn as nn
from torchmetrics.functional import dice


In [None]:
# Registry mapping an architecture name (the ``seg_model`` config value) to the
# segmentation_models_pytorch class that implements it.
seg_models = {
    "Unet": smp.Unet,
    "Unet++": smp.UnetPlusPlus,
    "MAnet": smp.MAnet,
    "Linknet": smp.Linknet,
    "FPN": smp.FPN,
    "PSPNet": smp.PSPNet,
    "PAN": smp.PAN,
    "DeepLabV3": smp.DeepLabV3,
    "DeepLabV3+": smp.DeepLabV3Plus,
}
class pytorch_lightning_model(pl.LightningModule):
    """Lightning module wrapping a segmentation_models_pytorch network.

    Args:
        config: The ``model`` section of the configuration. Keys read here:
            ``seg_model``, ``encoder_name``, ``loss_smooth``, ``image_size``,
            ``optimizer_params`` and ``scheduler`` (``name`` and ``params``).
    """

    def __init__(self,config):
        super().__init__()
        self.config=config
        # Fixed: the keyword is ``encoder_weights`` (the original passed the
        # misspelled ``encoder_weight``).
        self.model=seg_models[config["seg_model"]](
            encoder_name=config["encoder_name"],
            encoder_weights="imagenet",
            in_channels=3,
            classes=1,
            activation=None,
        )
        self.loss_module=smp.losses.DiceLoss(mode="binary", smooth=config["loss_smooth"])
        # Raw validation logits / labels collected per step and consumed in
        # on_validation_epoch_end.
        self.val_step_outputs=[]
        self.val_step_labels=[]

    def forward(self,batch):
        """Return the raw logits of the wrapped model for a batch of images."""
        return self.model(batch)

    def _predict_at_label_size(self,imgs):
        """Run the model and bring the logits back to the 256x256 label resolution."""
        preds=self.model(imgs)
        if self.config["image_size"]!=256:
            preds=torch.nn.functional.interpolate(preds,size=256,mode="bilinear")
        return preds

    def configure_optimizers(self):
        """Build the AdamW optimizer and the scheduler selected in the config.

        Returns:
            A dict with ``"optimizer"`` and ``"lr_scheduler"`` entries.

        Raises:
            ValueError: If the configured scheduler name is not supported.
        """
        optimizer=AdamW(self.parameters(),**self.config["optimizer_params"])
        scheduler_name=self.config["scheduler"]["name"]
        scheduler_params=self.config["scheduler"]["params"]

        if scheduler_name=="CosineAnnealingLR":
            scheduler=CosineAnnealingLR(optimizer,**scheduler_params["CosineAnnealingLR"])
            # Stepped after every optimizer step rather than once per epoch.
            lr_scheduler={"scheduler":scheduler,"interval":"step"}
            # Fixed: the original returned the scheduler under "scheduler" and
            # omitted the "optimizer" entry altogether.
            return {"optimizer":optimizer,"lr_scheduler":lr_scheduler}
        if scheduler_name=="ReduceLROnPlateau":
            scheduler=ReduceLROnPlateau(optimizer,**scheduler_params["ReduceLROnPlateau"])
            # The plateau scheduler is driven by the logged validation loss.
            lr_scheduler={"scheduler":scheduler,"monitor":"val_loss"}
            return {"optimizer":optimizer,"lr_scheduler":lr_scheduler}
        raise ValueError(
            f"Unsupported scheduler {scheduler_name!r}; "
            "expected 'CosineAnnealingLR' or 'ReduceLROnPlateau'"
        )

    # Backward-compatible alias for the original (misspelled) method name.
    # Lightning itself only calls ``configure_optimizers``.
    configure_optimizer=configure_optimizers

    def training_step(self,batch,batch_idx):
        """Compute and log the Dice loss and current learning rate for one batch."""
        imgs,labels=batch
        # Fixed: the original looked up config["img_size"], a key that does not
        # exist (the config uses "image_size").
        preds=self._predict_at_label_size(imgs)
        loss=self.loss_module(preds,labels)
        # Report the real batch size instead of a hard-coded 16 so the epoch
        # average is weighted correctly.
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, batch_size=imgs.shape[0])

        # Same value the original loop ended with: the last param group's lr.
        lr=self.trainer.optimizers[0].param_groups[-1]["lr"]
        self.log("lr", lr, on_step=True, on_epoch=False, prog_bar=True)

        return loss

    # Backward-compatible alias for the original method name. Lightning itself
    # only calls ``training_step``.
    train_step=training_step

    def validation_step(self,batch,batch_idx):
        """Log the validation loss and stash logits/labels for the epoch-level Dice."""
        imgs,labels=batch
        preds=self._predict_at_label_size(imgs)
        loss=self.loss_module(preds,labels)
        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        self.val_step_outputs.append(preds)
        self.val_step_labels.append(labels)

    def on_validation_epoch_end(self):
        """Compute and log ``val_dice`` over everything seen this validation epoch."""
        all_preds=torch.sigmoid(torch.cat(self.val_step_outputs))
        all_labels=torch.cat(self.val_step_labels)
        # Reset the buffers so the next validation epoch starts empty.
        self.val_step_labels.clear()
        self.val_step_outputs.clear()
        val_dice=dice(all_preds,all_labels.long())
        self.log("val_dice", val_dice, on_step=False, on_epoch=True, prog_bar=True)
        if self.trainer.global_rank == 0:
            print(f"\nEpoch: {self.current_epoch}", flush=True)




In [None]:
import warnings

warnings.filterwarnings("ignore")  # 忽略警告信息

import os
import torch
import yaml
import pandas as pd
import pytorch_lightning as pl
from pprint import pprint
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, TQDMProgressBar
from torch.utils.data import DataLoader

with open("config.yaml","r") as file:
    config=yaml.safe_load(file)

# Apply the configured seed; the original read it from the config but never used it.
pl.seed_everything(config["seed"], workers=True)

contrails=os.path.join(config["data_path"],"contrails/")
train_path=os.path.join(config["data_path"],"train_df.csv")
val_path=os.path.join(config["data_path"],"valid_df.csv")

train_df=pd.read_csv(train_path)
valid_df=pd.read_csv(val_path)

# Each record is stored as <contrails>/<record_id>.npy.
train_df["path"] = contrails + train_df["record_id"].astype(str) + ".npy"
valid_df["path"] = contrails + valid_df["record_id"].astype(str) + ".npy"

# Use the configured image size so the inputs match the resolution the model
# section describes. The original hard-coded 256, which silently ignored
# model.image_size.
image_size=config["model"]["image_size"]
dataset_train=google_contrail_dataset(train_df,img_size=image_size,train=True)
dataset_valid=google_contrail_dataset(valid_df,img_size=image_size,train=False)

train_dataloader=DataLoader(
    dataset_train,batch_size=config["train_bs"],shuffle=True,num_workers=config["workers"]

)
valid_dataloader=DataLoader(dataset_valid,batch_size=config["valid_bs"],shuffle=False,num_workers=config['workers'])

# Checkpointing: keep only the weights of the single best model by val_dice.
checkpoint_callback = ModelCheckpoint(
    save_weights_only=True,
    monitor="val_dice",
    dirpath=config["output_dir"],
    mode="max",
    filename="model",
    save_top_k=1,
    verbose=1,
)

progress_bar_callback = TQDMProgressBar(
    refresh_rate=config["progress_bar_refresh_rate"]
)


early_stop_callback = EarlyStopping(**config["early_stop"])


trainer = pl.Trainer(
    callbacks=[checkpoint_callback, early_stop_callback, progress_bar_callback],
    **config["trainer"],
)

# The cosine scheduler is stepped per optimizer step, so scale its period by
# the number of batches each device processes per epoch.
config["model"]["scheduler"]["params"]["CosineAnnealingLR"]["T_max"] *= len(train_dataloader)/config["trainer"]["devices"]

# Build the model.
model = pytorch_lightning_model(config["model"])

# Train the model.
trainer.fit(model, train_dataloader, valid_dataloader)

In [None]:
# Inference settings.
batch_size = 128  # number of test samples per batch

# Root of the competition data and the directory holding the test records.
data = '/kaggle/input/google-research-identify-contrails-reduce-global-warming'
data_root = '/kaggle/input/google-research-identify-contrails-reduce-global-warming/test/'

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
filenames = os.listdir(data_root)  # names of all entries directly under data_root
test_df = pd.DataFrame(filenames, columns=['record_id'])  # one row per entry, stored in the 'record_id' column
test_df['path'] = data_root + test_df['record_id'].astype(str)  # full path of each entry (data_root already ends with '/')

In [None]:
class google_contrail_dataset_test(torch.utils.data.Dataset):
    """Test-time dataset that builds a false-colour image from raw band files.

    Every row of ``df`` must expose a ``path`` attribute naming a record
    directory that contains ``band_11.npy``, ``band_14.npy`` and ``band_15.npy``.

    Args:
        df: DataFrame with one row per test record and a ``path`` column.
        img_size: Side length the image is resized to; ``256`` means no resize.
        train: Flag stored on the instance as ``trn``; not used by the dataset.

    Note:
        The record id returned by ``__getitem__`` is looked up by position in a
        fresh listing of the test directory, so ``df`` is expected to list the
        records in that same order.
    """

    def __init__(self,df,img_size=256,train=True):
        self.df=df
        self.trn=train
        self.df_idx=pd.DataFrame({"idx":os.listdir("/kaggle/input/google-research-identify-contrails-reduce-global-warming/test")})
        # Same channel-wise mean/std the training dataset uses.
        self.normalize=T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        self.image_size=img_size
        if self.image_size!=256:
            self.resize_image=T.Resize(img_size)

    def read_record(self,directory):
        """Load the three band arrays of one record into a dict keyed by band name."""
        read_data={}
        for x in ("band_11", "band_14", "band_15"):
            read_data[x]=np.load(os.path.join(directory,x+".npy"))
        return read_data

    def normalize_range(self, data, bounds):
        """Linearly map ``data`` so that ``bounds[0]`` -> 0 and ``bounds[1]`` -> 1."""
        return (data - bounds[0]) / (bounds[1] - bounds[0])

    def get_false_color(self, record_data):
        """Build the 3-channel false-colour image from the band dict.

        Args:
            record_data: Dict as returned by ``read_record``.

        Returns:
            The ``[0, 1]``-clipped colour stack indexed at ``N_TIMES_BEFORE``
            along its last axis.
        """
        _T11_BOUNDS = (243, 303)
        _CLOUD_TOP_TDIFF_BOUNDS = (-4, 5)
        _TDIFF_BOUNDS = (-4, 2)

        # Index selected along the last axis of the stacked array.
        N_TIMES_BEFORE = 4

        r = self.normalize_range(record_data["band_15"] - record_data["band_14"], _TDIFF_BOUNDS)
        g = self.normalize_range(record_data["band_14"] - record_data["band_11"], _CLOUD_TOP_TDIFF_BOUNDS)
        b = self.normalize_range(record_data["band_14"], _T11_BOUNDS)
        false_color = np.clip(np.stack([r, g, b], axis=2), 0, 1)
        img = false_color[..., N_TIMES_BEFORE]

        return img

    def __getitem__(self,idx):
        """Return ``(image, record_id)`` for the row at position ``idx``."""
        row=self.df.iloc[idx]
        con_path=row.path
        data=self.read_record(con_path)
        # Fixed: pass the loaded band dict; the original passed the path string,
        # which fails as soon as get_false_color indexes it by band name.
        img=self.get_false_color(data)
        img=torch.tensor(np.reshape(img,(256,256,3))).to(torch.float32).permute(2,0,1)

        if self.image_size!=256:
            img=self.resize_image(img)
        # Fixed: apply the same normalisation as the training dataset; the
        # original built the transform but never used it.
        img=self.normalize(img)
        image_id=int(self.df_idx.iloc[idx]['idx'])

        return img.float(),torch.tensor(image_id)

    def __len__(self):
        """Number of test records, i.e. number of rows in ``df``."""
        return len(self.df)
  

    
        
     


In [None]:
# Fixed: use the test dataset class. The original instantiated the training
# dataset, which tries to np.load a single array from each record path and
# returns a label instead of the record id the inference loop unpacks.
test_dataset=google_contrail_dataset_test(
    test_df,config["model"]["image_size"],False
)
test_dataloader=DataLoader(test_dataset,batch_size=batch_size,num_workers=1)

class LightningModule(pl.LightningModule):
    """Inference-only wrapper used to load the trained checkpoint.

    Args:
        model_config: Optional ``model`` section of the configuration (reads
            ``seg_model`` and ``encoder_name``). Defaults to the notebook-level
            ``config["model"]`` so the class can still be built with no arguments.
    """

    def __init__(self, model_config=None):
        super().__init__()

        if model_config is None:
            model_config = config["model"]

        # Fixed: build the same architecture/encoder that was trained. The
        # original hard-coded Unet++ with a timm-resnest26d encoder, which does
        # not match the model the training cell builds from the config.
        self.model = seg_models[model_config["seg_model"]](
            encoder_name=model_config["encoder_name"],
            encoder_weights=None,  # weights come from the checkpoint
            in_channels=3,
            classes=1,
            activation=None,
        )

    def forward(self, batch):
        """Return the raw logits of the wrapped model for a batch of images."""
        return self.model(batch)


# Fixed: load_from_checkpoint is a classmethod and is called on the class. The
# original called it on a throwaway instance, which built the network twice and
# is rejected by recent Lightning releases.
model = LightningModule.load_from_checkpoint("/kaggle/working/models/model.ckpt")


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


model.to(device)


# Switch to evaluation mode for inference.
model.eval()


model.zero_grad()


In [None]:
def rle_encode(x, fg_val=1):
    """Run-length encode a 2-D mask in column-major order.

    Args:
        x: numpy array of shape (height, width); pixels equal to ``fg_val`` are
            foreground, everything else is background.
        fg_val: Value that marks a foreground pixel.

    Returns:
        A flat list of plain Python ints ``[start, length, start, length, ...]``
        where ``start`` is the 1-based position of a run in the column-major
        flattened mask. An all-background mask yields ``[]``.
    """
    # .T before flatten walks the array column by column (Fortran order).
    dots = np.where(x.T.flatten() == fg_val)[0]
    run_lengths = []
    prev = -2
    # .tolist() yields plain ints, so the result stringifies as digits on every
    # NumPy version (NumPy >= 2 reprs its scalars as ``np.int64(...)``).
    for b in dots.tolist():
        if b > prev + 1:
            # A gap since the previous foreground pixel starts a new run.
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def list_to_string(x):
    """Render a list as a space-separated string.

    Args:
        x: List of values (e.g. the output of ``rle_encode``).

    Returns:
        The elements joined by single spaces, or ``'-'`` for an empty list.
    """
    if x:  # non-empty list
        # Join str() of each element rather than stripping brackets from
        # str(list): the latter embeds each element's repr, which for NumPy >= 2
        # scalars looks like ``np.int64(5)``.
        s = " ".join(str(v) for v in x)
    else:
        s = '-'
    return s

In [None]:
# Load the sample submission indexed by record_id; the inference loop writes the
# encoded predictions into its 'encoded_pixels' column.
submission = pd.read_csv('/kaggle/input/google-research-identify-contrails-reduce-global-warming/sample_submission.csv', index_col='record_id')

In [None]:
# Predict a mask for every test batch and store its run-length encoding.
# The loop unpacks straight into (images, image_id); the original bound each
# batch to ``data``, overwriting the notebook-level ``data`` path constant.
for images, image_id in test_dataloader:
    images = images.to(device)
    with torch.no_grad():
        pred = model(images)

    # Fixed: start from the raw logits so ``predicted_mask`` is defined even
    # when no resize is needed (the original only assigned it inside the ``if``).
    predicted_mask = pred
    if config["model"]["image_size"] != 256:
        predicted_mask = torch.nn.functional.interpolate(pred, size=256, mode="bilinear")
    predicted_mask = torch.sigmoid(predicted_mask).cpu().numpy()

    # Binarise channel 0: strictly above 0.5 is foreground, everything else
    # (including exactly 0.5) stays background, as in the original.
    predicted_mask_with_threshold = (predicted_mask[:, 0, :, :] > 0.5).astype(np.uint8)

    for img_num in range(images.shape[0]):
        current_mask = predicted_mask_with_threshold[img_num, :, :]
        current_image_id = image_id[img_num].item()

        submission.loc[int(current_image_id), 'encoded_pixels'] = list_to_string(rle_encode(current_mask))