In [1]:
import pandas as pd

df = pd.read_csv('train.csv')

In [2]:
import torch
from torch import nn
from PIL import Image

from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset

import os
import pandas as pd
import numpy as np

# Use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

Device: cuda


In [3]:
df

Unnamed: 0,id,rle_mask
0,2c45b152f1,99 3 197 6 295 9 395 10 494 12 594 13 694 14 7...
1,3cb59a4fdc,1 5656
2,e185ab5dc1,4647 2 4748 10 4849 18 4950 25 5051 29 5152 34...
3,c78c89577c,101 1
4,6306dd3a8e,1 30 102 29 203 29 304 28 405 27 506 27 607 26...
...,...,...
3995,429b289e07,1 6463 6465 98 6566 97 6667 95 6768 94 6869 93...
3996,5d752d6d4a,
3997,26527458de,
3998,25fb3a895a,


In [4]:
train_images_dir = 'train/images'
train_masks_dir = 'train/masks'

import os

In [5]:
transform = transforms.Compose([
    transforms.Resize((101, 101)),
    transforms.ToTensor()
])


In [6]:
class SaltDataset(Dataset):
    def __init__(self,df,img_dir,mask_dir,transform=None):
        self.df = df
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, (self.df.iloc[idx]['id']+'.png'))
        mask_path = os.path.join(self.mask_dir, (self.df.iloc[idx]['id']+'.png'))
        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        mask = (mask>0.5).float()
        return image, mask

In [7]:
# ----------------------- 5. CV – U‑Net (binary) --------------------
class UNet(nn.Module):
    def __init__(self, in_c=3, out_c=1, feats=[64,128,256]):
        super().__init__()
        self.encoder = nn.ModuleList()
        self.pool = nn.MaxPool2d(2)
        prev = in_c
        for f in feats:
            self.encoder.append(self.double_conv(prev, f))
            prev = f
        self.bottleneck = self.double_conv(feats[-1], feats[-1]*2)

        self.up_trans = nn.ModuleList()
        self.decoder  = nn.ModuleList()
        for f in feats[::-1]:
            self.up_trans.append(nn.ConvTranspose2d(f*2, f, 2, stride=2))
            self.decoder.append(self.double_conv(f*2, f))

        self.final = nn.Conv2d(feats[0], out_c, 1)

    @staticmethod
    def double_conv(in_c, out_c):
        return nn.Sequential(
            nn.Conv2d(in_c, out_c, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(out_c, out_c, 3, padding=1), nn.ReLU(inplace=True)
        )

    def forward(self, x):
        skips = []
        for down in self.encoder:
            x = down(x)
            skips.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skips = skips[::-1]

        for idx in range(len(self.up_trans)):
            x = self.up_trans[idx](x)
            skip = skips[idx]
            if x.shape != skip.shape:  # если размеры не совпали
                x = nn.functional.interpolate(x, size=skip.shape[2:])
            x = torch.cat([skip, x], dim=1)
            x = self.decoder[idx](x)

        return self.final(x)          # логиты маски


In [8]:
train_set = SaltDataset(df,train_images_dir,train_masks_dir,transform)
train_dataloader = DataLoader(train_set, batch_size=8, shuffle=True)
model = UNet().to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)

In [9]:
image, mask = train_set[0]

In [10]:
image.shape

torch.Size([3, 101, 101])

In [11]:
n_epochs = 15  # Reduce for Kaggle runtime limits

for epoch in range(n_epochs):
    model.train()
    train_loss = 0
    for batch, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_loss /= len(train_dataloader)
    print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f}")

Epoch 1 - Train Loss: 63.6802
Epoch 2 - Train Loss: 0.5608
Epoch 3 - Train Loss: 0.5611
Epoch 4 - Train Loss: 0.5606
Epoch 5 - Train Loss: 0.5613
Epoch 6 - Train Loss: 0.5603
Epoch 7 - Train Loss: 0.5613
Epoch 8 - Train Loss: 0.5609
Epoch 9 - Train Loss: 0.5610
Epoch 10 - Train Loss: 0.5608
Epoch 11 - Train Loss: 0.5610
Epoch 12 - Train Loss: 0.5605
Epoch 13 - Train Loss: 0.5608
Epoch 14 - Train Loss: 0.5607
Epoch 15 - Train Loss: 0.5609


In [12]:
test = pd.read_csv('sample_submission.csv')
model.eval()
def rle_encode(mask):
    pixels = mask.flatten(order='F')  # по колонкам (Fortran order)
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
submission = []
with torch.inference_mode():
    for img_id in test['id']:
        img_path = os.path.join('test/images',img_id+'.png')
        img = Image.open(img_path).convert('RGB')
        img_tensor = transform(img).unsqueeze(0).to(device)

        y_pred = model(img_tensor)
        mask = (torch.sigmoid(y_pred)>0.5).float()
        mask_np = mask.squeeze().cpu().numpy().astype(np.uint8)

        rle = rle_encode(mask_np)
        submission.append((img_id,rle))

df_submission = pd.DataFrame(submission,columns=['id','rle_mask'])
df_submission.to_csv('submission.csv',index=False)