In [7]:
# Install the third-party packages into the kernel's environment (-q keeps pip quiet).
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases — consider pinning.
%pip install torch pandas torchvision scikit-learn tqdm kaggle yacs timm -q

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Install the Kaggle API credentials. Upload kaggle.json to the working directory before running this cell.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
# Refresh the apt package index and install unzip, which is used below to extract the archive.
!apt update -qq
!apt install -qq unzip
# Download the dataset archive with the Kaggle CLI, then extract it quietly into ./data.
!kaggle datasets download nirmalsankalana/sugarcane-leaf-disease-dataset
!unzip -q sugarcane-leaf-disease-dataset.zip -d data

127 packages can be upgraded. Run 'apt list --upgradable' to see them.
Suggested packages:
  zip
The following NEW packages will be installed:
  unzip
0 upgraded, 1 newly installed, 0 to remove and 127 not upgraded.
Need to get 175 kB of archives.
After this operation, 386 kB of additional disk space will be used.
debconf: delaying package configuration, since apt-utils is not installed

7[0;23r8[1ASelecting previously unselected package unzip.
(Reading database ... 20729 files and directories currently installed.)
Preparing to unpack .../unzip_6.0-26ubuntu3.2_amd64.deb ...
7[24;0f[42m[30mProgress: [  0%][49m[39m [..........................................................] 87[24;0f[42m[30mProgress: [ 20%][49m[39m [###########...............................................] 8Unpacking unzip (6.0-26ubuntu3.2) ...
7[24;0f[42m[30mProgress: [ 40%][49m[39m [#######################...................................] 8Setting up unzip (6.0-26ubuntu3.2) ...
7[24;0f[

In [4]:
import os
import shutil
import pandas as pd

# Flatten the extracted dataset. Every sub-directory of `data_root` other than
# "images" is treated as one class: its files are moved into a single "images"
# directory and a (file name, encoded label) table is written to dataset.csv.

# Define paths
data_root = "data"
images_dir = os.path.join(data_root, "images")

# Create images directory if it doesn't exist
os.makedirs(images_dir, exist_ok=True)

# List to store image paths and labels
dataset = []

# os.listdir() returns entries in arbitrary order; sorting keeps the CSV row
# order and the collision suffixes reproducible from run to run.
for subfolder in sorted(os.listdir(data_root)):
    subfolder_path = os.path.join(data_root, subfolder)

    # Only class directories are processed: skip loose files and the destination itself.
    if not os.path.isdir(subfolder_path) or subfolder == "images":
        continue

    for image in sorted(os.listdir(subfolder_path)):
        old_image_path = os.path.join(subfolder_path, image)

        # Anything that is not a regular file (e.g. a nested directory) is left in place.
        if not os.path.isfile(old_image_path):
            continue

        # Destination inside "data/images"; when the name is already taken,
        # append _1, _2, ... before the extension until a free name is found.
        new_image_path = os.path.join(images_dir, image)
        base, ext = os.path.splitext(image)
        counter = 1
        while os.path.exists(new_image_path):
            new_image_path = os.path.join(images_dir, f"{base}_{counter}{ext}")
            counter += 1

        # Move image
        shutil.move(old_image_path, new_image_path)

        # Append to dataset with updated path and original label
        dataset.append({"image_path": new_image_path, "label": subfolder})

    # Remove the class folder only when it really is empty: os.rmdir() raises
    # OSError on a folder that still holds entries that were skipped above.
    if not os.listdir(subfolder_path):
        os.rmdir(subfolder_path)

# An empty list means there was nothing to move (for example the cell was
# already run once). Fail with a clear message before touching dataset.csv
# instead of hitting a KeyError on the empty DataFrame below.
if not dataset:
    raise RuntimeError(
        f"No class folders containing images were found under '{data_root}'. "
        "If the dataset was already flattened, the existing dataset.csv is left untouched."
    )

df = pd.DataFrame(dataset)
df = df.rename(columns={'image_path': 'image_id'})
# Keep only the file name; basename is separator-agnostic, unlike stripping a literal "data/images/" prefix.
df["image_id"] = df["image_id"].map(os.path.basename)

from sklearn.preprocessing import LabelEncoder

# Encode the class-folder names as integer labels.
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["label"])

df.to_csv(os.path.join(data_root, "dataset.csv"), index=False)

# Class name -> integer label, kept for later inspection.
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

# Auxiliary Modules

In [None]:
import torch
import torch.nn as nn

# Seed PyTorch's global RNG so the random draws made through torch in this notebook are repeatable.
torch.manual_seed(12345)


def randn_sampling(maxint, sample_size, batch_size):
    """Draw random integer coordinate pairs.

    Returns a tensor of shape [batch_size, sample_size, 2] whose entries are
    integers drawn from [0, maxint) with ``torch.randint``.
    """
    shape = (batch_size, sample_size, 2)
    return torch.randint(low=0, high=maxint, size=shape)


def collect_samples(feats, pxy, batch_size):
    """Gather feature vectors at the given positions, one batch element at a time.

    For each of the first ``batch_size`` batch elements, ``pxy[b][:, 0]`` indexes
    dimension 2 of ``feats`` and ``pxy[b][:, 1]`` indexes dimension 3. The
    per-element results are stacked along a new leading dimension.
    """
    gathered = []
    for b in range(batch_size):
        coords = pxy[b]
        gathered.append(feats[b, :, coords[:, 0], coords[:, 1]])
    return torch.stack(gathered, dim=0)


def collect_samples_faster(feats, pxy, batch_size):
    """Gather feature vectors at the given positions without a Python loop.

    Args:
        feats: feature map of shape [n, c, h, w].
        pxy: integer positions of shape [n, m, 2]; ``pxy[..., 0]`` indexes the
            h dimension (row) and ``pxy[..., 1]`` the w dimension (column).
        batch_size: not used by this implementation; kept so the signature is
            unchanged for existing callers.

    Returns:
        Tensor of shape [n, c, m] holding ``feats[i, :, row, col]`` for every
        position of every batch element.
    """
    n, c, h, w = feats.size()
    m = pxy.size(1)

    # [n, c, h, w] -> [n, c, hw] -> [c, n, hw] -> [c, nhw]
    # reshape (rather than view) so non-contiguous feature maps are accepted too.
    flat = feats.reshape(n, c, -1).permute(1, 0, 2).reshape(c, -1)

    # Flattening [h, w] is row-major, so the offset inside one image is
    # row * w + col (the row stride is the width, not the height). Each image
    # then starts at i * h * w in the flattened batch.
    image_start = torch.arange(n, device=pxy.device).view(n, 1) * (h * w)
    flat_idx = (image_start + pxy[:, :, 0] * w + pxy[:, :, 1]).reshape(-1)  # [n, m, 2] -> [nm]

    # Explicit m (instead of -1) keeps the reshape well-defined when m == 0.
    return flat[:, flat_idx].view(c, n, m).permute(1, 0, 2)


def collect_positions(batch_size, N):
    """Enumerate every cell of an N x N grid plus a per-sample shuffled copy.

    Returns:
        pts_norm: [B, N*N, 2] grid coordinates in row-major order, repeated for
            each batch element.
        pts_rnd: [B, N*N, 2] the same coordinates, permuted independently for
            each batch element with ``torch.randperm``.
    """
    grid = []
    for row in range(N):
        for col in range(N):
            grid.append([row, col])
    pts_norm = torch.tensor(grid).repeat(batch_size, 1, 1)  # [N*N, 2] -> [B, N*N, 2]

    # One independent permutation of the N*N cells per batch element.
    perms = []
    for _ in range(batch_size):
        perms.append(torch.randperm(N * N))
    rnd = torch.stack(perms, dim=0)  # [B, N*N]

    shuffled = []
    for b, perm in enumerate(rnd):
        shuffled.append(pts_norm[b, perm])
    pts_rnd = torch.stack(shuffled, dim=0)  # [B, N*N, 2]
    return pts_norm, pts_rnd


class DenseRelativeLoc(nn.Module):
    """MLP head that maps pairs of feature-map locations to a relative-offset prediction.

    Pairs of locations are taken from a [B, C, H, W] feature map, their feature
    vectors are concatenated and passed through a three-layer MLP. ``forward``
    returns the prediction together with the coordinate difference of each pair.

    Modes (``drloc_mode``):
        "l1": the MLP outputs ``out_dim`` values per pair directly.
        "ce" / "cbr": the MLP outputs a 512-d embedding that feeds two separate
            linear heads (``self.unshared``), one per axis, each with
            ``out_dim`` outputs (``out_dim * 2 - 1`` when ``use_abs`` is False).

    Both coordinates are sampled from [0, H) and W is never used, so the code
    assumes a square feature map (H == W).
    """

    def __init__(self, in_dim, out_dim=2, sample_size=32, drloc_mode="l1", use_abs=False):
        super().__init__()
        self.sample_size = sample_size
        self.in_dim = in_dim
        self.drloc_mode = drloc_mode
        self.use_abs = use_abs

        # Decide the per-axis output size and the width of the shared trunk's last layer.
        if drloc_mode == "l1":
            self.out_dim = out_dim
            trunk_out = self.out_dim
            needs_heads = False
        elif drloc_mode in ("ce", "cbr"):
            self.out_dim = out_dim if use_abs else out_dim * 2 - 1
            trunk_out = 512
            needs_heads = True
        else:
            raise NotImplementedError("We only support l1, ce and cbr now.")

        # Shared trunk; the input is the concatenation of two C-dim feature vectors.
        self.layers = nn.Sequential(
            nn.Linear(in_dim * 2, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, trunk_out),
        )

        if needs_heads:
            # Two heads that do not share weights: index 0 is used for the
            # first coordinate, index 1 for the second.
            self.unshared = nn.ModuleList()
            for _ in range(2):
                self.unshared.append(nn.Linear(512, self.out_dim))

    def forward_features(self, x, mode="part"):
        """Build location pairs from ``x`` and run the shared trunk on them.

        Args:
            x: feature map of shape [B, C, H, W].
            mode: "part" samples ``sample_size`` random pairs per image; any
                other value pairs every grid cell with a randomly permuted cell.

        Returns:
            (trunk output, coordinate differences as a float tensor on x's device, H)
        """
        B, C, H, W = x.size()

        if mode != "part":
            # Every grid position, paired with a per-image random permutation of the grid.
            pts_norm, pts_rnd = collect_positions(B, H)
            ptsx = x.view(B, C, -1).transpose(1, 2).contiguous()  # [B, H*W, C]
            ptsy = collect_samples(x, pts_rnd, B).transpose(1, 2).contiguous()  # [B, H*W, C]

            deltaxy = (pts_norm - pts_rnd).float().to(x.device)  # [B, H*W, 2]
        else:
            # randn_sampling returns random integers of shape [batch, sample_size, 2],
            # every value strictly below its first argument (here H).
            pxs = randn_sampling(H, self.sample_size, B).detach()
            pys = randn_sampling(H, self.sample_size, B).detach()

            deltaxy = (pxs - pys).float().to(x.device)  # [B, sample_size, 2]

            ptsx = collect_samples_faster(x, pxs, B).transpose(1, 2).contiguous()  # [B, sample_size, C]
            ptsy = collect_samples_faster(x, pys, B).transpose(1, 2).contiguous()  # [B, sample_size, C]

        paired = torch.cat([ptsx, ptsy], dim=2)
        return self.layers(paired), deltaxy, H

    def forward(self, x, normalize=False):
        """Return ``(predxy, deltaxy)`` for randomly sampled location pairs of ``x``.

        ``deltaxy`` is flattened to [B*sample_size, 2]. With ``use_abs`` it is
        the absolute difference (range [0, H-1]); otherwise it is shifted by
        H-1 into [0, 2*(H-1)]. ``normalize`` divides by the upper end of that range.

        ``predxy`` is [B*sample_size, out_dim] in "l1" mode and
        [B*sample_size, out_dim, 2] otherwise (last axis: head 0, head 1).
        """
        pred_feats, offsets, H = self.forward_features(x)
        deltaxy = offsets.view(-1, 2)  # [B*sample_size, 2]

        span = H - 1
        if self.use_abs:
            deltaxy = torch.abs(deltaxy)
            upper = span
        else:
            deltaxy += span
            upper = 2 * span
        if normalize:
            deltaxy /= float(upper)

        if self.drloc_mode == "l1":
            return pred_feats.view(-1, self.out_dim), deltaxy  # [B*sample_size, Output_size]

        per_axis = []
        for head in self.unshared:
            per_axis.append(head(pred_feats).view(-1, self.out_dim))  # [B*sample_size, Output_size]
        predxy = torch.stack(per_axis, dim=2)  # [B*sample_size, Output_size, 2]
        return predxy, deltaxy

    def flops(self):
        """Multiply-accumulate count of the linear layers for ``sample_size`` pairs."""
        per_pair = self.in_dim * 2 * 512 + 512 * 512 + 512 * self.out_dim
        if self.drloc_mode in ("ce", "cbr"):
            # Extra 512x512 trunk layer plus the second output head.
            per_pair += 512 * 512 + 512 * self.out_dim
        return per_pair * self.sample_size

In [1]:
import pandas as pd
from dataset import build_dataset, build_loader

# Image directory and label CSV, given as absolute /workspace paths.
# NOTE(review): presumably these are the files the preparation cell wrote under
# the relative "data" directory — confirm the notebook's working directory.
path = '/workspace/data/images'
df = pd.read_csv('/workspace/data/dataset.csv')

# build_dataset comes from the project-local dataset module. Its three return
# values look like train / test / validation splits going by the variable
# names — TODO confirm the order against dataset.py.
trd, testd, vald = build_dataset(df, path)

In [2]:
# Wrap the datasets in loaders via the project-local build_loader.
# NOTE(review): only trd and vald are passed (testd is not) while three loaders
# are unpacked, and the third argument 16 is presumably the batch size —
# confirm both against build_loader's signature in dataset.py.
train_loader, test_loader, val_loader = build_loader(trd, vald, 16)

In [3]:
from train import main
# Run the project-local train.main with the train and validation loaders;
# test_loader is not passed to it.
main(train_loader, val_loader)

[32m[2025-03-07 05:54:12 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 72)[0m: INFO Creating model:cswin_boat/CSWin_BOAT_64_24322_small_224




[32m[2025-03-07 05:54:14 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 81)[0m: INFO number of params: 60235181
[32m[2025-03-07 05:54:14 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 103)[0m: INFO Start training
[32m[2025-03-07 05:56:25 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 205)[0m: INFO Train: [0/300][125/126]	eta 0:00:01 lr 0.000003	time 1.1001 (1.0344)	loss 1.5885 (1.6057)	grad_norm 0.9981 (1.0612)	mem 13904MB
[32m[2025-03-07 05:56:25 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 220)[0m: INFO EPOCH 0 training takes 0:02:10
[32m[2025-03-07 05:56:25 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 256)[0m: INFO Test: [0/32]	Time 0.798 (0.798)	Loss 1.5885 (1.5885)	Acc@1 43.750 (43.750)	Acc@5 100.000 (100.000)	Mem 13904MB
[32m[2025-03-07 05:56:32 CSWin_BOAT_64_24322_small_224][0m[33m(train.py 256)[0m: INFO Test: [10/32]	Time 0.619 (0.694)	Loss 1.5951 (1.5907)	Acc@1 37.500 (32.955)	Acc@5 100.000 (100.000)	Mem 13904MB
[32m[2025-03-07 05:56:39 CSWin_BOAT_

KeyboardInterrupt: 