In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
from torch import nn
from d2l import torch as d2l
from sklearn.model_selection import KFold, train_test_split

In [None]:
# Machine-specific project directory. The relative '../data/...' paths used
# later in this notebook are resolved against the working directory, so switch
# to it when it exists. On any other machine keep the current directory
# instead of failing with FileNotFoundError.
project_dir = '/root/autodl-tmp/train/code'
if os.path.isdir(project_dir):
    os.chdir(project_dir)
else:
    print(f"Directory {project_dir} not found; staying in {os.getcwd()}")
os.getcwd()

In [2]:
class CustomImageDataset(Dataset):
    """Image dataset indexed by a CSV file.

    Every CSV row names one image through its ``image`` column (a path
    relative to ``imd_dir``). In training mode the ``label`` column is also
    read and translated to an integer class index through ``label_map``.
    """

    def __init__(self, csv_file, imd_dir, is_train=True, transform = None, label_map = None):
        """Read the CSV index and, when needed, derive the label mapping.

        Args:
            csv_file: Path of the CSV file loaded with pandas.
            imd_dir: Directory that the ``image`` column paths are joined onto.
            is_train: When true, items are ``(image, label)`` pairs; otherwise
                only the image is returned.
            transform: Optional callable applied to each loaded RGB image.
            label_map: Optional ``{label: index}`` dict. If omitted in
                training mode, one is built from the CSV's ``label`` column.
        """
        self.data = pd.read_csv(csv_file)
        self.imd_dir = imd_dir
        self.is_train = is_train
        self.transform = transform
        self.label_map = label_map

        # A supplied mapping is kept as is; non-training datasets never build one.
        if self.is_train and label_map is None:
            self._create_label_map()

    def _create_label_map(self):
        """Assign consecutive indices to the sorted distinct labels."""
        ordered_labels = sorted(self.data['label'].unique())
        self.label_map = dict(zip(ordered_labels, range(len(ordered_labels))))

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label_tensor)`` in training mode, where the label is a
            scalar ``torch.long`` tensor; just ``image`` otherwise.
        """
        row = self.data.iloc[idx]
        image = Image.open(os.path.join(self.imd_dir, row['image'])).convert('RGB')

        if self.transform:
            image = self.transform(image)

        if not self.is_train:
            return image

        class_index = self.label_map[row['label']]
        return image, torch.tensor(class_index, dtype=torch.long)


In [3]:
# The network built by get_net() flattens to 6400 features, which only holds
# for 224x224 inputs, and default batch collation needs equally sized tensors.
# Resize enforces both; it leaves images that are already 224x224 unchanged.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [4]:
# CSV index files and image root, all relative to the working directory.
train_csv = '../data/classify-leaves/train.csv'
test_csv = '../data/classify-leaves/test.csv'
img_root_dir = '../data/classify-leaves/images'

# The training dataset derives the label -> index mapping from its CSV; keep a
# reference to it so predictions can be decoded back to label names later.
train_dataset = CustomImageDataset(
    csv_file=train_csv,
    imd_dir=img_root_dir,
    is_train=True,
    transform=train_transform,
)
label_map = train_dataset.label_map
print(f"Using label mapping: {label_map}")

Using label mapping: {'abies_concolor': 0, 'abies_nordmanniana': 1, 'acer_campestre': 2, 'acer_ginnala': 3, 'acer_griseum': 4, 'acer_negundo': 5, 'acer_palmatum': 6, 'acer_pensylvanicum': 7, 'acer_platanoides': 8, 'acer_pseudoplatanus': 9, 'acer_rubrum': 10, 'acer_saccharinum': 11, 'acer_saccharum': 12, 'aesculus_flava': 13, 'aesculus_glabra': 14, 'aesculus_hippocastamon': 15, 'aesculus_pavi': 16, 'ailanthus_altissima': 17, 'albizia_julibrissin': 18, 'amelanchier_arborea': 19, 'amelanchier_canadensis': 20, 'amelanchier_laevis': 21, 'asimina_triloba': 22, 'betula_alleghaniensis': 23, 'betula_jacqemontii': 24, 'betula_lenta': 25, 'betula_nigra': 26, 'betula_populifolia': 27, 'broussonettia_papyrifera': 28, 'carpinus_betulus': 29, 'carpinus_caroliniana': 30, 'carya_cordiformis': 31, 'carya_glabra': 32, 'carya_ovata': 33, 'carya_tomentosa': 34, 'castanea_dentata': 35, 'catalpa_bignonioides': 36, 'catalpa_speciosa': 37, 'cedrus_atlantica': 38, 'cedrus_deodara': 39, 'cedrus_libani': 40, 'cel

In [5]:
def create_kfold_splits(full_dataset, n_splits=5, random_state=42, batch_size=32, num_workers=10):
    """Split a dataset into K shuffled folds and wrap each fold in DataLoaders.

    Args:
        full_dataset: Dataset supporting ``len()`` and integer indexing.
        n_splits: Number of folds.
        random_state: Seed for the fold shuffling, so the splits are reproducible.
        batch_size: Batch size used by both loaders of every fold.
        num_workers: Worker processes used by each DataLoader.

    Returns:
        A list with one ``(train_loader, val_loader)`` tuple per fold.
    """
    kfold = KFold(n_splits, shuffle=True, random_state=random_state)

    # KFold partitions purely by position and ignores any labels passed to
    # split(), so only the index array is needed (the split is NOT stratified).
    indices = np.arange(len(full_dataset))

    fold_loaders = []

    for fold, (train_idx, val_idx) in enumerate(kfold.split(indices)):
        print(f"Fold {fold+1}/{n_splits}")
        print(f"  Train: {len(train_idx)} samples")
        print(f"  Validation: {len(val_idx)} samples")

        train_subset = Subset(full_dataset, train_idx)
        val_subset = Subset(full_dataset, val_idx)

        train_iter = DataLoader(train_subset, batch_size=batch_size, shuffle=True,
                                num_workers=num_workers, pin_memory=True)
        # Validation only measures accuracy, so a fixed order is sufficient.
        val_iter = DataLoader(val_subset, batch_size=batch_size, shuffle=False,
                              num_workers=num_workers, pin_memory=True)

        fold_loaders.append((train_iter, val_iter))
    return fold_loaders

In [6]:
# lenet
# def get_net():
#     net = nn.Sequential(
#         # 图片为224*224，输出大小为(224+2*3-5)/1+1=224
#         nn.Conv2d(3, 16, kernel_size=5, stride=2),
#         nn.ReLU(),
#         # 池化后输出大小为(224+2*0-2)/2+1=112
#         nn.AvgPool2d(kernel_size=2, stride=2),
#         # 通道扩充为32，输出大小为(112+2*0-5)/1+1=108
#         nn.Conv2d(16, 32, kernel_size=5),
#         nn.ReLU(),
#         # 输出大小为(108+2*0-2)/2+1=54
#         nn.AvgPool2d(kernel_size=2, stride=2),
#         nn.Flatten(),
#         nn.Linear(32 * 54 * 54, 512),
#         nn.Linear(512, 256),
#         nn.Linear(256, 176)
#     )
#     return net

# alexnet
def get_net(in_channels=3, num_classes=176):
    """Build an AlexNet-style CNN for 224x224 images.

    Args:
        in_channels: Number of channels of the input images. Defaults to 3
            because the dataset converts every image to RGB.
        num_classes: Size of the output layer. Defaults to 176, the class
            count used by the commented-out LeNet head in this notebook
            (TODO confirm it equals ``len(label_map)``).

    Returns:
        An ``nn.Sequential`` mapping ``(N, in_channels, 224, 224)`` inputs to
        ``(N, num_classes)`` logits.
    """
    net = nn.Sequential(
        # A large 11x11 window captures objects; stride 4 shrinks the output
        # height and width. Far more output channels than LeNet.
        nn.Conv2d(in_channels, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Smaller window; padding 2 keeps height/width unchanged while the
        # number of output channels grows.
        nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Three consecutive 3x3 convolutions. Channels grow further except in
        # the last one; no pooling after the first two of them.
        nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Flatten(),
        # 256 channels * 5 * 5 spatial positions = 6400 features for 224x224
        # inputs. The wide fully connected layers use dropout against overfitting.
        nn.Linear(6400, 4096), nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096), nn.ReLU(),
        nn.Dropout(p=0.5),
        # Output layer: one logit per class.
        nn.Linear(4096, num_classes))
    return net

In [7]:
def evaluate_accuracy_gpu(net, data_iter, device=None): #@save
    """Compute the accuracy of a model over a dataset.

    Args:
        net: The model. If it is an ``nn.Module`` it is switched to eval mode
            and, when ``device`` is not given, its parameters' device is used.
        data_iter: Iterable of ``(X, y)`` batches; ``X`` may be a tensor or a
            list of tensors.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    if isinstance(net, nn.Module):
        net.eval()  # evaluation mode
        if not device:
            device = next(iter(net.parameters())).device
    # Accumulates (correct predictions, total predictions).
    totals = d2l.Accumulator(2)
    with torch.no_grad():
        for features, targets in data_iter:
            if isinstance(features, list):
                # List-valued inputs (needed for BERT fine-tuning in d2l).
                features = [part.to(device) for part in features]
            else:
                features = features.to(device)
            targets = targets.to(device)
            correct = d2l.accuracy(net(features), targets)
            totals.add(correct, targets.numel())
    return totals[0] / totals[1]

def train_ch6(net, train_iter, test_iter, num_epochs, lr, device):
    """Train a model on the given device (defined in chapter 6 of d2l).

    Linear and convolutional weights are re-initialised with Xavier uniform
    initialisation, then the model is trained with plain SGD and cross-entropy
    loss while the loss and accuracies are plotted with ``d2l.Animator``.

    Args:
        net: Model to train; modified in place and moved to ``device``.
        train_iter: Sized iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches evaluated after every epoch.
        num_epochs: Number of passes over ``train_iter``.
        lr: SGD learning rate.
        device: Device used for training.
    """
    def init_weights(m):
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    # Plot about five training points per epoch. Clamp to 1 so that loaders
    # with fewer than five batches do not cause a modulo-by-zero error.
    plot_every = max(1, num_batches // 5)
    for epoch in range(num_epochs):
        # Sum of training loss, number of correct predictions, number of samples.
        metric = d2l.Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')



In [None]:
# One (train, validation) DataLoader pair per fold.
kfold_loaders = create_kfold_splits(train_dataset, n_splits=5)

lr, num_epochs = 0.08, 10
k_fold_nets = []
# Train a freshly constructed network on every fold and keep each trained
# model so the folds can be combined at prediction time.
for fold, (train_iter, val_iter) in enumerate(kfold_loaders):
    print(f'训练第{fold+1}折')
    net = get_net()
    train_ch6(
        net,
        train_iter,
        val_iter,
        num_epochs=num_epochs,
        lr=lr,
        device=d2l.try_gpu(),
    )
    k_fold_nets.append(net)

d2l.plt.show()

Fold 1/5
  Train: 14682 samples
  Validation: 3671 samples
Fold 2/5
  Train: 14682 samples
  Validation: 3671 samples
Fold 3/5
  Train: 14682 samples
  Validation: 3671 samples
Fold 4/5
  Train: 14683 samples
  Validation: 3670 samples
Fold 5/5
  Train: 14683 samples
  Validation: 3670 samples
第1折
training on cpu


In [None]:
test_dataset = CustomImageDataset(test_csv, img_root_dir, is_train=False, transform=test_transform)
# Do not shuffle: the submission pairs predictions with the CSV rows by
# position, so the loader must yield the images in CSV order.
test_iter = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)

In [None]:
def assemble_predict(nets, dataloader, device):
    """Predict class indices by averaging the logits of several networks.

    Args:
        nets: Non-empty sequence of models that all produce logits of the
            same shape for a given batch.
        dataloader: Iterable yielding input batches (tensors without labels).
        device: Device every batch is moved to before the forward pass; the
            networks are expected to already be on it.

    Returns:
        A flat list with one predicted class index per input sample, in the
        order the dataloader yields the samples.

    Raises:
        ValueError: If ``nets`` is empty.
    """
    if len(nets) == 0:
        raise ValueError("assemble_predict requires at least one network")

    # Switch to evaluation mode once, instead of once per batch.
    for net in nets:
        net.eval()

    all_preds = []
    with torch.no_grad():
        for X in dataloader:
            X = X.to(device)
            logits_sum = None

            for net in nets:
                logits = net(X)
                logits_sum = logits if logits_sum is None else logits_sum + logits

            # Average over the number of networks, not the layers of the last one.
            avg_logits = logits_sum / len(nets)
            preds = avg_logits.argmax(dim=1).cpu().numpy()
            all_preds.extend(preds)
    return all_preds

In [None]:
# Combine the per-fold networks into one predicted class index per test image.
test_preds = assemble_predict(k_fold_nets, test_iter, device = d2l.try_gpu())

# Reverse the label -> index mapping so indices can be decoded to label names.
idx_to_label = dict(zip(label_map.values(), label_map.keys()))
pred_labels = []
for idx in test_preds:
    pred_labels.append(idx_to_label[idx])

test_img_names = test_dataset.data['image'].tolist()

# NOTE(review): image names and predictions are paired purely by position, so
# the predictions must be in the same order as the rows of the test CSV.
submission = pd.DataFrame({'image': test_img_names, 'label': pred_labels})
submission.to_csv('../data/classify-leaves/submission.csv', index=False)
print("提交文件已保存至 ../data/classify-leaves/submission.csv")