In [None]:
# Copy the timm 0.4.12 wheel from the input dataset into /kaggle/working and
# install it from that local file.
!cp -r ../input/petnets/timm-0.4.12-py3-none-any.whl /kaggle/working
!pip install -qq ./timm-0.4.12-py3-none-any.whl
# Copy model_swin.py into /kaggle/working — presumably so that the
# `import model_swin` in the next cell can resolve it.
!cp -r ../input/petnets/model_swin.py /kaggle/working

In [None]:
import model_swin

In [None]:
from torch.utils.data import Dataset
import os
from torchvision import transforms
from PIL import Image
import numpy as np
import torch
import csv
from tqdm import tqdm
import pandas as pd


class MyData(Dataset):
    """Image dataset backed by a CSV file with one row per image.

    Each row's ``Id`` column names a ``<Id>.jpg`` file under ``photo_root``.
    In training mode the row's ``Pawpularity`` value is returned as the target.

    Args:
        photo_root: Directory that contains the ``.jpg`` images.
        labels_root: Path of the CSV file; it is read with ``pandas.read_csv``.
        is_train: When True, ``__getitem__`` returns ``(img, label)``;
            otherwise it returns only ``img``.
        use_meta: Accepted for interface compatibility; currently unused.
        transforms_input: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``labels_root`` is None.
    """

    def __init__(self, photo_root, labels_root=None, is_train=True, use_meta=False, transforms_input=None):
        if labels_root is None:
            # Fail early with an actionable message instead of an opaque
            # error from inside pandas.
            raise ValueError("labels_root must be the path of a CSV file with an 'Id' column")

        self.photo_root = photo_root  # directory holding the images
        self.labels_root = labels_root  # path of train.csv / test.csv
        self.is_train = is_train  # whether samples carry a label
        # self.use_meta = use_meta
        self.transforms = transforms_input  # optional preprocessing callable

        self.df = pd.read_csv(labels_root)

    def __getitem__(self, index):
        """Return the image at row position ``index`` (plus its label in training mode)."""
        # Positional lookup, so the result does not depend on the frame's index labels.
        image_id = self.df['Id'].iloc[index]
        path = os.path.join(self.photo_root, image_id) + '.jpg'

        # Force three channels: a grayscale / RGBA / CMYK file would otherwise
        # reach a 3-channel Normalize with the wrong channel count. The context
        # manager releases the file handle as soon as the pixels are copied.
        with Image.open(path) as raw:
            img = raw.convert('RGB')

        if self.transforms is not None:
            img = self.transforms(img)

        if not self.is_train:
            return img

        label = torch.as_tensor(self.df['Pawpularity'].iloc[index]).reshape(1)
        return img, label

    def __len__(self):
        """Number of rows in the CSV file."""
        return len(self.df)

In [None]:

# ensemble
# mixed-precision training
import math

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, Subset, ConcatDataset
from torchvision import transforms
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.utils.tensorboard import SummaryWriter
# import data_new
from tqdm import tqdm

from torch.cuda.amp import GradScaler, autocast

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, Subset, ConcatDataset
from torchvision import transforms
from torch.optim import AdamW
from torch.utils.tensorboard import SummaryWriter
import math
import random
from tqdm import tqdm
from sklearn.model_selection import KFold

MY_DATA_MEAN = [0.5155, 0.4858, 0.4506]
MY_DATA_STD = [0.2693, 0.2650, 0.2666]
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]


# class TraindataSet(Dataset):
#     def __init__(self, train_features, train_labels):
#         self.x_data = train_features
#         self.y_data = train_labels
#         self.len = len(train_labels)
#
#     def __getitem__(self, index):
#         return self.x_data[index], self.y_data[index]
#
#     def __len__(self):
#         return self.len
class MySubset(Dataset):
    r"""
    View onto selected samples of a dataset, with a transform applied per image.

    The wrapped dataset must yield ``(img, label)`` pairs; the transform is
    applied to ``img`` only and the label is passed through untouched.

    Args:
        dataset (Dataset): The whole Dataset
        indices (sequence): Positions in the whole set that make up this subset
        transforms_input (callable): Applied to every image fetched through the subset
    """

    def __init__(self, dataset, indices, transforms_input) -> None:
        self.transforms = transforms_input
        self.indices = indices
        self.dataset = dataset

    def __len__(self):
        # The subset is exactly as long as its index list.
        return len(self.indices)

    def __getitem__(self, idx):
        # Map the subset-local position onto the wrapped dataset first.
        source_position = self.indices[idx]
        raw_image, target = self.dataset[source_position]
        return self.transforms(raw_image), target


# Training function
def train(pretrain_dir, train_data, test_data, k_num, epochs, lr, weight_decay,
          batch_size, device):
    """Fine-tune a Swin Transformer regressor on one fold and keep its best weights.

    The network is optimised with ``BCEWithLogitsLoss`` against the labels
    divided by 100. After every epoch it is evaluated on ``test_data``; whenever
    the validation RMSE (computed on ``sigmoid(output) * 100`` versus the raw
    labels) improves, the state dict is written to ``net_of_fold_<k_num>.pth``.
    Per-epoch train and validation RMSE are logged to TensorBoard under ``logs``.

    Args:
        pretrain_dir: Path of the checkpoint file; its ``'model'`` entry is
            loaded into the network with ``strict=False``.
        train_data: Dataset yielding ``(image, label)`` pairs used for training.
        test_data: Dataset yielding ``(image, label)`` pairs used for validation.
        k_num: Fold number; used in log messages, TensorBoard tags and the
            checkpoint file name.
        epochs: Number of training epochs.
        lr: Initial learning rate passed to AdamW.
        weight_decay: Weight decay passed to AdamW.
        batch_size: Batch size for both data loaders.
        device: ``torch.device`` (or device string) to run on.

    Returns:
        tuple: ``(train_loss_rmse, test_loss_min)`` — the training RMSE of the
        last epoch and the lowest validation RMSE seen over all epochs.

    Raises:
        ValueError: If an epoch yields no training or no validation samples.
    """
    train_loss_rmse = 0.0
    test_loss_min = 100.0  # starting threshold a validation RMSE must beat to be saved
    dataloader_train = DataLoader(train_data, batch_size, shuffle=True)
    dataloader_test = DataLoader(test_data, batch_size)
    num_train_batches = len(dataloader_train)

    # Instantiate the "small" model variant and load the pretrained weights.
    # NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
    net = model_swin.SwinTransformer(embed_dim=96,
                                     depths=[2, 2, 18, 2],
                                     num_heads=[3, 6, 12, 24])
    checkpoint = torch.load(pretrain_dir, map_location='cpu')
    net.load_state_dict(checkpoint['model'], strict=False)
    # Replace the classification head with a single-output regression head.
    net.head = nn.Linear(in_features=net.head.in_features, out_features=1)
    net.to(device)

    optimizer = AdamW(net.parameters(), eps=1e-8, betas=(0.9, 0.999), lr=lr,
                      weight_decay=weight_decay)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=5, eta_min=1e-7)
    loss_fn_train = nn.BCEWithLogitsLoss()

    # Mixed precision only makes sense on CUDA; on CPU both helpers become no-ops.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)

    writer = SummaryWriter('logs')
    try:
        for epoch in range(epochs):
            train_step = 0
            # Squared-error sums and sample counts, so the RMSE is weighted per
            # sample rather than per batch (the last batch may be smaller).
            train_sq_err, train_count = 0.0, 0
            val_sq_err, val_count = 0.0, 0

            net.train()
            print("-----epoch:{}/{} of fold:{}-----".format(epoch + 1, epochs, k_num))
            for batch_idx, (img, labels) in enumerate(dataloader_train):
                img = img.to(device)
                labels = labels.to(device).float() / 100
                optimizer.zero_grad()
                with autocast(enabled=use_amp):
                    outputs = net(img)
                    loss = loss_fn_train(outputs, labels)
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                # Fractional epoch: T_0=5 then means "restart every 5 epochs",
                # not "restart every 5 batches".
                scheduler.step(epoch + batch_idx / num_train_batches)

                # Monitoring metric only; kept out of the autograd graph.
                with torch.no_grad():
                    preds = torch.sigmoid(outputs.float()) * 100
                    train_sq_err += nn.functional.mse_loss(
                        preds, labels * 100, reduction='sum').item()
                train_count += labels.numel()
                train_step += 1

                if train_step % 20 == 0:
                    print('fold:{}, epoch:{}, train step = {}, loss={}'.format(
                        k_num, epoch + 1, train_step, loss.item()))

            net.eval()
            with torch.no_grad():
                for img, labels in dataloader_test:
                    img = img.to(device)
                    # Labels may arrive as integers; the loss needs floats.
                    labels = labels.to(device).float()
                    preds = torch.sigmoid(net(img).float()) * 100
                    val_sq_err += nn.functional.mse_loss(
                        preds, labels, reduction='sum').item()
                    val_count += labels.numel()

            if train_count == 0 or val_count == 0:
                raise ValueError('train_data and test_data must both yield at least one sample')

            train_loss_rmse = math.sqrt(train_sq_err / train_count)
            test_loss_mean = math.sqrt(val_sq_err / val_count)
            print("-----epoch:{}/{} of fold:{} finished-----".format(epoch + 1, epochs, k_num))
            print('train_loss_rmse={}'.format(train_loss_rmse))
            print('test_loss_mean={}'.format(test_loss_mean))
            writer.add_scalar('train_loss_rmse of fold:{}'.format(k_num), train_loss_rmse, epoch)
            # Log the same RMSE that is printed and used for model selection.
            writer.add_scalar('test_loss of fold:{}'.format(k_num), test_loss_mean, epoch)

            if test_loss_mean < test_loss_min:
                test_loss_min = test_loss_mean
                torch.save(net.state_dict(), 'net_of_fold_{}.pth'.format(k_num))
                print("model is saved successfully")
                print('current val loss={}'.format(test_loss_min))
    finally:
        # Flush and close the event file even if training is interrupted.
        writer.close()

    return train_loss_rmse, test_loss_min


def k_fold(k, X_train, pretrain_dir,
           transforms_train,
           transforms_test,
           device, num_epochs=3,
           learning_rate=0.001,
           weight_decay=0.05,
           batch_size=5):
    """Run k-fold cross-validation, training one model per fold via ``train``.

    The dataset is split with a shuffled ``KFold`` (fixed ``random_state=2``).
    For each split the training part is wrapped with ``transforms_train`` and
    the held-out part with ``transforms_test``. The best validation loss of
    every fold is printed, followed by their mean over ``k`` folds.

    Args:
        k: Number of folds.
        X_train: Dataset yielding ``(img, label)`` pairs.
        pretrain_dir: Checkpoint path forwarded to ``train``.
        transforms_train: Transform applied to images of the training split.
        transforms_test: Transform applied to images of the held-out split.
        device: Device forwarded to ``train``.
        num_epochs: Epochs per fold.
        learning_rate: Learning rate forwarded to ``train``.
        weight_decay: Weight decay forwarded to ``train``.
        batch_size: Batch size forwarded to ``train``.
    """
    splitter = KFold(n_splits=k, shuffle=True, random_state=2)
    per_fold_train_rmse = []
    per_fold_valid_loss = []

    for fold_number, (fit_positions, holdout_positions) in enumerate(splitter.split(X_train), start=1):
        fit_subset = MySubset(X_train, fit_positions, transforms_train)
        holdout_subset = MySubset(X_train, holdout_positions, transforms_test)

        # Train on this fold; folds are numbered from 1 in logs and file names.
        fold_train_rmse, fold_valid_loss = train(
            pretrain_dir, fit_subset, holdout_subset, fold_number, num_epochs,
            learning_rate, weight_decay, batch_size, device=device)

        print('*' * 25, '第', fold_number, '折', '*' * 25)
        print('valid loss:{:.6f}'.format(fold_valid_loss))

        per_fold_train_rmse.append(fold_train_rmse)
        per_fold_valid_loss.append(fold_valid_loss)

    # Final summary: mean of the per-fold best validation losses.
    print('#' * 10, '最终k折交叉验证结果', '#' * 10)
    print('valid_loss_sum:{:.4f}'.format(sum(per_fold_valid_loss) / k))

def main(parser_data):
    """Build the transform pipelines and the dataset, then start cross-validation.

    Args:
        parser_data: Namespace-like object providing ``img_path``,
            ``labels_path``, ``fold``, ``pretrain_dir``, ``epochs``, ``lr``
            and ``batch_size``.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("using device:{}".format(device))

    def imagenet_tail():
        # Every pipeline ends the same way: tensor conversion followed by
        # normalisation with the ImageNet statistics.
        return [transforms.ToTensor(),
                transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)]

    # Training-time augmentation handed to k_fold.
    data_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        *imagenet_tail(),
    ])

    # Alternative augmentation recipe; constructed here but not passed on.
    data_transform2 = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        *imagenet_tail(),
    ])

    # Deterministic preprocessing for the held-out split.
    test_transform = transforms.Compose([
        transforms.Resize([224, 224]),
        *imagenet_tail(),
    ])

    # The dataset itself carries no transform; k_fold applies one per split.
    data_path = parser_data.img_path
    label_path = parser_data.labels_path
    train_data = MyData(photo_root=data_path, labels_root=label_path)

    # Cross-validation
    k_fold(
        parser_data.fold,
        train_data,
        parser_data.pretrain_dir,
        transforms_train=data_transform,
        transforms_test=test_transform,
        device=device,
        num_epochs=parser_data.epochs,
        learning_rate=parser_data.lr,
        batch_size=parser_data.batch_size,
    )



In [None]:
def train_kaggle():
    """Assemble the default Kaggle configuration and launch training via ``main``.

    The parser is fed an empty argument list, so the defaults declared below
    are always the values used; edit them here to change a run.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__)

    # Directory of the training images
    parser.add_argument('--img_path', default='../input/petfinder-pawpularity-score/train', help='dataset')
    # CSV file with the training labels
    parser.add_argument('--labels_path', default='../input/petfinder-pawpularity-score/train.csv', help='labels')
    # Pretrained checkpoint that is loaded before fine-tuning
    parser.add_argument('--pretrain_dir', default='../input/petnets/swin_small_patch4_window7_224.pth',
                        help='path of the pretrained checkpoint to load')
    # Number of cross-validation folds
    parser.add_argument('--fold', default=10, type=int, metavar='N')
    # Total number of training epochs
    parser.add_argument('--epochs', default=10, type=int, metavar='N',
                        help='number of total epochs to run')
    # Training batch size
    parser.add_argument('--batch_size', default=64, type=int, metavar='N',
                        help='batch size when training.')
    # Learning rate
    parser.add_argument('--lr', default=1e-4, type=float, metavar='N',
                        help='learning_rate')

    # Parse an empty list rather than sys.argv.
    args = parser.parse_args(args=[])
    print(args)

    main(args)

In [None]:
# Launch the k-fold training run with the defaults declared in train_kaggle().
train_kaggle()