In [2]:
%%writefile train.py
import argparse
import os
from TripletEmbedding import TripletNet


def str2bool(v):
    """Convert a command-line token into a boolean.

    Matching is case-insensitive.

    Args:
        v: The raw string supplied on the command line.

    Returns:
        True for 'yes', 'true', 't', 'y', '1'; False for 'no', 'false',
        'f', 'n', '0'.

    Raises:
        argparse.ArgumentTypeError: If the token is none of the above.
    """
    token = v.lower()
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')

    if token in truthy:
        return True
    if token in falsy:
        return False
    raise argparse.ArgumentTypeError('Unsupported value encountered.')

def parase_args():
    """Build the training command-line parser, parse sys.argv and validate it.

    Boolean options accept an explicit value (``--fine_tune True``) or can be
    given bare (``--fine_tune``), in which case they evaluate to True.

    Returns:
        The value returned by ``check_args`` for the parsed namespace.
    """
    parser = argparse.ArgumentParser()

    # Data locations and basic optimisation settings.
    parser.add_argument('--photo_root', type=str,
                        default='/kaggle/input/sketchy10classes/dataset/photo_train',
                        help='Training photo root')
    parser.add_argument('--sketch_root', type=str,
                        default='/kaggle/input/sketchy10classes/dataset/sketch_train',
                        help='Training sketch root')
    parser.add_argument('--batch_size', type=int, default=64, help='The size of batch (default: 64)')
    parser.add_argument('--device', type=str, default='0', help='The cuda device to be used (default: 0)')
    parser.add_argument('--epochs', type=int, default=5, help='The number of epochs to run (default: 5)')
    parser.add_argument('--lr', type=float, default=1e-3, help='The learning rate of the model')

    # Testing switches: still accepted on the command line, although the
    # TripletNet constructor in this notebook does not read them.
    parser.add_argument('--test', type=str2bool, nargs='?', const=True, default=True)
    parser.add_argument('--test_f', type=int, default=5, help='The frequency of testing (default: 5)')

    parser.add_argument('--save_dir', type=str, default='model/resnet50',
                        help='The folder to save the model status')

    # Resuming from a checkpoint.
    parser.add_argument('--fine_tune', type=str2bool, nargs='?', const=True, default=False,
                        help='Whether to fine tune')
    parser.add_argument('--model_root', type=str, default=None, help='The model status files\'s root')

    # Triplet loss hyper-parameters.
    parser.add_argument('--margin', type=float, default=1, help='The margin of the triplet loss')
    parser.add_argument('--p', type=int, default=2, help='The p of the triplet loss')

    # Feature extractor selection.
    parser.add_argument('--net', type=str, default='resnet50', help='The model to be used (vgg16, resnet50)')
    parser.add_argument('--cat', type=str2bool, nargs='?', const=True, default=True,
                        help='Whether to use category loss')

    return check_args(parser.parse_args())


def check_args(args):
    """Create the checkpoint folder and sanity-check the parsed arguments.

    Validation is advisory: every failed check prints a message and the
    namespace is still returned, so the caller decides how to proceed.

    Args:
        args: Namespace exposing ``save_dir``, ``epochs``, ``batch_size``,
            ``net``, ``fine_tune`` and ``model_root``.

    Returns:
        The same ``args`` object that was passed in.
    """
    # Folder that will receive the saved model weights.
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)

    # Plain conditionals instead of try/assert so the checks still run
    # when Python is started with -O (which strips assert statements).
    if args.epochs < 1:
        print('number of epochs must be larger than or equal to one')

    if args.batch_size < 1:
        print('batch size must be larger than or equal to one')

    if args.net not in ['vgg16', 'resnet50']:
        print('net model must be chose from [\'vgg16\', \'resnet50\']')

    # Fine-tuning needs a checkpoint path; warn when it is MISSING.
    if args.fine_tune and not args.model_root:
        print('you should specify the model status file')

    return args


def main():
    """Parse the CLI options, pick the visible GPU and run training."""
    parsed = parase_args()
    if parsed is None:
        exit()

    # Restrict CUDA to the requested device before any model is built.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(parsed.device)

    trainer = TripletNet(parsed)
    trainer.train()


# Script entry point: start training only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':

    main()

Writing train.py


In [3]:
%%writefile TripletEmbedding.py
import torch as t
from torch import nn
from data import TripleDataLoader
from sketch_resnet import resnet50
from torch.autograd import Variable
import os


class Config(object):
    """Empty attribute holder; callers attach their own fields after creation."""

    def __init__(self):
        pass


class TripletNet(object):
    """Trains one embedding network on (sketch, photo, negative photo) triplets.

    The same network embeds the sketch anchor, the matching photo and the
    negative photo; the three embeddings are fed to ``nn.TripletMarginLoss``.
    """

    def __init__(self, opt):
        """Copy the training settings out of the parsed options.

        Args:
            opt: Object exposing ``photo_root``, ``sketch_root``,
                ``batch_size``, ``epochs``, ``lr``, ``save_dir``,
                ``fine_tune``, ``model_root``, ``margin``, ``p``, ``net``
                and ``cat`` attributes.
        """
        # train config
        self.photo_root = opt.photo_root
        self.sketch_root = opt.sketch_root
        self.batch_size = opt.batch_size
        self.epochs = opt.epochs
        self.lr = opt.lr

        # folder receiving one checkpoint per epoch
        self.save_dir = opt.save_dir

        # fine-tune (resume) config
        self.fine_tune = opt.fine_tune
        self.model_root = opt.model_root

        # dataloader config
        data_opt = Config()
        data_opt.photo_root = opt.photo_root
        data_opt.sketch_root = opt.sketch_root
        data_opt.batch_size = opt.batch_size

        self.dataloader_opt = data_opt

        # triplet config
        self.margin = opt.margin
        self.p = opt.p

        # feature extractor config (``net`` is only used in checkpoint names;
        # ``cat`` is stored but not read by train())
        self.net = opt.net
        self.cat = opt.cat

    def _get_vgg16(self, pretrained=True):
        """Build a VGG16 model whose last classifier layer has 125 outputs.

        NOTE(review): ``MyVGG16`` is neither defined nor imported in this
        module, so calling this method raises NameError. train() never
        calls it.
        """
        model = MyVGG16(pretrained=pretrained)
        model.classifier[6] = nn.Linear(in_features=4096, out_features=125, bias=True)
        return model

    def _get_resnet50(self, pretrained=True):
        """Build the ResNet-50 backbone and swap in a 10-output ``fc`` layer."""
        model = resnet50(pretrained=pretrained)
        model.fc = nn.Linear(in_features=2048, out_features=10)
        return model

    def train(self):
        """Run the triplet training loop and save a checkpoint after every epoch.

        Uses the GPU when one is available and falls back to the CPU
        otherwise. When fine-tuning, the weights are loaded from
        ``self.model_root`` and checkpoint numbering continues from the
        number at the end of that file name.

        Raises:
            ValueError: If fine-tuning is requested without ``model_root``.
        """
        device = t.device("cuda" if t.cuda.is_available() else "cpu")
        photo_net = self._get_resnet50().to(device)

        # Index of the last finished epoch; -1 means "start from scratch".
        number = -1
        if self.fine_tune:
            if not self.model_root:
                raise ValueError('fine_tune requires model_root to point to a checkpoint file')
            checkpoint_name = os.path.basename(self.model_root)
            checkpoint_stem = os.path.splitext(checkpoint_name)[0]
            # The epoch number is the last "_"-separated field of the file name.
            number = int(checkpoint_stem.split("_")[-1])
            photo_net.load_state_dict(t.load(self.model_root, map_location=device))

        print('net')
        print(photo_net)

        triplet_loss = nn.TripletMarginLoss(margin=self.margin, p=self.p).to(device)

        # optimizer
        photo_optimizer = t.optim.SGD(photo_net.parameters(), lr=self.lr, weight_decay=0.0005, momentum=0.9)
        # NOTE(review): this scheduler is constructed but never stepped, so the
        # learning rate stays constant. Left unchanged to keep training results
        # reproducible; add exp_lr_scheduler.step() per epoch if decay is wanted.
        exp_lr_scheduler = t.optim.lr_scheduler.StepLR(photo_optimizer,
                                                       step_size=7, gamma=0.1)

        data_loader = TripleDataLoader(self.dataloader_opt)
        dataset = data_loader.load_data()
        print('Len:', len(dataset))
        loss_list = []

        # check_args normally creates this folder; make sure t.save cannot
        # fail when the class is used without going through the CLI.
        if self.save_dir:
            os.makedirs(self.save_dir, exist_ok=True)

        for epoch in range(self.epochs):
            number += 1

            print('---------------{0}---------------'.format(epoch))

            photo_net.train()
            avg = 0
            for ii, data in enumerate(dataset):
                photo_optimizer.zero_grad()

                photo = data['P'].to(device)
                sketch = data['A'].to(device)
                neg = data['N'].to(device)

                # Same forward order as before: sketch, photo, negative.
                s_feature = photo_net(sketch)
                p_feature = photo_net(photo)
                n_feature = photo_net(neg)

                # triplet loss: sketch is the anchor, photo the positive
                loss = triplet_loss(s_feature, p_feature, n_feature)
                loss_value = loss.item()

                avg += loss_value
                loss.backward()

                photo_optimizer.step()

                print('[Train] Epoch: [{0}][{1}/{2}]\t'
                        'Triplet loss  ({triplet_loss_meterr:.3f})\t'
                        .format(epoch + 1, ii + 1, len(dataset),
                                triplet_loss_meterr=loss_value))

                del photo, sketch, neg, s_feature, p_feature, n_feature, loss
                if device.type == 'cuda':
                    t.cuda.empty_cache()

            checkpoint_file = 'photo_{0}_{1}.pth'.format(self.net, number)
            t.save(photo_net.state_dict(), os.path.join(self.save_dir, checkpoint_file))
            # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
            loss_list.append(avg / max(len(dataset), 1))
            print(loss_list)

Writing TripletEmbedding.py


In [4]:
%%writefile sketch_resnet.py
import torch.nn as nn
import math
import torch 
import torch.utils.model_zoo as model_zoo
import torchvision.models as models

# Public names exported by this module.
__all__ = ['ResNet', 'resnet50']

# Download locations of pretrained weight files, keyed by architecture name.
# Only the 'resnet50' entry is read by the resnet50() builder in this module.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions.

    The last convolution outputs ``planes * 4`` channels; the stride is
    applied by the 3x3 convolution.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """Create the block.

        Args:
            inplanes: Number of input channels.
            planes: Channel width of the two inner convolutions.
            stride: Stride of the 3x3 convolution.
            downsample: Optional module applied to the input before it is
                added back to the output; None means the raw input is used.
        """
        super().__init__()
        widened = planes * 4

        # Layers are created in the same order as before so that parameter
        # names and random initialisation stay the same.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the three conv stages, add the shortcut and apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)




class ResNet(nn.Module):
    """ResNet backbone whose forward pass returns the pooled feature vector.

    The ``fc`` layer is still created, so checkpoints that contain it keep
    loading, but ``forward`` returns the flattened output of the average
    pooling layer and does not evaluate ``fc``.
    """

    def __init__(self, block, layers, num_classes=1000):
        """Build the network.

        Args:
            block: Residual block class; it must expose an ``expansion``
                attribute and accept ``(inplanes, planes, stride, downsample)``.
            layers: Sequence of four integers, the number of blocks in each
                of the four stages.
            num_classes: Output size of the ``fc`` layer.
        """
        super(ResNet, self).__init__()
        # Channel count entering the next stage; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Conv weights: normal with std sqrt(2 / (k_h * k_w * out_channels));
        # batch-norm layers start as the identity (weight 1, bias 0).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks into one stage.

        Only the first block receives ``stride``; it also gets a 1x1
        conv + batch-norm shortcut whenever the stride or the channel count
        changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the flattened pooled features, one row per input sample."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        feature = x.view(x.size(0), -1)

        # The classifier head used to be evaluated here and its result thrown
        # away; skipping it yields the same output without the wasted work.
        return feature


def resnet50(pretrained=False, **kwargs):
    """Build a ResNet-50 (Bottleneck blocks, stages of 3/4/6/3).

    Args:
        pretrained (bool): If True, download the 'resnet50' weights listed in
            ``model_urls`` and load them non-strictly.
        **kwargs: Forwarded to the ``ResNet`` constructor.

    Returns:
        The model, moved to CUDA when available (CPU otherwise) and switched
        to eval mode.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)

    if pretrained:
        weights = model_zoo.load_url(model_urls['resnet50'])
        net.load_state_dict(weights, strict=False)

    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net.to(target)
    net.eval()
    return net

Writing sketch_resnet.py


In [5]:
%%writefile triplet_input.py
import os
import torch.utils.data as data
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
import random
import cv2
import keras
import tensorflow as tf
from keras.applications.vgg16 import preprocess_input
from torch.autograd import Variable
def find_classes(root):
    """List the entries of ``root`` as class names and index them.

    Args:
        root: Directory whose entries are treated as class names.

    Returns:
        A tuple ``(classes, class_to_index, index_to_class)``: the sorted
        entry names, a name -> position dict and a position -> name dict.
    """
    classes = sorted(os.listdir(root))
    class_to_idex = {name: position for position, name in enumerate(classes)}
    index_to_class = dict(enumerate(classes))
    return classes, class_to_idex, index_to_class

def make_dataset(root):
    """Collect the path of every file stored one level below ``root``.

    Args:
        root: Directory containing one sub-directory per class.

    Returns:
        A list of ``root/<class>/<file>`` paths, in directory-listing order.
        Entries of ``root`` that are not directories are ignored, matching
        the ``make_dataset`` helper in image_input.py.
    """
    images = []

    for cname in os.listdir(root):
        c_path = os.path.join(root, cname)
        # A stray file next to the class folders used to make
        # os.listdir(c_path) raise NotADirectoryError.
        if not os.path.isdir(c_path):
            continue
        for fname in os.listdir(c_path):
            images.append(os.path.join(c_path, fname))

    return images


class TripleDataset(data.Dataset):
    """Dataset of (sketch anchor, matching photo, negative photo) triplets.

    Each item is built from one sketch file: the positive is the photo in the
    same class folder whose name (before the first '.') equals the sketch
    name before the first '-'; the negative is a random photo from a
    different class.
    """

    def __init__(self, photo_root, sketch_root, batch_size=48):
        """Index the sketches and the photo classes.

        Args:
            photo_root: Directory with one sub-folder of photos per class.
            sketch_root: Directory with one sub-folder of sketches per class.
            batch_size: Only used to compute ``num_batches``.
        """
        super(TripleDataset, self).__init__()
        self.tranform = transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])
        classes, class_to_idx, idx_to_class = find_classes(photo_root)

        self.photo_root = photo_root
        self.sketch_root = sketch_root

        self.anchor_images = sorted(make_dataset(self.sketch_root))

        self.classes = classes
        self.class_to_idx = class_to_idx
        self.idx_to_class = idx_to_class

        self.batch_size = batch_size
        self.len = len(self.anchor_images)
        self.num_batches = len(self.anchor_images) // self.batch_size

    @staticmethod
    def _load_rgb(path):
        """Open an image, convert it to RGB and close the underlying file."""
        with Image.open(path) as handle:
            return handle.convert('RGB')

    def __getitem__(self, index):
        """Return the triplet for the sketch at ``index``.

        Returns:
            A dict with the transformed anchor ('A'), positive ('P') and
            negative ('N') images plus the class index as a string ('L').

        Raises:
            FileNotFoundError: If no photo matches the sketch.
        """
        anchor_data = self.anchor_images[index]
        pos_data, label = self._getrelate_photo(anchor_data)
        if pos_data == '0':
            # '0' is the "no match" sentinel; it used to reach Image.open and
            # fail there with an error that named neither sketch nor folder.
            raise FileNotFoundError(
                'No photo matching sketch %r was found under %r'
                % (anchor_data, self.photo_root))
        neg_data = self._getneg_photo(anchor_data)

        po = self._load_rgb(pos_data)
        an = self._load_rgb(anchor_data)
        ne = self._load_rgb(neg_data)

        A = self.tranform(an)
        P = self.tranform(po)
        L = str(label)
        N = self.tranform(ne)

        return {'A': A, 'P': P, 'N': N, 'L': L}

    def __len__(self):
        """Number of sketch anchors."""
        return self.len

    def __iter__(self):
        """Yield every item in index order."""
        for index in range(len(self)):
            yield self[index]

    def _getrelate_photo(self, anchor_filename):
        """Find the photo that belongs to a sketch.

        Args:
            anchor_filename: Path of the sketch; its parent folder is the
                class name.

        Returns:
            ``(photo_path, label)``; ``photo_path`` is the string '0' when no
            photo of that class has a matching name.
        """
        paths = os.path.normpath(anchor_filename).split(os.path.sep)
        fname = paths[-1].split('-')[0]
        cname = paths[-2]

        label = self.class_to_idx[cname]

        pos = '0'
        photos = os.listdir(os.path.join(self.photo_root, cname))

        for photo_name in photos:
            name = photo_name.split('.')[0]
            if name == fname:
                pos = os.path.join(self.photo_root, cname, photo_name)
        return pos, label

    def _getneg_photo(self, anchor_filename):
        """Pick a random photo from a class other than the sketch's class.

        The candidate classes come from the indexed class table instead of a
        fixed count of ten, and classes with an empty folder are skipped.

        Raises:
            RuntimeError: If no other class contains a photo.
        """
        paths = os.path.normpath(anchor_filename).split(os.path.sep)
        cname = paths[-2]

        label = self.class_to_idx[cname]
        candidates = [i for i in range(len(self.idx_to_class)) if i != label]

        while candidates:
            i = random.choice(candidates)  # negative class index
            negative_class = self.idx_to_class[i]
            negative_images_path = os.path.join(self.photo_root, negative_class)
            negative_images = os.listdir(negative_images_path)
            if negative_images:
                negative_image = random.choice(negative_images)
                return os.path.join(self.photo_root, negative_class, negative_image)
            # Empty class folder: drop it and draw again, which always
            # terminates (the original recursed without a bound).
            candidates.remove(i)

        raise RuntimeError(
            'No negative photo available for class %r under %r'
            % (cname, self.photo_root))

Writing triplet_input.py


In [6]:
%%writefile image_input.py
import os
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image


def make_dataset(root):
    """Gather the paths of all files located in the sub-directories of ``root``.

    Entries of ``root`` that are not directories are skipped.

    Returns:
        A list of ``root/<sub-directory>/<file>`` paths in listing order.
    """
    collected = []
    for entry in os.listdir(root):
        class_dir = os.path.join(root, entry)
        if not os.path.isdir(class_dir):
            continue
        collected.extend(os.path.join(class_dir, item) for item in os.listdir(class_dir))
    return collected


class ImageDataset(data.Dataset):
    """Dataset that yields every image below ``image_root`` with its name."""

    def __init__(self, image_root):
        """Index the images and set up the preprocessing pipeline.

        Args:
            image_root: Directory with one sub-folder of images per class.
        """
        # NOTE(review): normalisation here is mean/std 0.5, whereas
        # TripleDataset in triplet_input.py uses (0.485, 0.456, 0.406) /
        # (0.229, 0.224, 0.225) - confirm the mismatch is intended.
        pipeline = [
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
        self.tranform = transforms.Compose(pipeline)

        self.image_root = image_root
        self.image_paths = sorted(make_dataset(self.image_root))
        self.len = len(self.image_paths)

    def __getitem__(self, index):
        """Return ``{'I': transformed image, 'N': '<class>/<file>' name}``.

        The name is also printed each time an item is fetched.
        """
        source_path = self.image_paths[index]
        image = self.tranform(Image.open(source_path).convert('RGB'))

        # Normalise the path and split it on the OS separator so the last
        # two components are the class folder and the file name.
        parts = os.path.normpath(source_path).split(os.path.sep)
        name = os.path.join(parts[-2], parts[-1])
        print(name)

        return {'I': image, 'N': name}

    def __len__(self):
        """Number of indexed images."""
        return self.len

Writing image_input.py


In [7]:
%%writefile data.py
from triplet_input import TripleDataset
from image_input import ImageDataset
import torch.utils.data as data


class TripleDataLoader(data.Dataset):
    """Wraps a ``TripleDataset`` in a shuffling, batching ``DataLoader``.

    Iterating over the object yields batches; indexing it returns single
    un-batched items from the underlying dataset.
    """

    def __init__(self, opt):
        """Create the dataset and its loader.

        Args:
            opt: Object exposing ``photo_root``, ``sketch_root`` and
                ``batch_size``.
        """
        # Pass the batch size on so the dataset's batch_size / num_batches
        # match the loader instead of staying at the dataset default.
        self.dataset = TripleDataset(opt.photo_root, opt.sketch_root,
                                     batch_size=opt.batch_size)
        self.dataloader = data.DataLoader(
            self.dataset,
            shuffle=True,
            batch_size=opt.batch_size,
            num_workers=4,
            drop_last=True
        )

    def load_data(self):
        """Return this object, which is itself iterable over batches."""
        return self

    def __len__(self):
        """Number of batches per epoch."""
        return len(self.dataloader)

    def __getitem__(self, index):
        """Return the single (un-batched) dataset item at ``index``."""
        return self.dataset.__getitem__(index)

    def __iter__(self):
        """Yield the batches produced by the wrapped DataLoader."""
        yield from self.dataloader


class ImageDataLoader(data.Dataset):
    """Thin wrapper exposing an ``ImageDataset`` item by item (no batching)."""

    def __init__(self, opt):
        """Build the dataset from ``opt.image_root``."""
        self.dataset = ImageDataset(opt.image_root)

    def load_data(self):
        """Return this object, which is itself iterable over items."""
        return self

    def __len__(self):
        """Number of images in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return the dataset item at ``index``."""
        return self.dataset.__getitem__(index)

    def __iter__(self):
        """Yield the dataset items one after another."""
        yield from self.dataset

Writing data.py


Chạy từ 5:28 đến 5:44: 16 phút cho 10 epoch.

!python train.py --epoch 3 --net resnet50 --save_dir model/resnet50  
!python train.py --epoch 3 --net resnet50 --save_dir model/resnet50 --fine_tune True --model_root  /kaggle/working/model/resnet50/photo_resnet50_1.pth  
!python train.py --epoch 10 --net resnet50 --save_dir model/resnet50 --batch_size 48 --lr 1e-5 --margin 0.3  
!python train.py --epoch 10 --net resnet50 --save_dir model/resnet50 --batch_size 64 --lr 1e-3 --margin 1  
!python train.py --epoch 20 --net resnet50 --save_dir model/resnet50 --batch_size 32 --lr 1e-5 --margin 1   
!python train.py --epoch 15 --net resnet50 --save_dir model --batch_size 48 --lr 1e-5 --margin 0.3 --fine_tune True --model_root /kaggle/input/rn_bs48_lr5_mg03/other/epoch20th/1/photo_resnet50_19.pth  
!python train.py --epoch 15 --net resnet50 --save_dir model --batch_size 64 --lr 1e-3 --margin 1    
!python train.py --epoch 20 --net resnet50 --save_dir model --batch_size 32 --lr 0.00001 --margin 1

12:06  
3:09-> 4:26 : 30 epochs  
1 tiếng 60p: 24 epochs
~ 3p 1 epoch

In [8]:
# Launch training: 30 epochs, batch size 32, lr 1e-3, margin 1, checkpoints in ./model.
# `--epoch` works because argparse accepts an unambiguous prefix of `--epochs`.
!python train.py --epoch 30 --net resnet50 --save_dir model --batch_size 32 --lr 1e-3 --margin 1

2024-05-12 09:22:12.287566: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-12 09:22:12.287681: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-12 09:22:12.425238: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
100%|███████████████████████████████████████| 97.8M/97.8M [00:00<00:00, 272MB/s]
net
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, a

In [23]:
import os

file_path = "/kaggle/working/model/resnet50/photo_resnet50_1.pth"

# Delete the checkpoint when it is present; otherwise report that it is missing.
if not os.path.exists(file_path):
    print("File không tồn tại.")
else:
    os.remove(file_path)
    print("File đã được xóa thành công.")

File đã được xóa thành công.
