In [2]:
import csv
from turtle import forward
from tqdm import tqdm
import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from torchsummary   import summary
from torchmetrics import F1Score
import json
import os
import pandas as pd
import cv2
from PIL import Image 
import visdom


# 1. define dataset

In [3]:
def parse_file_number(col):
    return int(col.split(".")[0])

class MyDataset(Dataset) :

    def __init__(self,meta_path,root_dir,transform=None,pre_transform=None) :
        super().__init__()
        #===============meta data===============
        with open(meta_path, 'r') as file:
            temp_meta_data = json.load(file)
        meta = pd.json_normalize(temp_meta_data['annotations'])
        meta['file_name'] = meta['file_name'].apply(parse_file_number)
        meta = meta.sort_values("file_name").reset_index(drop=True)
        self.root_dir = root_dir
        
        meta['file_name'] = meta['file_name'].map(lambda x :  self.root_dir + '/' + str(x) +'.jpg')
        self.X = []
        loop = tqdm(list(meta['file_name']), total=len(meta['file_name']), leave=True)

        for i, X in enumerate(loop):
#             print(i)
            try:
                self.X.append(pre_transform(Image.open(X).convert("RGB")))
            except:
                pass
        self.y = meta['category']
        self.transform = transform
        
    def __len__(self) :
        return len(self.y)
    
    def __getitem__(self,idx) :
#         X, y = self.transforms(self.X[idx]), self.y[idx]
        X, y = self.transform(self.X[idx]), int(self.y[idx])
        return X, torch.tensor(y)


In [4]:
batch = 25

mode = 'train' 
train_data_dir = "./train_data"
meta_path = "./answer.json"

# Create training and validation datasets
# test_datasets = MyDataset(meta_path, data_dir, data_transforms['train'])

pre_transformer = transforms.Compose([
    transforms.Resize((400,400)),
    transforms.CenterCrop((224,224)),
])

transformer = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
# train_data = MyDataset(meta_path, train_data_dir, transform=transformer)
train_data = MyDataset(meta_path, train_data_dir, transform=transformer,pre_transform=pre_transformer)

train_loader = DataLoader(
    train_data, batch_size=batch)

100%|███████████████████████████████████████████████████████████████████████████| 40000/40000 [02:29<00:00, 268.13it/s]


# 모델 정의

In [7]:
import torch
import torch.nn as nn

# 分类数目
num_class = 80
# 各层数目
resnet18_params = [2, 2, 2, 2]
resnet34_params = [3, 4, 6, 3]
resnet50_params = [3, 4, 6, 3]
resnet101_params = [3, 4, 23, 3]
resnet152_params = [3, 8, 36, 3]


# 定义Conv1层
def Conv1(in_planes, places, stride=2):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
        nn.BatchNorm2d(places),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )


# 浅层的残差结构
class BasicBlock(nn.Module):
    def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 1):
        super(BasicBlock,self).__init__()
        self.expansion = expansion
        self.downsampling = downsampling

        # torch.Size([1, 64, 56, 56]), stride = 1
        # torch.Size([1, 128, 28, 28]), stride = 2
        # torch.Size([1, 256, 14, 14]), stride = 2
        # torch.Size([1, 512, 7, 7]), stride = 2
        self.basicblock = nn.Sequential(
            nn.Conv2d(in_channels=in_places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(places * self.expansion),
        )

        # torch.Size([1, 64, 56, 56])
        # torch.Size([1, 128, 28, 28])
        # torch.Size([1, 256, 14, 14])
        # torch.Size([1, 512, 7, 7])
        # 每个大模块的第一个残差结构需要改变步长
        if self.downsampling:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(places*self.expansion)
            )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # 实线分支
        residual = x
        out = self.basicblock(x)

        # 虚线分支
        if self.downsampling:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


# 深层的残差结构
class Bottleneck(nn.Module):

    # 注意:默认 downsampling=False
    def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4):
        super(Bottleneck,self).__init__()
        self.expansion = expansion
        self.downsampling = downsampling

        self.bottleneck = nn.Sequential(
            # torch.Size([1, 64, 56, 56])，stride=1
            # torch.Size([1, 128, 56, 56])，stride=1
            # torch.Size([1, 256, 28, 28]), stride=1
            # torch.Size([1, 512, 14, 14]), stride=1
            nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            # torch.Size([1, 64, 56, 56])，stride=1
            # torch.Size([1, 128, 28, 28]), stride=2
            # torch.Size([1, 256, 14, 14]), stride=2
            # torch.Size([1, 512, 7, 7]), stride=2
            nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            # torch.Size([1, 256, 56, 56])，stride=1
            # torch.Size([1, 512, 28, 28]), stride=1
            # torch.Size([1, 1024, 14, 14]), stride=1
            # torch.Size([1, 2048, 7, 7]), stride=1
            nn.Conv2d(in_channels=places, out_channels=places * self.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(places * self.expansion),
        )

        # torch.Size([1, 256, 56, 56])
        # torch.Size([1, 512, 28, 28])
        # torch.Size([1, 1024, 14, 14])
        # torch.Size([1, 2048, 7, 7])
        if self.downsampling:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(places*self.expansion)
            )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # 实线分支
        residual = x
        out = self.bottleneck(x)

        # 虚线分支
        if self.downsampling:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self,blocks, blockkinds, num_classes=num_class):
        super(ResNet,self).__init__()

        self.blockkinds = blockkinds
        self.conv1 = Conv1(in_planes = 3, places= 64)

        # 对应浅层网络结构
        if self.blockkinds == BasicBlock:
            self.expansion = 1
            # 64 -> 64
            self.layer1 = self.make_layer(in_places=64, places=64, block=blocks[0], stride=1)
            # 64 -> 128
            self.layer2 = self.make_layer(in_places=64, places=128, block=blocks[1], stride=2)
            # 128 -> 256
            self.layer3 = self.make_layer(in_places=128, places=256, block=blocks[2], stride=2)
            # 256 -> 512
            self.layer4 = self.make_layer(in_places=256, places=512, block=blocks[3], stride=2)

            self.fc = nn.Linear(512, num_classes)

        # 对应深层网络结构
        if self.blockkinds == Bottleneck:
            self.expansion = 4
            # 64 -> 64
            self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1)
            # 256 -> 128
            self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2)
            # 512 -> 256
            self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2)
            # 1024 -> 512
            self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2)

            self.fc = nn.Linear(2048, num_classes)

        self.avgpool = nn.AvgPool2d(7, stride=1)

        # 初始化网络结构
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # 采用了何凯明的初始化方法
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, in_places, places, block, stride):

        layers = []

        # torch.Size([1, 64, 56, 56])  -> torch.Size([1, 256, 56, 56])， stride=1 故w，h不变
        # torch.Size([1, 256, 56, 56]) -> torch.Size([1, 512, 28, 28])， stride=2 故w，h变
        # torch.Size([1, 512, 28, 28]) -> torch.Size([1, 1024, 14, 14])，stride=2 故w，h变
        # torch.Size([1, 1024, 14, 14]) -> torch.Size([1, 2048, 7, 7])， stride=2 故w，h变
        # 此步需要通过虚线分支，downsampling=True
        layers.append(self.blockkinds(in_places, places, stride, downsampling =True))

        # torch.Size([1, 256, 56, 56]) -> torch.Size([1, 256, 56, 56])
        # torch.Size([1, 512, 28, 28]) -> torch.Size([1, 512, 28, 28])
        # torch.Size([1, 1024, 14, 14]) -> torch.Size([1, 1024, 14, 14])
        # torch.Size([1, 2048, 7, 7]) -> torch.Size([1, 2048, 7, 7])
        # print("places*self.expansion:", places*self.expansion)
        # print("block:", block)
        # 此步需要通过实线分支，downsampling=False， 每个大模块的第一个残差结构需要改变步长
        for i in range(1, block):
            layers.append(self.blockkinds(places*self.expansion, places))

        return nn.Sequential(*layers)


    def forward(self, x):

        # conv1层
        x = self.conv1(x)   # torch.Size([1, 64, 56, 56])

        # conv2_x层
        x = self.layer1(x)  # torch.Size([1, 256, 56, 56])
        # conv3_x层
        x = self.layer2(x)  # torch.Size([1, 512, 28, 28])
        # conv4_x层
        x = self.layer3(x)  # torch.Size([1, 1024, 14, 14])
        # conv5_x层
        x = self.layer4(x)  # torch.Size([1, 2048, 7, 7])

        x = self.avgpool(x) # torch.Size([1, 2048, 1, 1]) / torch.Size([1, 512])
        x = x.view(x.size(0), -1)   # torch.Size([1, 2048]) / torch.Size([1, 512])
        x = self.fc(x)      # torch.Size([1, 5])

        return x

def ResNet18():
    return ResNet(resnet18_params, BasicBlock)

def ResNet34():
    return ResNet(resnet34_params, BasicBlock)

def ResNet50():
    return ResNet(resnet50_params, Bottleneck)

def ResNet101():
    return ResNet(resnet101_params, Bottleneck)

def ResNet152():
    return ResNet(resnet152_params, Bottleneck)



device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = ResNet101().to(device)

# 트레커 생성

In [8]:
vis = visdom.Visdom()
vis.close(env="main")
def loss_tracker(loss_plot, loss_value, num):
    '''num, loss_value, are Tensor'''
    vis.line(X=num,
             Y=loss_value,
             win = loss_plot,
             update='append'
             )

Setting up a new session...


In [9]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr = 0.005,momentum=0.9)

lr_sche = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

loss_plt = vis.line(Y=torch.Tensor(1).zero_(),opts=dict(title='loss_tracker', legend=['loss'], showlegend=True))

In [11]:
# print(len(trainloader))
epochs = 1

for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    lr_sche.step()
    loop = tqdm(train_loader, total=len(train_loader), leave=True)
#     for i, data in enumerate(train_loader, 0):
    for i, (inputs, labels) in enumerate(loop):
        # get the inputs
#         print(labels)
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
#         print(outputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        lr_sche.step()
        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 30 mini-batches
            loss_tracker(loss_plt, torch.Tensor([running_loss/20]), torch.Tensor([i + epoch*len(train_loader) ]))
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 20))
            running_loss = 0.0
    torch.save(net.state_dict(), "./model_"+ str(epoch) + ".pth")

print('Finished Training')

  1%|█                                                                               | 20/1600 [00:11<14:55,  1.76it/s]

[1,    20] loss: 4.982


  2%|██                                                                              | 40/1600 [00:23<14:44,  1.76it/s]

[1,    40] loss: 4.847


  4%|███                                                                             | 60/1600 [00:34<14:29,  1.77it/s]

[1,    60] loss: 4.774


  5%|████                                                                            | 80/1600 [00:45<14:15,  1.78it/s]

[1,    80] loss: 4.570


  6%|████▉                                                                          | 100/1600 [00:57<14:06,  1.77it/s]

[1,   100] loss: 4.524


  8%|█████▉                                                                         | 120/1600 [01:08<13:54,  1.77it/s]

[1,   120] loss: 4.388


  9%|██████▉                                                                        | 140/1600 [01:19<13:42,  1.77it/s]

[1,   140] loss: 4.387


 10%|███████▉                                                                       | 160/1600 [01:30<13:32,  1.77it/s]

[1,   160] loss: 4.321


 11%|████████▉                                                                      | 180/1600 [01:42<13:20,  1.77it/s]

[1,   180] loss: 4.313


 12%|█████████▉                                                                     | 200/1600 [01:53<13:08,  1.78it/s]

[1,   200] loss: 4.314


 14%|██████████▊                                                                    | 220/1600 [02:04<12:55,  1.78it/s]

[1,   220] loss: 4.318


 15%|███████████▊                                                                   | 240/1600 [02:15<12:46,  1.77it/s]

[1,   240] loss: 4.365


 16%|████████████▊                                                                  | 260/1600 [02:27<14:03,  1.59it/s]

[1,   260] loss: 4.306


 18%|█████████████▊                                                                 | 280/1600 [02:39<13:47,  1.60it/s]

[1,   280] loss: 4.300


 19%|██████████████▊                                                                | 300/1600 [02:50<12:13,  1.77it/s]

[1,   300] loss: 4.341


 20%|███████████████▊                                                               | 320/1600 [03:02<12:02,  1.77it/s]

[1,   320] loss: 4.299


 21%|████████████████▊                                                              | 340/1600 [03:13<12:13,  1.72it/s]

[1,   340] loss: 4.290


 22%|█████████████████▏                                                             | 347/1600 [03:18<11:56,  1.75it/s]


KeyboardInterrupt: 