In [1]:
# dataset 
from torchvision import datasets
from torchvision import transforms
import torch
import pyvww
mean = [0.4698069, 0.44657433, 0.40738317]
std = [0.2762676, 0.27169052, 0.28657043]
transform_train = transforms.Compose([
    # transforms.RandomCrop(36, padding=4),
    # transforms.CenterCrop(32),
    transforms.Resize((64,64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
num_classes = 2
train_dataset = pyvww.pytorch.VisualWakeWordsClassification(root="/home/qhy/data/coco2017/all2017", 
                    annFile="/home/qhy/data/coco2017/annotations/vww/instances_train.json",transform=transform_train)
val_dataset = pyvww.pytorch.VisualWakeWordsClassification(root="/home/qhy/data/coco2017/all2017", 
                    annFile="/home/qhy/data/coco2017/annotations/vww/instances_val.json",transform=transform_train)

# train_dataset = datasets.ImageFolder("/home/qhy/data/vww/train",transform=transform_train)
# val_dataset = datasets.ImageFolder("/home/qhy/data/vww/val",transform=transform_train) 
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    sampler=None)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
    sampler=None)




  from .autonotebook import tqdm as notebook_tqdm


loading annotations into memory...
Done (t=3.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.16s)
creating index...
index created!


In [8]:
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint


def make_layer(stage_num, layer_num, channel_num_in, channel_num_out, op_type,
               with_pool,pool_type):
    channel_nums_in = [channel_num_in] + [channel_num_out] * (layer_num - 1)
    layers = []
    if stage_num == 0 :
        first_layer_stride = 1
    else:
        first_layer_stride = 2
    if with_pool == True:
        if pool_type == "avgpool":
            layers.append(("avgpool", nn.AvgPool2d(2, 2)))
        else:
            layers.append(("maxpool", nn.MaxPool2d(2, 2)))
        if op_type == 'vgg':
            layers.append(("stage_{}_0_vgg".format(stage_num),
                           VGGBlock(channel_num_in,
                                    channel_num_out,
                                    kernel_size=3,
                                    stride=1)))
            layers += [("stage_{}_{}_vgg".format(stage_num, i),
                        VGGBlock(channel_num_out, channel_num_out, 3))
                       for i in range(1, layer_num)]
        else:
            layers.append(("stage_{}_0_repvgg".format(stage_num),
                           RepVGGBlock(channel_num_in,
                                       channel_num_out,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1)))
            layers += [("stage_{}_{}_repvgg".format(stage_num, i),
                        RepVGGBlock(channel_num_out,
                                    channel_num_out,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)) for i in range(1, layer_num)]

    else:
        if op_type == 'vgg':
            layers.append(("stage_{}_0_vgg".format(stage_num),
                           VGGBlock(channel_num_in,
                                    channel_num_out,
                                    kernel_size=3,
                                    stride=first_layer_stride)))
            layers += [("stage_{}_{}_vgg".format(stage_num, i),
                        VGGBlock(channel_num_out, channel_num_out, 3))
                       for i in range(1, layer_num)]
        else:
            layers.append(("stage_{}_0_repvgg".format(stage_num),
                           RepVGGBlock(channel_num_in,
                                       channel_num_out,
                                       kernel_size=3,
                                       padding=1,
                                       stride=first_layer_stride)))
            layers += [("stage_{}_{}_repvgg".format(stage_num, i),
                        RepVGGBlock(channel_num_out,
                                    channel_num_out,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)) for i in range(1, layer_num)]
    return nn.Sequential(OrderedDict(layers))
def VGGBlock(in_channels,
             out_channels,
             kernel_size,
             stride=1,
             padding=1,
             dilation=1,
             groups=1,
             padding_mode='zeros'):
    conv2d = nn.Conv2d(in_channels,
                       out_channels,
                       kernel_size=kernel_size,
                       stride = stride,
                       padding=1,
                       dilation=1,
                       groups=1,
                       padding_mode='zeros')
    layers = nn.Sequential(
        OrderedDict([("conv", conv2d), ("bn", nn.BatchNorm2d(out_channels)),
                     ("relu", nn.ReLU(inplace=True))]))
    return layers

class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        self.pool = False
        self.stage_0 = make_layer(0, 1, 3, 64, "vgg",
               with_pool=False,pool_type="None")
        # self.pool = torch.nn.MaxPool2d(2,2)
        self.stage_1 = make_layer(1, 1, 64, 16, "vgg",
               with_pool=False,pool_type="None")
        self.stage_2 = make_layer(2, 1, 16, 8, "vgg",
               with_pool=False,pool_type="None")
        self.stage_3 = make_layer(3, 1, 8, 1, "vgg",
               with_pool=False,pool_type="None")
        
        
        
        # self.gap = nn.AdaptiveAvgPool2d(output_size=1)
        
        self.linear = nn.Linear(64, 32)
        self.linear_2 = nn.Linear(32, num_classes)
        
        
    def forward(self, input):
        out = self.stage_0(input)
        # out = self.pool(out)
        out = self.stage_1(out)
        # out = self.pool(out)
        out = self.stage_2(out)
        out = self.stage_3(out)
        
        
        
        # out = self.gap(out)
        # print(out.shape)
        out = self.linear(out.view(out.size(0), -1))
        out = self.linear_2(out.view(out.size(0), -1))
        
        # out = self.classifier(out.view(out.size(0), -1))
        return out
model = Net(2)
input = torch.randn(3,3,64,64)
out = model(input)

In [42]:
ExpLR.step()

In [11]:
# 对模型进行训练和参数优化
cnn_model = Net(2)
cnn_model.cuda()
learning_rate = 0.01
import tqdm 
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(),lr=learning_rate)
ExpLR = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

n_epochs = 100
predict_acc = []
train_acc = []

for epoch in range(n_epochs):
    running_loss = 0.0
    running_correct = 0.0
    print("Epoch  {}/{}".format(epoch, n_epochs))
    num_iter = 0
    for data in tqdm.tqdm(train_loader):
        num_iter += 1
        X_train, y_train = data
        X_train, y_train = X_train.cuda(), y_train.cuda()
        outputs = cnn_model(X_train)
        _, pred = torch.max(outputs.data, 1)
        optimizer.zero_grad()
        loss = loss_func(outputs, y_train)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        running_correct += torch.sum(pred == y_train.data)
        if num_iter > 200:
            break
    testing_correct = 0.0
    for data in tqdm.tqdm(val_loader):
        X_test, y_test = data
        # X_test, y_test = Variable(X_test), Variable(y_test)
        X_test, y_test = X_test.cuda(), y_test.cuda()
        outputs = cnn_model(X_test)
        _, pred = torch.max(outputs, 1) #返回每一行中最大值的那个元素，且返回其索引
        testing_correct += torch.sum(pred == y_test.data)
        # print(testing_correct)
    print("lr is :{:.6f} Loss is :{:.4f},Train Accuracy is:{:.4f}%,Test Accuracy is:{:.4f}%".format(
        ExpLR.get_last_lr()[0],
        running_loss / len(train_dataset), 100 * running_correct / len(train_dataset),
        100 * testing_correct / len(val_dataset)))
    ExpLR.step()
    predict_acc.append(100 * testing_correct / len(val_dataset))
    train_acc.append(100 * running_correct / len(train_dataset))
    

Epoch  0/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.010000 Loss is :0.0012,Train Accuracy is:25.6726%,Test Accuracy is:61.8935%
Epoch  1/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.009800 Loss is :0.0011,Train Accuracy is:28.5582%,Test Accuracy is:64.2760%
Epoch  2/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.009604 Loss is :0.0011,Train Accuracy is:29.8287%,Test Accuracy is:67.0431%
Epoch  3/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.009412 Loss is :0.0010,Train Accuracy is:30.8154%,Test Accuracy is:68.5445%
Epoch  4/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.009224 Loss is :0.0010,Train Accuracy is:31.3370%,Test Accuracy is:70.3313%
Epoch  5/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.009039 Loss is :0.0010,Train Accuracy is:31.8490%,Test Accuracy is:70.7036%
Epoch  6/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.008858 Loss is :0.0010,Train Accuracy is:32.1007%,Test Accuracy is:71.2744%
Epoch  7/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.008681 Loss is :0.0010,Train Accuracy is:32.3984%,Test Accuracy is:72.2546%
Epoch  8/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.008508 Loss is :0.0009,Train Accuracy is:32.7446%,Test Accuracy is:72.9123%
Epoch  9/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.008337 Loss is :0.0009,Train Accuracy is:33.0302%,Test Accuracy is:73.1108%
Epoch  10/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.008171 Loss is :0.0009,Train Accuracy is:33.2428%,Test Accuracy is:73.1356%
Epoch  11/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.008007 Loss is :0.0009,Train Accuracy is:33.4814%,Test Accuracy is:73.5327%
Epoch  12/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.007847 Loss is :0.0009,Train Accuracy is:33.5283%,Test Accuracy is:73.8677%
Epoch  13/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.007690 Loss is :0.0009,Train Accuracy is:33.5882%,Test Accuracy is:74.1903%
Epoch  14/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.007536 Loss is :0.0009,Train Accuracy is:33.8650%,Test Accuracy is:74.4757%
Epoch  15/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.007386 Loss is :0.0009,Train Accuracy is:34.0238%,Test Accuracy is:75.1706%
Epoch  16/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.007238 Loss is :0.0009,Train Accuracy is:34.0950%,Test Accuracy is:75.2451%
Epoch  17/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.007093 Loss is :0.0009,Train Accuracy is:34.2226%,Test Accuracy is:75.6049%
Epoch  18/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.006951 Loss is :0.0009,Train Accuracy is:34.2104%,Test Accuracy is:75.8655%
Epoch  19/100


 44%|████▍     | 200/451 [01:16<01:36,  2.60it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.006812 Loss is :0.0008,Train Accuracy is:34.3953%,Test Accuracy is:75.5429%
Epoch  20/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.006676 Loss is :0.0008,Train Accuracy is:34.4942%,Test Accuracy is:75.9524%
Epoch  21/100


 44%|████▍     | 200/451 [01:16<01:36,  2.61it/s]
100%|██████████| 63/63 [00:23<00:00,  2.72it/s]


lr is :0.006543 Loss is :0.0008,Train Accuracy is:34.4855%,Test Accuracy is:75.8779%
Epoch  22/100


 24%|██▎       | 107/451 [00:40<01:48,  3.18it/s]

In [4]:
ExpLR.step()
ExpLR.get_last_lr()[0]

2.1520920080445844e-43

In [None]:
# 添加linear2之前精度72.
# 添加linear2之后精度77.56 50epoch