In [1]:
import os
import json
import time

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
import torchvision.models as models
import torch.optim as optim

from tqdm import tqdm
from visdom import Visdom

from model import GoogLeNet
from mydataset import MyDataset
import matplotlib.pyplot as plt

设置数据集

In [2]:
num_clazz = 50
batch_size = 10
lr = 1e-3
epochs = 10
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 4])  # number of workers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
# torch.cuda.is_available()

using cuda device.


viz = Visdom()

db = MyDataset('E:\\ai_learning_resource\\hwdb\\HWDB1\\train', 224, num_clazz=5)
x, y = next(iter(db))
print(x.shape)
viz.image(db.denormalize(x), win='sample_x', opts=dict(title='sample_x'))
loader = DataLoader(db, batch_size=16, shuffle=True)

for x, y in loader:
    print(x[0].shape)
    viz.images(x, nrow=8, win='batch', opts=dict(title='batch'))
    viz.text(str(y.numpy()), win='idx', opts=dict(title='batch-y'))
    time.sleep(5)

进行训练

In [3]:
batch_size = 2

train_db = MyDataset('E:\\ai_learning_resource\\hwdb\\HWDB1\\train', 224, num_clazz=num_clazz, mode='train')
val_db = MyDataset('E:\\ai_learning_resource\\hwdb\\HWDB1\\train', 224, num_clazz=num_clazz, mode='val')
# test_db = Pokemon('E:/ai_learning_resource/pokemon/pokemon', 224, mode='test')

train_loader = DataLoader(train_db, batch_size=batch_size, shuffle=True, num_workers=nw)

val_loader = DataLoader(val_db, batch_size=batch_size, num_workers=nw//2)
# test_loader = DataLoader(test_db, batch_size=batchsz, num_workers=2)

print("using {} images for training, {} images for validation.".format(len(train_db), len(val_db)))


# def evalute(model, loader):
#     correct = 0
#     total = len(loader.dataset)

#     for x, y in loader:
#         x, y = x.to(device), y.to(device)
#         with torch.no_grad():
#             logits = model(x)
#             pred = logits.argmax(dim=1)
#         correct += torch.eq(pred, y).sum().float().item()
#     return correct / total

# net = GoogLeNet(num_classes=num_clazz, aux_logits=True, init_weights=True)
net = models.densenet121(pretrained=True)
net.classifier = nn.Sequential(nn.Linear(1024,256),
                                  nn.ReLU(),
                                  nn.Dropout(0.2),
                                  nn.Linear(256,num_clazz),
                                  nn.LogSoftmax(dim=1))

net.to(device)
optimizer = optim.Adam(net.classifier.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
criteon = nn.CrossEntropyLoss().to(device)

start = time.time()
print('start time: ', start)
# train_steps = len(train_loader)
# viz.line([0], [-1], win='loss', opts=dict(title='loss'))
# viz.line([0], [-1], win='val_acc', opts=dict(title='val_acc'))
# best_acc, best_epoch = 0, 0
# global_step = 0
for epoch in range(epochs):
    net.train()
    print('start train')
#     x, y = next(iter(train_loader))
#     print(x[0].numpy(), y[0])
    running_loss = 0.0
#     train_bar = tqdm(train_loader)
    for step, data in enumerate(train_loader):
#         print('training')
        x, y = data
        x, y = x.to(device), y.to(device)
#         logits, aux_logits2, aux_logits1 = net(x)
        logits = net(x)
        loss = criteon(logits, y)
#         loss1 = criteon(aux_logits1, y)
#         loss2 = criteon(aux_logits2, y)
#         loss = loss0 + loss1 * 0.3 + loss2 * 0.3
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        
        if step % 400 == 0:
            print('Step {}/{} \t loss:{}'.format(step, len(train_loader), loss))
        
#         viz.line([loss.item()], [global_step], win='loss', update='append')
#         global_step += 10

#         train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss)
    
    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
#         val_bar = tqdm(val_loader)
        for val_x, val_y in val_loader:
            val_x, val_y = val_x.to(device), val_y.to(device)
            outputs = net(val_x)  # eval model only have last output layer
            pred_y = outputs.argmax(dim=1)
            acc += torch.eq(pred_y, val_y).sum().float().item()

    val_accurate = acc / len(val_loader)
    scheduler.step()
    print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
            (epoch + 1, running_loss, val_accurate))

print('Finished Training')
print('\n{} epoch cost time {:f}s'.format(epochs, time.time()-start))

using 9514 images for training, 2379 images for validation.


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to C:\Users\22792/.cache\torch\hub\checkpoints\densenet121-a639ec97.pth


  0%|          | 0.00/30.8M [00:00<?, ?B/s]

start time:  1617249840.4630203
start train
Step 0/4757 	 loss:4.034891605377197
Step 400/4757 	 loss:3.9056668281555176
Step 800/4757 	 loss:3.9445838928222656
Step 1200/4757 	 loss:3.9105424880981445
Step 1600/4757 	 loss:3.8826818466186523
Step 2000/4757 	 loss:3.8679099082946777
Step 2400/4757 	 loss:3.868229866027832
Step 2800/4757 	 loss:3.869323253631592
Step 3200/4757 	 loss:3.8781254291534424
Step 3600/4757 	 loss:3.9354257583618164
Step 4000/4757 	 loss:3.9866743087768555
Step 4400/4757 	 loss:3.9014649391174316
[epoch 1] train_loss: 18635.239  val_accuracy: 0.030
start train
Step 0/4757 	 loss:3.92185640335083
Step 400/4757 	 loss:3.939635753631592
Step 800/4757 	 loss:3.9056451320648193
Step 1200/4757 	 loss:3.8959262371063232
Step 1600/4757 	 loss:3.8929312229156494
Step 2000/4757 	 loss:3.979424238204956
Step 2400/4757 	 loss:3.8873977661132812
Step 2800/4757 	 loss:3.9004693031311035
Step 3200/4757 	 loss:3.858065128326416
Step 3600/4757 	 loss:3.9139342308044434
Step 40

KeyboardInterrupt: 