# load files and labels

In [3]:
import os
import math
import random
import numpy as np

from skimage import io,transform
import matplotlib.pyplot as plt

import torch.nn
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import autograd
from torch.autograd import Variable
from torchvision import transforms

from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

from visdom import Visdom
# Visdom client used later for image visualisation; the printed value is the
# result of check_connection() for this client.
viz = Visdom()
print("visdom: ",viz.check_connection())

ROOT = "Datasets/corel_5k/images/"


def _read_label_rows(path):
    """Parse a label file into integer rows.

    Every line is split on single spaces; empty tokens and a bare newline
    token are skipped and the remaining tokens are converted to ``int``.
    """
    with open(path) as handle:
        raw_lines = handle.readlines()
    return [
        [int(token) for token in line.split(" ") if token not in ('', '\n')]
        for line in raw_lines
    ]


def _rows_to_label_dict(rows):
    """Map ``"<first value>.jpeg"`` to the remaining values of each row."""
    return {str(row[0]) + ".jpeg": row[1:] for row in rows}


# One sub-directory per entry directly below ROOT; collect every file in them
# whose name contains "jpeg".
dirs = [ROOT + sub_dir + "/" for sub_dir in next(os.walk(ROOT))[1]]
files = []
for directory in dirs:
    for file_name in next(os.walk(directory))[2]:
        if "jpeg" in file_name:
            files.append(directory + file_name)

# The training label file is shuffled and split: the first 4000 rows are used
# for training, the remainder for validation.
train_labels = _read_label_rows("Datasets/corel_5k/labels/training_label")
random.shuffle(train_labels)
train_label = train_labels[:4000]
val_label = train_labels[4000:]

train_label_dict = _rows_to_label_dict(train_label)
val_label_dict = _rows_to_label_dict(val_label)

test_labels = _read_label_rows("Datasets/corel_5k/labels/test_label")
test_label_dict = _rows_to_label_dict(test_labels)

# Attach labels to image paths. Lookup order is validation, then test, then
# training; an image whose name is in none of the dictionaries is dropped.
train_pairs = []
val_pairs = []
test_pairs = []
_lookup_order = (
    (val_label_dict, val_pairs),
    (test_label_dict, test_pairs),
    (train_label_dict, train_pairs),
)
for image_path in files:
    img_name = image_path.split("/")[-1]
    for label_dict, pairs in _lookup_order:
        if img_name in label_dict:
            pairs.append((image_path, label_dict[img_name]))
            break

visdom:  False


# datasets

In [4]:
class COREL_5K(Dataset):
    """Image dataset yielding ``(image, multi-hot label vector)`` pairs.

    Args:
        data: Sequence of ``(image_path, labels)`` pairs, where ``labels`` is
            a list of 1-based class ids.
        num: Number of samples to expose. It is clamped to ``len(data)`` so
            that a too-large value cannot make a loader index past the data.
        trans: Optional callable applied to the loaded image (for example a
            torchvision transform that brings every image to the same size).
        num_classes: Length of the multi-hot label vector.
    """

    def __init__(self, data, num, trans=None, num_classes=374):
        super(COREL_5K, self).__init__()
        self.data = data
        self.num = num
        self.trans = trans
        self.num_classes = num_classes

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, float32 label vector)``."""
        data_path, label = self.data[index]
        # Labels are 1-based; subtract one so they match vector indices.
        class_ids = np.asarray(label, dtype=np.int64) - 1
        img = io.imread(data_path)
        if self.trans:  # ensures every image ends up as a same-sized array
            img = self.trans(img)
        # Multi-hot label vector. np.add.at accumulates repeated ids, which
        # matches summing one-hot rows, without building an identity matrix.
        target = np.zeros(self.num_classes, dtype=np.float32)
        np.add.at(target, class_ids, 1)
        return img, target

    def __len__(self):
        """Return the number of samples, never more than the data holds."""
        return min(self.num, len(self.data))

    def _gen_noise_image(self, image, noise_rate):
        """Blend ``image`` with uniform noise drawn from [-0.001, 0.001).

        ``noise_rate`` is the weight of the noise; ``1 - noise_rate`` is the
        weight of the original image.
        """
        noise_image = np.random.uniform(-0.001, 0.001, (image.shape)).astype('float32')
        return noise_rate * noise_image + (1 - noise_rate) * image

# model

In [17]:
class Inception(nn.Module):
    """Inception block with four parallel branches concatenated on channels.

    The output channel count is
    ``out1x1 + poolTo1x1out + out1x1To3x3 + out3x3`` (in that order); the
    spatial size is unchanged because every layer uses stride 1 with
    size-preserving padding.
    """

    def __init__(self, in_channels, out1x1, n1x1To3x3, out1x1To3x3, n1x1To3x3To3x3, n3x3To3x3, out3x3, poolTo1x1out):
        super().__init__()
        unit = self._conv_bn_relu

        # Branch 1: a single 1x1 convolution.
        self.branch1 = nn.Sequential(*unit(in_channels, out1x1, 1))

        # Branch 2: 3x3 max pooling followed by a 1x1 convolution.
        pooling = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.branch2 = nn.Sequential(pooling, *unit(in_channels, poolTo1x1out, 1))

        # Branch 3: 1x1 reduction followed by one 3x3 convolution.
        self.branch3 = nn.Sequential(
            *unit(in_channels, n1x1To3x3, 1),
            *unit(n1x1To3x3, out1x1To3x3, 3)
        )

        # Branch 4: 1x1 reduction followed by two stacked 3x3 convolutions.
        self.branch4 = nn.Sequential(
            *unit(in_channels, n1x1To3x3To3x3, 1),
            *unit(n1x1To3x3To3x3, n3x3To3x3, 3),
            *unit(n3x3To3x3, out3x3, 3)
        )

    @staticmethod
    def _conv_bn_relu(n_in, n_out, kernel_size):
        """Return ``[Conv2d, BatchNorm2d, ReLU(inplace)]`` for one conv unit.

        Stride is 1 and padding is ``kernel_size // 2`` (0 for 1x1, 1 for
        3x3), so the spatial size is preserved.
        """
        return [
            nn.Conv2d(n_in, n_out, kernel_size=kernel_size, stride=1, padding=kernel_size // 2),
            nn.BatchNorm2d(n_out),
            nn.ReLU(True),
        ]

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)
    
    
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier built from ``Inception`` blocks.

    The network takes 3-channel images and returns one raw logit per class
    (no sigmoid/softmax is applied here).

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes=374):
        super(GoogLeNet, self).__init__()

        # Learned 1x1 colour mixing interleaved with pooling; the two pools
        # (3/3 then 2/2) shrink the spatial size by a factor of 6.
        self.convert_layer = nn.Sequential(
            nn.Conv2d(3,3,kernel_size=1),
            nn.MaxPool2d(3, stride=3),
            nn.Conv2d(3,3,kernel_size=1),
            nn.MaxPool2d(2, stride=2),
        )
        self.pre_layers    = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        # Each block's in_channels equals the summed branch outputs of the
        # block before it (e.g. a3 emits 64 + 32 + 128 + 32 = 256).
        self.a3 = Inception(192,  64,  96, 128, 16, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 32, 96, 64)

        # Shared downsampling layer, applied after stage 3 and after stage 4.
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192,  96, 208, 16, 16,  48,  64)
        self.b4 = Inception(512, 160, 112, 224, 24, 24,  64,  64)
        self.c4 = Inception(512, 128, 128, 256, 24, 24,  64,  64)
        self.d4 = Inception(512, 112, 144, 288, 32, 32,  64,  64)
        self.e4 = Inception(528, 256, 160, 320, 32, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 48, 128, 128)

        # Global average pooling. For an 8x8 feature map (192x192 input) this
        # equals the former fixed AvgPool2d(8); being adaptive, it also yields
        # the 1x1 map the linear layer needs for other input sizes.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        out = self.convert_layer(x)
        out = self.pre_layers(out)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        # Flatten (batch, 1024, 1, 1) to (batch, 1024) for the classifier.
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [18]:
BATCH_SIZE = 8
NUM_TRAIN = len(train_pairs)
NUM_VAL = len(val_pairs)
NUM_TEST = len(test_pairs)

# Per-channel mean subtracted from every image. The std passed to Normalize
# is 1, so normalisation only shifts the values and does not rescale them.
CHANNEL_MEAN = [0.3853909028535724, 0.4004333749569167, 0.34717936323577203]

# Training images are resized to 192x192 and randomly flipped / rotated.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((192, 192)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=90),
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, [1,1,1]),
])

# Evaluation images get the same resize and normalisation, no augmentation.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((192, 192)),
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, [1,1,1]),
])

trainDataset = COREL_5K(train_pairs, NUM_TRAIN, train_transform)
train_loader = DataLoader(dataset=trainDataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=20, drop_last=True)

# The validation set is sized by its own pair count; using NUM_TEST here made
# the dataset length disagree with the number of validation pairs.
valDataset = COREL_5K(val_pairs, NUM_VAL, test_transform)
val_loader = DataLoader(dataset=valDataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=20, drop_last=False)

testDataset = COREL_5K(test_pairs, NUM_TEST, test_transform)
test_loader = DataLoader(dataset=testDataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=20, drop_last=False)

In [19]:
# Flatten every label of the train, test and validation pairs (in that order)
# into one list.
labels = []
for pairs in (train_pairs, test_pairs, val_pairs):
    for _, pair_labels in pairs:
        labels.extend(pair_labels)

# Count how often each label occurs ...
a = {}
for label_id in labels:
    a[label_id] = a.get(label_id, 0) + 1

# ... and replace each count by its reciprocal (inverse label frequency).
for label_id in a:
    a[label_id] = 1 / a[label_id]

In [20]:
LEARNING_RATE = 0.001
model = GoogLeNet()
# The model stays on the CPU; call model.cuda() here to train on a GPU (the
# batches fed to it must then be moved to the GPU as well).
# Multi-label loss on raw logits. reduction='sum' adds up the loss of every
# element instead of averaging; it replaces the deprecated size_average=False.
critrien = nn.BCEWithLogitsLoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# train
NUM_EPOCHS = 10
best_acc = 0


def _top1_hits(output, label):
    """Count samples whose highest-scoring class is one of their true labels.

    ``output`` holds one row of class scores per sample and ``label`` the
    matching multi-hot rows; a sample is a hit when the label entry at its
    arg-max class equals 1.
    """
    _, top1 = torch.max(output, 1)
    return int(label.gather(1, top1.unsqueeze(1)).eq(1).sum().item())


# torch.save does not create missing directories.
os.makedirs("models", exist_ok=True)

for epoch in range(NUM_EPOCHS):
    train_loss = 0.0
    test_loss = 0.0
    train_acc = 0
    test_acc = 0
    train_seen = 0
    val_seen = 0

    # Training phase: iterate the *training* loader (the validation loader was
    # used here before, so the model was fitted on validation data).
    model.train()
    for data, label in tqdm(train_loader, total=len(train_loader), ncols=50, leave=False, unit='b'):
        # Batches stay on the CPU like the model; add .cuda() on both tensors
        # if the model is moved to a GPU.
        optimizer.zero_grad()
        output = model(data)
        loss = critrien(output, label)
        loss.backward()
        optimizer.step()
        # .item() extracts the Python number from the 0-dim loss tensor.
        train_loss += loss.item()
        train_acc += _top1_hits(output, label)
        train_seen += label.size(0)

    # Validation phase: no gradients are needed, so skip building the graph.
    model.eval()
    with torch.no_grad():
        for data, label in val_loader:
            output = model(data)
            loss = critrien(output, label)
            test_loss += loss.item()
            test_acc += _top1_hits(output, label)
            val_seen += label.size(0)

    # Normalise by the number of samples actually processed (the training
    # loader may drop a last partial batch; the validation set has its own
    # size). max(..., 1) avoids dividing by zero on an empty loader.
    # The "Test" columns report the validation metrics.
    print('Epoch [%d/%d], Train Loss: %.4f, Train Acc: %.4f, Test Loss: %.4f, Test Acc: %.4f'
            %(epoch+1, NUM_EPOCHS,
              train_loss / max(train_seen, 1), train_acc / max(train_seen, 1),
              test_loss / max(val_seen, 1), test_acc / max(val_seen, 1)))
    # Keep the weights of the epoch with the most validation hits so far.
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), "models/GooLeNet.pkl")

  2%|▏           | 8/500 [02:57<3:01:37, 22.15s/b]

In [None]:
# predict
def predict():
    """Evaluate a saved model on the test pairs and show its misses in visdom.

    A sample counts as correct when at least one of its four highest-scoring
    classes is among its true labels. Every incorrect sample is sent to
    visdom with the true words as title and the predicted words as caption.
    The fraction of correct samples is printed.
    """
    with open("Datasets/corel_5k/labels/words") as f:
        # Strip only the line terminator so a last line without a trailing
        # newline keeps its final character.
        words = [line.rstrip("\n") for line in f]

    # Per-channel mean that test_transform subtracts, shaped for CxHxW images.
    mean = np.array(
        [0.3853909028535724, 0.4004333749569167, 0.34717936323577203],
        dtype=np.float32,
    ).reshape(3, 1, 1)

    def img_back(img):
        """Undo the mean subtraction on a CxHxW image array."""
        return img + mean

    BATCH_SIZE = 8
    NUM_TEST = len(test_pairs)
    TOP_K = 4

    # test_transform resizes and normalises the images into equally sized
    # CxHxW float tensors; without it the raw arrays cannot be fed to the model.
    testDataset = COREL_5K(test_pairs, NUM_TEST, test_transform)
    test_loader = DataLoader(dataset=testDataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=False)
    # NOTE(review): SEResNeXt / BottleneckX and the checkpoint below are not
    # defined in the visible part of this file - confirm they are available.
    model = SEResNeXt(BottleneckX, [3, 4, 6, 3], num_classes=374)
    use_cuda = torch.cuda.is_available()
    state = torch.load("models/SEResNext1.pkl", map_location=None if use_cuda else "cpu")
    model.load_state_dict(state)
    if use_cuda:
        model.cuda()

    test_acc = 0
    seen = 0
    model.eval()
    with torch.no_grad():
        for data, label in test_loader:
            if use_cuda:
                data = data.cuda()
            output = model(data)
            # Ascending sort; the last TOP_K columns are the best classes.
            _, order = torch.sort(output)
            top_k = order[:, -TOP_K:].cpu().numpy()
            truth = label.numpy()
            images = data.cpu().numpy()
            for row in range(len(top_k)):
                truth_ids = np.where(truth[row] == 1)[0].tolist()
                picked_ids = top_k[row].tolist()
                seen += 1
                # Compare plain ints: tensor elements hash by identity and
                # would never intersect with the label indices.
                if set(picked_ids) & set(truth_ids):
                    test_acc += 1
                else:
                    gt = [words[k] for k in truth_ids]
                    predic = [words[k] for k in picked_ids]
                    viz.image(img_back(images[row]),
                              opts=dict(title=" ".join(gt), caption=" ".join(predic)))
    # Guard against an empty test set.
    print(test_acc / seen if seen else 0.0)