In [1]:
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import torch
import torchvision
from torchvision import datasets, transforms
import torch.utils.data as data
import torchvision.models as models
import matplotlib.image as pli
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from PIL import Image
from PIL import ImageOps
from PIL import ImageEnhance
import random
import math
import pickle
import glob
import librosa
import os
import time
import scipy.signal as ss
from enum import Enum

# Restrict CUDA to device index 1. This is set before the first CUDA query
# below so that the restriction is in place when CUDA is initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print(torch.cuda.is_available())

# Root of the training data; every sub-directory is treated as one class.
path = './dataset/train'

# Keep only real, non-hidden directories so that stray files (e.g. .DS_Store)
# do not become bogus classes and silently change the classifier's output size.
# NOTE(review): the class index of a label is its position in this list, which
# follows os.listdir() order. That order is deliberately NOT changed here
# because an existing checkpoint was trained against it; confirm the order is
# stable on the target machine before trusting predicted indices.
labels = [
    l for l in os.listdir(path)
    if not l.startswith('.') and os.path.isdir(os.path.join(path, l))
]

# Per-label recording folders. Folders whose name starts with two digits are
# used for training; folders whose name is a single digit are held out for
# validation.
pos_train_folders = {l: glob.glob(f'{path}/{l}/[0-9][0-9]*/') for l in labels}
pos_val_folders = {l: glob.glob(f'{path}/{l}/[0-9]/') for l in labels}
print(labels)

# When True, the dataset classes display every image they produce.
is_plot = False

# NOTE(review): freq_length and time_length are not referenced by any cell
# visible here; they are kept in case other code relies on them.
freq_length = 57
time_length = 221
# Number of random samples that make up one pass over each dataset.
trainingset_size = 10000
val_set_size = 100
# Smaller batches when running on CPU.
batch_size = 64 if torch.cuda.is_available() else 8

True
['toothpaste_box', 'whiteboard_spray', 'toy_elephant', 'green_basketball', '061_foam_brick', 'shiny_toy_gun', 'salt_cylinder', 'strawberry', 'stanley_screwdriver', 'yellow_block']


In [2]:
import cv2
from findContourCenter import findContourCenter
def crop_img(folder, is_debug):
    """Return a masked RGB crop of one randomly chosen frame in ``folder``.

    A mask PNG is picked at random from ``{folder}mask/``. The matching RGB
    path is derived from the mask path by replacing ``'mask'`` with ``'rgb'``
    and ``'png'`` with ``'jpg'``. The RGB image is multiplied by the mask and
    cropped to a window of up to 200x200 pixels around the centre reported by
    ``findContourCenter``.

    Args:
        folder: Directory path (with trailing separator) containing the
            ``mask/`` sub-directory.
        is_debug: When truthy, show the mask, the masked image and the crop
            with matplotlib.

    Returns:
        Channels-first array ``(C, h, w)`` with ``h, w <= 200``; the window is
        clipped where it would leave the image.
    """
    half_side = 100

    # Choose a mask and derive the path of its RGB counterpart.
    chosen_mask = random.choice(glob.glob(f'{folder}mask/*.png'))
    rgb_path = chosen_mask.replace('mask', 'rgb').replace('png', 'jpg')

    # Channels-first layout so the 2-D mask broadcasts over the channels.
    rgb_chw = np.moveaxis(plt.imread(rgb_path), -1, 0)
    height, width = rgb_chw.shape[1], rgb_chw.shape[2]

    # The mask image only covers rows 20:460 and columns 100:540 of the RGB
    # frame; everything outside that window stays zero.
    full_mask = np.zeros((height, width))
    full_mask[20:460, 100:540] = plt.imread(chosen_mask)

    masked_rgb = rgb_chw * np.uint8(full_mask)
    center, _ = findContourCenter(full_mask)

    # center[0] is used as the row coordinate and center[1] as the column.
    row_c = int(center[0])
    col_c = int(center[1])
    row_lo = max(row_c - half_side, 0)
    col_lo = max(col_c - half_side, 0)
    cropped = masked_rgb[:, row_lo:row_c + half_side, col_lo:col_c + half_side]

    if is_debug:
        for picture in (full_mask,
                        np.moveaxis(masked_rgb, 0, -1),
                        np.moveaxis(cropped, 0, -1)):
            plt.imshow(picture)
            plt.show()

    return cropped

In [3]:
class ImageSet(data.Dataset):
    """Randomly sampled dataset of cropped object images and their labels.

    Every item is drawn at random: a class is chosen uniformly from
    ``labels``, then one recording folder of that class, then ``crop_img``
    picks a frame inside it. The requested index is ignored, so the dataset
    length only determines how many samples make up one pass.

    Args:
        behav: ``'train'`` to sample from ``pos_train_folders`` or ``'val'``
            to sample from ``pos_val_folders``.

    Raises:
        ValueError: if ``behav`` is neither ``'train'`` nor ``'val'``.
    """

    def __init__(self, behav):
        if behav == 'train':
            self.length = trainingset_size
        elif behav == 'val':
            self.length = val_set_size
        else:
            raise ValueError(f"behav must be 'train' or 'val', got {behav!r}")
        self.behav = behav

    def __getitem__(self, index):
        """Return ``(image, label)``: a uint8 ``(3, 200, 200)`` tensor and a class index."""
        # Sample over however many classes exist instead of assuming ten.
        label = random.randrange(len(labels))
        if self.behav == 'train':
            folder = random.choice(pos_train_folders[labels[label]])
        elif self.behav == 'val':
            folder = random.choice(pos_val_folders[labels[label]])
        else:
            raise ValueError(f"behav must be 'train' or 'val', got {self.behav!r}")

        rgb_img = crop_img(folder, is_plot)

        # Crops near the image border are smaller than 200x200; centre them
        # on a zero canvas so every sample has the same shape.
        crop_h, crop_w = rgb_img.shape[1], rgb_img.shape[2]
        row_begin = 100 - crop_h // 2
        col_begin = 100 - crop_w // 2
        img = np.zeros((3, 200, 200), dtype='uint8')
        img[:, row_begin:row_begin + crop_h, col_begin:col_begin + crop_w] = rgb_img
        img = torch.from_numpy(img)

        if is_plot:
            print(img.shape)
            plt.imshow(np.moveaxis(img.numpy(), 0, -1))
            plt.show()

        return img, label

    def __len__(self):
        return self.length

# Training batches of (image, label). ImageSet ignores the index it is asked
# for and samples at random, so `shuffle` does not affect which items are drawn.
train_loader = data.DataLoader(ImageSet('train'), batch_size=batch_size, shuffle=True)

In [4]:
class ImageCNN(nn.Module):
    """Six-stage convolutional classifier for 3-channel images.

    Each stage is Conv2d -> BatchNorm2d -> ReLU. The feature map is then
    average-pooled to 1x1 and fed to a single linear layer. Because of the
    adaptive pooling the network accepts any input large enough to survive
    the six unpadded convolutions.

    Args:
        num_classes: Number of output logits. Defaults to ``len(labels)``
            (the module-level class list) when omitted.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            num_classes = len(labels)

        # Spatial sizes in the comments assume a 200x200 input.
        self.layer1 = self._conv_block(3, 32, kernel_size=7)               # 194 x 194
        self.layer2 = self._conv_block(32, 32, kernel_size=6, stride=2)    # 95 x 95
        self.layer3 = self._conv_block(32, 64, kernel_size=6)              # 90 x 90
        self.layer4 = self._conv_block(64, 64, kernel_size=6, stride=2)    # 43 x 43
        self.layer5 = self._conv_block(64, 128, kernel_size=5)             # 39 x 39
        self.layer6 = self._conv_block(128, 128, kernel_size=3, stride=2)  # 19 x 19
        self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(128, num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, stride=1):
        """Return an unpadded Conv2d -> BatchNorm2d -> ReLU stack."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=stride),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

    def forward(self, input):
        """Map a float batch ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        out = input
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5, self.layer6):
            out = stage(out)
        out = self.avg_pool(out)
        # Collapse the pooled (N, 128, 1, 1) map to (N, 128) for the linear layer.
        out = out.reshape(out.size(0), -1)
        return self.fc(out)

In [5]:
# Instantiate the classifier; its output layer has one logit per entry in `labels`.
imgNet = ImageCNN()

In [6]:
# Load the saved weights onto the CPU first so that a checkpoint written from
# a GPU session can also be restored on a CPU-only machine; the model is moved
# to the selected device by the cells that use it.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
state_dict = torch.load('./imgNet.model', map_location='cpu')
imgNet.load_state_dict(state_dict)

<All keys matched successfully>

In [8]:
# Put the model on its device before building the optimizer, as the
# torch.optim documentation recommends, then switch to training mode.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device('cpu')
imgNet = imgNet.to(device)
imgNet.train()

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(imgNet.parameters(), lr=0.001)

# One pass over train_loader; progress is printed on every second batch.
for i, (imgs, lbs) in enumerate(train_loader):
    # Images arrive as uint8 tensors; the network needs floats.
    imgs = imgs.float().to(device)
    lbs = lbs.to(device)
    outputs = imgNet(imgs)
    loss = loss_func(outputs, lbs)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Softmax is monotonic, so the arg-max of the raw logits is the prediction.
    predict = torch.argmax(outputs, dim=1)
    if i % 2 == 0:
        # Count matches on the tensor instead of iterating it in Python.
        print(f"""i = {i},  loss = {loss},
        labels = {lbs}
        predict = {predict}
        accuracy = {(lbs == predict).sum().item() / lbs.size(0)}""")

i = 0,  loss = 0.07257258147001266,
        labels = tensor([3, 6, 2, 3, 6, 7, 8, 3, 5, 9, 0, 8, 2, 4, 2, 6, 6, 6, 3, 2, 9, 8, 8, 9,
        4, 4, 2, 6, 4, 8, 3, 0, 2, 7, 9, 0, 5, 3, 0, 8, 2, 0, 5, 3, 5, 9, 0, 0,
        7, 9, 9, 5, 0, 0, 8, 7, 8, 0, 9, 5, 4, 0, 6, 5], device='cuda:0')
        predict = tensor([3, 6, 2, 3, 6, 7, 8, 3, 5, 9, 0, 8, 2, 4, 1, 6, 6, 6, 3, 2, 9, 8, 8, 9,
        4, 4, 1, 6, 4, 8, 3, 0, 2, 7, 9, 0, 5, 3, 0, 8, 2, 0, 5, 3, 5, 9, 0, 0,
        7, 9, 9, 5, 0, 0, 8, 7, 8, 0, 9, 5, 4, 0, 6, 5], device='cuda:0')
        accuracy = 0.96875
i = 2,  loss = 0.04665343090891838,
        labels = tensor([3, 6, 8, 6, 5, 6, 3, 8, 5, 4, 0, 3, 6, 5, 7, 7, 3, 9, 4, 1, 2, 6, 1, 4,
        9, 0, 6, 2, 8, 5, 5, 0, 6, 5, 9, 0, 0, 7, 3, 3, 3, 2, 5, 5, 4, 3, 0, 6,
        6, 9, 4, 5, 9, 8, 9, 3, 2, 8, 3, 4, 6, 5, 9, 6], device='cuda:0')
        predict = tensor([3, 6, 8, 7, 5, 6, 3, 8, 5, 4, 0, 3, 6, 5, 7, 7, 3, 9, 4, 1, 2, 6, 1, 4,
        9, 0, 6, 2, 8, 5, 5, 0, 6, 5, 9, 0, 0, 7,

KeyboardInterrupt: 

In [10]:
# Save the model. Use with caution: this overwrites the model already stored in the file.
torch.save(imgNet.state_dict(), './imgNet.model')

In [9]:
# Validation batches drawn at random from the held-out folders.
val_loader = data.DataLoader(ImageSet('val'), batch_size=50, shuffle=False)

# eval() makes BatchNorm use its running statistics instead of batch statistics.
imgNet.eval()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device('cpu')
imgNet = imgNet.to(device)

# Gradients are not needed for evaluation; disabling them avoids building
# autograd graphs for every batch.
with torch.no_grad():
    for i, (imgs, lbs) in enumerate(val_loader):
        imgs = imgs.float().to(device)
        lbs = lbs.to(device)
        outputs = imgNet(imgs)
        # Softmax is monotonic, so the arg-max of the raw logits is the prediction.
        predict = torch.argmax(outputs, dim=1)
        # Every batch is reported.
        print(f"i = {i}, \n lables = {lbs}, \n predict = {predict}  \n accuracy = {(lbs == predict).sum().item() / lbs.size(0)}")

i = 0, 
 lables = tensor([3, 9, 9, 4, 0, 4, 5, 7, 1, 2, 4, 9, 2, 4, 3, 0, 6, 6, 5, 3, 7, 6, 5, 1,
        4, 3, 2, 7, 0, 8, 0, 8, 2, 7, 4, 8, 8, 1, 7, 3, 8, 9, 5, 3, 0, 6, 5, 0,
        2, 2], device='cuda:0'), 
 predict = tensor([3, 9, 9, 4, 0, 4, 5, 7, 1, 2, 4, 9, 2, 4, 3, 0, 6, 6, 5, 3, 7, 6, 5, 1,
        4, 3, 2, 7, 0, 8, 0, 8, 2, 7, 4, 8, 8, 1, 7, 3, 8, 9, 5, 3, 0, 6, 5, 0,
        2, 2], device='cuda:0')  
 accuracy = 1.0
i = 1, 
 lables = tensor([8, 5, 1, 5, 3, 0, 6, 8, 4, 7, 0, 8, 1, 6, 1, 2, 1, 5, 2, 0, 5, 1, 1, 2,
        8, 0, 6, 7, 5, 4, 3, 3, 7, 5, 9, 2, 3, 0, 1, 5, 8, 8, 7, 0, 7, 6, 2, 2,
        0, 5], device='cuda:0'), 
 predict = tensor([8, 5, 1, 5, 3, 0, 6, 8, 4, 7, 0, 8, 1, 6, 1, 2, 1, 5, 2, 0, 5, 1, 1, 2,
        8, 0, 6, 7, 5, 4, 3, 3, 7, 5, 9, 2, 3, 0, 1, 5, 8, 8, 7, 0, 7, 6, 2, 2,
        0, 5], device='cuda:0')  
 accuracy = 1.0


In [14]:
# Sorted so the order of test samples (and therefore of the printed
# predictions) is the same on every run and every machine.
test_folders = sorted(glob.glob('./dataset/task2/test/*/*/'))


class TestSet(data.Dataset):
    """Dataset yielding one cropped, zero-padded image per test folder.

    Args:
        folders: Folder paths to evaluate. Defaults to the module-level
            ``test_folders`` list. The list is copied at construction, so the
            reported length and the indexed folders always agree.
    """

    def __init__(self, folders=None):
        self.folders = list(test_folders if folders is None else folders)
        self.length = len(self.folders)

    def __getitem__(self, index):
        """Return ``(image, folder)``: a uint8 ``(3, 200, 200)`` tensor and its source folder."""
        folder = self.folders[index]
        rgb_img = crop_img(folder, False)

        # Crops near the image border are smaller than 200x200; centre them
        # on a zero canvas so every sample has the same shape.
        crop_h, crop_w = rgb_img.shape[1], rgb_img.shape[2]
        row_begin = 100 - crop_h // 2
        col_begin = 100 - crop_w // 2
        img = np.zeros((3, 200, 200), dtype='uint8')
        img[:, row_begin:row_begin + crop_h, col_begin:col_begin + crop_w] = rgb_img
        img = torch.from_numpy(img)
        return img, folder

    def __len__(self):
        return self.length

# Test batches of (image, folder); shuffle=False keeps them in dataset index order.
test_loader = data.DataLoader(TestSet(), batch_size=batch_size, shuffle=False)

In [15]:
# eval() makes BatchNorm use its running statistics instead of batch statistics.
imgNet.eval()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device('cpu')
imgNet = imgNet.to(device)

# Predicted class indices and the folder each one belongs to, in matching order.
predict_all = []
files_all = []

# Gradients are not needed for inference; disabling them avoids building
# autograd graphs for every batch.
with torch.no_grad():
    for i, (imgs, folder) in enumerate(test_loader):
        imgs = imgs.float().to(device)
        outputs = imgNet(imgs)
        # Softmax is monotonic, so the arg-max of the raw logits is the prediction.
        predict = torch.argmax(outputs, dim=1)
        # Every batch is reported.
        print(f"""i = {i}
        predict = {predict}""")
        predict_all.extend(predict.tolist())
        files_all.extend(folder)
print(files_all)
print(predict_all)

i = 0
        predict = tensor([6, 7, 8, 8, 3, 5, 8, 1, 3, 6, 8, 0, 9, 9, 1, 1, 3, 4, 3, 6, 0, 6, 1, 7,
        0, 9, 2, 7, 0, 2, 3, 6, 3, 8, 8, 2, 1, 2, 8, 9, 5, 2, 1, 5, 5, 3, 5, 8,
        0, 1, 8, 1, 7, 2, 1, 3, 0, 1, 5, 8, 9, 6, 2, 2], device='cuda:0')
i = 1
        predict = tensor([6, 4, 6, 7, 2, 7, 7, 0, 5, 8, 1, 3, 4, 9, 2, 4, 9, 0, 2, 5, 3, 1, 6, 7,
        6, 3, 4, 0, 3, 3, 9, 5, 6, 9, 2, 7, 1, 7, 9, 3, 1, 6, 9, 5, 8, 9, 5, 0,
        0, 6, 5, 3, 4, 2, 9, 8, 7, 1, 1, 7, 3, 6, 7, 1], device='cuda:0')
i = 2
        predict = tensor([2, 8, 4, 5, 6, 7, 6, 6, 5, 9, 1, 8, 4, 8, 2, 0, 3, 7, 5, 8, 5, 6, 8, 7,
        5, 1, 7, 8, 5, 5, 2, 7, 7, 6, 0, 9, 3, 5, 4, 5, 0, 7, 3, 6, 2, 6, 3, 0,
        3, 9, 5, 3, 3, 9, 7, 2, 0, 0, 8, 2, 8, 4, 3, 0], device='cuda:0')
i = 3
        predict = tensor([0, 7, 3, 5, 6, 7, 3, 8, 7, 2, 1, 5, 7, 1, 9, 6, 2, 8, 6, 2, 4, 1, 8, 7,
        1, 8, 5, 5, 2, 1, 7, 0, 5, 7, 5, 0, 7, 3, 8, 4, 9, 8, 3, 3, 7, 7, 2, 3,
        8, 9, 8, 6, 2, 7, 2, 3, 8, 6, 9, 8