In [3]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader,Dataset
import time
# Restrict CUDA to the device with index 1. The variable is set before the first CUDA query
# below, so it is already in the environment when torch enumerates devices.
# NOTE(review): on a machine whose only GPU has index 0 this mask hides it, which may be why
# the recorded output of this cell is `False` — confirm the intended device index.
os.environ['CUDA_VISIBLE_DEVICES']= "1"
# Number of CUDA devices torch reports.
GpuNum = torch.cuda.device_count()
# Report whether any CUDA device is usable.
print(torch.cuda.is_available())

False


In [1]:
### Read images
### Load every image of a directory with OpenCV (cv2) into a numpy array.
def readfile(path, label):
    """Load all images found in ``path`` as 128x128 three-channel uint8 arrays.

    Directory entries are processed in sorted file-name order, so row ``i`` of the
    result corresponds to the i-th name of ``sorted(os.listdir(path))``.

    Args:
        path: Directory containing the image files.
        label: Boolean flag. When true, the class id is parsed from the part of each
            file name that precedes the first "_" and returned as well.

    Returns:
        ``x`` when ``label`` is false, ``(x, y)`` when it is true, where ``x`` has
        shape ``(n, 128, 128, 3)`` and ``y`` has shape ``(n,)``; both are uint8.

    Raises:
        ValueError: If an entry cannot be decoded as an image, or (with ``label``)
            its file-name prefix is not an integer.
    """
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)  # image buffer
    y = np.zeros((len(image_dir)), dtype=np.uint8)  # label buffer
    for i, file in enumerate(image_dir):
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising; fail
            # here with the offending path instead of an opaque error in cv2.resize.
            raise ValueError("cannot read image file: {}".format(file_path))
        # NOTE(review): cv2 presumably yields BGR channel order; it is left untouched
        # because every split goes through this same function.
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    return x

# Load the training, validation and testing splits with readfile.
# The dataset root can be overridden through the FOOD11_DIR environment variable so the
# notebook is not tied to one machine's absolute path; the original location stays the default.
workspace_dir = os.environ.get('FOOD11_DIR', 'E:\\BaiduNetdiskDownload\\food-11')
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
# The testing split carries no labels, so only the images are returned.
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))


Reading data


NameError: name 'os' is not defined

In [None]:
# Evaluation pipeline: no augmentation, only the conversion to a tensor.
test_transform = transforms.Compose(
    [
        # HxWxC numpy array (or CxHxW tensor) -> PIL image
        transforms.ToPILImage(),
        # PIL image -> tensor with values scaled to [0, 1]
        transforms.ToTensor(),
    ]
)

# Training pipeline: data augmentation varies the inputs seen during training,
# which is meant to reduce overfitting.
train_transform = transforms.Compose(
    [
        # HxWxC numpy array (or CxHxW tensor) -> PIL image
        transforms.ToPILImage(),
        # random horizontal flip
        transforms.RandomHorizontalFlip(),
        # random rotation by up to 15 degrees
        transforms.RandomRotation(degrees=15),
        # PIL image -> tensor with values scaled to [0, 1]
        transforms.ToTensor(),
    ]
)


class ImgDataset(Dataset):
    """Dataset over an indexable collection of samples with optional labels.

    Args:
        x: Indexable collection of samples; its length is the dataset length.
        y: Optional labels, converted to a LongTensor. When omitted, items are
            returned without a label.
        transform: Optional callable applied to each sample on access.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels must be a LongTensor; without labels the attribute stays None.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``sample`` or ``(sample, label)`` for ``index``."""
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]
        

# Mini-batch size used when building the data loaders.
batch_size = 32

# Training split: augmented samples, reshuffled by the loader.
train_set = ImgDataset(train_x, train_y, train_transform)
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size)

# Validation split: no augmentation, fixed order.
val_set = ImgDataset(val_x, val_y, test_transform)
val_loader = DataLoader(val_set, shuffle=False, batch_size=batch_size)

<font size=7 face="黑体">Model:</font>

In [None]:
class Classifier(nn.Module):
    """Convolutional classifier mapping [3, 128, 128] images to 11 class scores.

    The feature extractor is five identical stages (3x3 convolution with stride 1 and
    padding 1, batch normalisation, ReLU, 2x2 max-pooling). Each stage keeps the spatial
    size through the convolution and halves it in the pooling step:
    128 -> 64 -> 32 -> 16 -> 8 -> 4, ending with a [512, 4, 4] feature map that feeds a
    three-layer fully connected head.
    """

    # (in_channels, out_channels) of every convolutional stage, in order.
    _STAGES = ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))

    @staticmethod
    def _stage(in_channels, out_channels):
        """Return the layers of one stage: Conv2d, BatchNorm2d, ReLU, MaxPool2d."""
        return [
            # Conv2d(in_channels, out_channels, kernel_size, stride, padding)
            nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            # MaxPool2d(kernel_size, stride, padding)
            nn.MaxPool2d(2, 2, 0),
        ]

    def __init__(self):
        super().__init__()
        layers = []
        for in_channels, out_channels in self._STAGES:
            layers.extend(self._stage(in_channels, out_channels))
        self.cnn = nn.Sequential(*layers)

        # Fully connected head: [batch, 512*4*4] -> [batch, 1024] -> [batch, 512] -> [batch, 11]
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Return the class scores of shape [batch, 11] for a batch of images."""
        features = self.cnn(x)
        # Flatten each sample's feature map into one row: [batch, 512*4*4].
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

<font size=7 face="黑体">Training:</font>

In [None]:
# Train on the training split and evaluate on the validation split after every epoch.
# Use the GPU when torch can see one and fall back to the CPU otherwise, so the cell does
# not crash on a machine where torch.cuda.is_available() is False.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Classifier().to(device)
loss = nn.CrossEntropyLoss()  # classification task, hence cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 50

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # train() puts BatchNorm (and Dropout, if any) into training behaviour;
    # eval() below switches them to inference behaviour.
    model.train()
    for images, labels in train_loader:
        # Predictions and labels must live on the same device.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()  # reset the gradients of the model parameters
        train_pred = model(images)  # calls Classifier.forward
        batch_loss = loss(train_pred, labels)
        batch_loss.backward()  # back-propagate to obtain the gradients
        optimizer.step()  # update the parameters

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # batch_loss is the mean over the batch; weight it by the batch size so that
        # dividing by the dataset size below gives the mean loss per sample.
        train_loss += batch_loss.item() * labels.size(0)

    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            val_pred = model(images)
            batch_loss = loss(val_pred, labels)

            val_acc += (val_pred.argmax(dim=1) == labels).sum().item()
            val_loss += batch_loss.item() * labels.size(0)

    # Report accuracy and mean per-sample loss of this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))

In [None]:
# Retrain from scratch on the union of the training and validation splits.
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

# Use the GPU when torch can see one and fall back to the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_best = Classifier().to(device)
loss = nn.CrossEntropyLoss()  # classification task, hence cross-entropy
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)
num_epoch = 50


for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    # Training needs gradients, so this loop must NOT run under torch.no_grad():
    # backward() cannot be called on a loss computed with gradient tracking disabled.
    for images, labels in train_val_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        train_pred = model_best(images)
        batch_loss = loss(train_pred, labels)
        batch_loss.backward()
        optimizer.step()

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # batch_loss is the mean over the batch; weight it by the batch size so that
        # dividing by the dataset size below gives the mean loss per sample.
        train_loss += batch_loss.item() * labels.size(0)

    # Report accuracy and mean per-sample loss of this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_val_set), train_loss / len(train_val_set)))

<font size=7 face="黑体">Testing（用刚刚train好的model进行predict）:</font>

In [None]:
# Predict the class of every test image with the retrained model and save the result.
test_set = ImgDataset(test_x, transform=test_transform)
# shuffle=False keeps the predictions in dataset order, which the CSV row ids rely on.
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

model_best.eval()
# Feed the inputs to whichever device holds the model's parameters instead of assuming CUDA.
device = next(model_best.parameters()).device
prediction = []
with torch.no_grad():
    for data in test_loader:
        test_pred = model_best(data.to(device))
        # Index of the highest score per sample is the predicted class.
        prediction.extend(test_pred.argmax(dim=1).cpu().tolist())


# Write the predictions to a CSV file: one "row index,predicted class" line per test image.
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in enumerate(prediction):
        f.write('{},{}\n'.format(i, y))