In [1]:
import collections
import math

import os
import shutil
import time
import zipfile

import sys
sys.path.append('../d2lzh/')
import d2lzh_pytorch as d2l

import torch
import torchvision

In [2]:
# `demo` exists so the model can be trained on only part of the data
# (the full dataset is large). Per the original note, the reduced set uses
# 500 images for testing and 50 images for validation.
data_dir = '../Datasets/kaggle_dog/'  # root folder of the dataset
demo = False

In [3]:
### 整理数据集

def reorg_train_valid(data_dir, train_dir, input_dir, valid_ratio, idx_label):
    """Copy each training image into per-label folders and carve out a validation split.

    Three folder trees are produced under ``data_dir/input_dir``:

    * ``train_valid/<label>/`` - every training image,
    * ``valid/<label>/``       - the images held out for validation,
    * ``train/<label>/``       - the remaining images.

    Args:
        data_dir: Root directory of the dataset.
        train_dir: Sub-directory of ``data_dir`` holding the raw training images.
        input_dir: Sub-directory of ``data_dir`` that receives the reorganised copies.
        valid_ratio: Ratio between the number of validation images taken per
            label and the size of the smallest label in the original data.
        idx_label: Dict mapping an image id (file name up to the first ``.``)
            to its label.

    Returns:
        None. The function only copies files.

    Raises:
        KeyError: If a file in ``train_dir`` has an id missing from ``idx_label``.
    """
    # Size of the rarest label; 0 when there are no labels at all, so an empty
    # mapping no longer raises IndexError.
    label_sizes = collections.Counter(idx_label.values())
    min_n_train_per_label = min(label_sizes.values(), default=0)

    # Number of validation images to hold out for every label.
    n_valid_per_label = math.floor(min_n_train_per_label * valid_ratio)

    src_root = os.path.join(data_dir, train_dir)
    dst_root = os.path.join(data_dir, input_dir)

    def copy_into(split, label, src_path):
        # Create <dst_root>/<split>/<label>/ on demand and copy the image into it.
        target_dir = os.path.join(dst_root, split, label)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(src_path, target_dir)

    label_count = {}
    # Sorted so the train/valid assignment does not depend on the order in
    # which the filesystem happens to list the files.
    for train_file in sorted(os.listdir(src_root)):
        idx = train_file.split('.')[0]
        label = idx_label[idx]
        src_path = os.path.join(src_root, train_file)

        copy_into('train_valid', label, src_path)

        # The first image seen for a label always goes to the validation set,
        # even when n_valid_per_label is 0 (kept from the original logic).
        if label not in label_count or label_count[label] < n_valid_per_label:
            copy_into('valid', label, src_path)
            label_count[label] = label_count.get(label, 0) + 1
        else:
            copy_into('train', label, src_path)

def reorg_dog_data(data_dir, label_dir, train_dir, test_dir, input_dir, valid_ratio):
    """Read the label CSV, then lay out the train/valid/test folder trees.

    Args:
        data_dir: Root directory of the dataset.
        label_dir: Name of the label CSV inside ``data_dir``. The first line is
            treated as a header; every other line is ``<id>,<label>``.
        train_dir: Sub-directory of ``data_dir`` with the raw training images.
        test_dir: Sub-directory of ``data_dir`` with the raw test images.
        input_dir: Sub-directory of ``data_dir`` that receives the copies.
        valid_ratio: Forwarded unchanged to ``reorg_train_valid``.

    Returns:
        None. The function only copies files.

    Raises:
        ValueError: If a non-blank label line does not have exactly two
            comma-separated fields.
    """
    idx_label = {}
    with open(os.path.join(data_dir, label_dir), 'r') as f:
        next(f, None)  # skip the header row (no-op on an empty file)
        for line in f:
            line = line.rstrip()
            # A blank or trailing empty line would otherwise break the
            # two-field unpacking below.
            if not line.strip():
                continue
            idx, label = line.split(',')
            idx_label[idx] = label

    reorg_train_valid(data_dir, train_dir, input_dir, valid_ratio, idx_label)

    # Test images have no label, so they all go into one placeholder class folder.
    test_target = os.path.join(data_dir, input_dir, 'test', 'unknown')
    os.makedirs(test_target, exist_ok=True)

    test_source = os.path.join(data_dir, test_dir)
    for test_file in os.listdir(test_source):
        shutil.copy(os.path.join(test_source, test_file), test_target)

In [4]:
      
# Names of the raw inputs and the split ratio are needed in both modes, so they
# are defined unconditionally; previously they only existed when demo was False
# and any later use raised NameError in demo mode.
label_file, train_dir, test_dir = 'labels.csv', 'train', 'test'
valid_ratio = 0.1

if demo:
    # Small pre-organised subset.
    input_dir, batch_size = 'train_valid_test_tiny', 1
else:
    input_dir, batch_size = 'train_valid_test', 16
    # One-off reorganisation of the full dataset; uncomment for the first run.
#     reorg_dog_data(data_dir, label_file, train_dir, test_dir, input_dir, valid_ratio)

In [5]:
### 图像增广

# Per-channel mean / std used to normalise the RGB channels in both pipelines.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop covering 8%-100% of the image area with an
# aspect ratio between 3/4 and 4/3 (resized to 224x224), random horizontal
# flip, colour jitter, then tensor conversion and normalisation.
_train_steps = [
    torchvision.transforms.RandomResizedCrop(
        224, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ColorJitter(
        brightness=0.4, contrast=0.4, saturation=0.4),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(_norm_mean, _norm_std),
]
transform_train = torchvision.transforms.Compose(_train_steps)

# Evaluation pipeline: deterministic resize + centre crop, same normalisation.
_test_steps = [
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(_norm_mean, _norm_std),
]
transform_test = torchvision.transforms.Compose(_test_steps)

In [6]:
### 读取数据集

from torchvision.datasets import ImageFolder as IF
from torch.utils.data import DataLoader as DL

# Folder layout under data_dir/input_dir: train/, valid/, train_valid/, test/.
# The dataset objects for the training splits are left commented out; enable
# them together with the matching loaders below when training.
train_data_path = os.path.join(data_dir, input_dir, 'train')
# train_data = IF(train_data_path, transform=transform_train)

valid_data_path = os.path.join(data_dir, input_dir, 'valid')
# Validation uses the deterministic pipeline so the accuracy is not perturbed
# by random augmentation.
# valid_data = IF(valid_data_path, transform=transform_test)

train_valid_data_path = os.path.join(data_dir, input_dir, 'train_valid')
# train_valid_data = IF(train_valid_data_path, transform=transform_train)

# The reorganised test images always live in the literal 'test' sub-folder;
# this must not depend on the name of the raw test directory.
test_data_path = os.path.join(data_dir, input_dir, 'test')
test_data = IF(test_data_path, transform=transform_test)

# Wrap the datasets in loaders.
# train_iter = DL(train_data, batch_size, shuffle=True)
# valid_iter = DL(valid_data, batch_size, shuffle=True)
# train_valid_iter = DL(train_valid_data, batch_size, shuffle=True)
# No shuffling: predictions must stay aligned with the file order.
test_iter = DL(test_data, batch_size, shuffle=False)

In [7]:
### 定义模型
# 由于dog数据集是ImageNet的子集，所有采用fine tuning的思想来进行迁移学习

from torch import nn, optim
from torchvision import models

# Start from a ResNet-34 with pretrained weights (downloaded when pretrained=True).
pretrained_net = models.resnet34(pretrained=True)

# Swap the classification head for a freshly initialised 512 -> 120 linear
# layer; the parameters of all other layers are left as loaded.
pretrained_net.fc = nn.Linear(in_features=512, out_features=120)
# To inspect the result: print(pretrained_net) / print(pretrained_net.fc)

In [8]:
### 更改模型的学习率（前面参数的学习率较低、最后一层参数的学习率较高）

# Identity of every parameter that belongs to the new output layer.
output_params = [id(p) for p in pretrained_net.fc.parameters()]

# All remaining (pretrained) parameters. Materialised as a list rather than a
# one-shot `filter` iterator, so the name is still usable after the optimizer
# has consumed it (e.g. when the optimizer is rebuilt or the list inspected).
feature_params = [
    p for p in pretrained_net.parameters() if id(p) not in output_params
]

# Pretrained layers use the base learning rate; the new head uses 10x that.
lr = 0.01
optimizer = optim.SGD(
    [
        {'params': feature_params},
        {'params': pretrained_net.fc.parameters(), 'lr': lr * 10},
    ],
    lr=lr,
    weight_decay=0.001,
)

In [9]:
### 定义训练函数

def train(net, train_iter, valid_iter, loss, optimizer, num_epochs, device):
    """Train ``net`` and print per-epoch loss, accuracy and wall-clock time.

    Args:
        net: Model to train; it is moved to ``device`` first.
        train_iter: Iterable yielding ``(X, y)`` batches for training.
        valid_iter: Iterable of validation batches, or ``None`` to skip
            validation. When given, accuracy is computed with
            ``d2l.evaluate_accuracy``.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        optimizer: Optimizer already bound to the parameters of ``net``.
        num_epochs: Number of passes over ``train_iter``.
        device: Device the model and every batch are moved to.

    Returns:
        None. Progress is reported through ``print``.
    """
    net = net.to(device)
    print("training on:", device)

    for epoch in range(num_epochs):
        # Make sure the model is in training mode at the start of every epoch,
        # whatever mode the previous evaluation left it in.
        net.train()

        # All statistics, including the batch counter, are per epoch. The
        # counter used to accumulate across epochs, which made the reported
        # mean loss shrink artificially from one epoch to the next.
        train_l_sum, train_acc_sum, bs, batch_count = 0.0, 0.0, 0, 0
        start = time.time()

        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            bs += y.shape[0]
            batch_count += 1

        # Guard against an empty iterator so the report does not divide by zero.
        mean_loss = train_l_sum / max(batch_count, 1)
        train_acc = train_acc_sum / max(bs, 1)

        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net)
            acc_str = ("train acc %.4f, valid acc %.4f,"
                       % (train_acc, valid_acc))
        else:
            acc_str = ("train acc %.4f," % train_acc)

        print("epoch %d, train loss %.3f," % (epoch + 1, mean_loss)
              + acc_str + "time %.2f" % (time.time() - start))
            

In [10]:
### 开始训练(暂时先不测试)

# Training configuration: number of epochs, target device and loss function.
num_epochs = 200

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

loss = torch.nn.CrossEntropyLoss()

# Training run (disabled for now):
# train(pretrained_net, train_iter, valid_iter, loss, optimizer, num_epochs, device)

# Save the trained weights:
# PATH = "./pretrained_resnet34_my.pth"
# torch.save(pretrained_net.state_dict(), PATH)

# Load them back into a fresh model:
# model = models.resnet34(pretrained=False)
# model.fc = nn.Linear(512, 120)
# model.load_state_dict(torch.load(PATH))
# model.state_dict()

In [None]:
# Test stage: train on the full training set (train + valid), then predict on
# the whole test set and write the submission file.

# Full-data training run (disabled; enable together with train_valid_iter):
# train(pretrained_net, train_valid_iter, None, loss, optimizer, num_epochs, device)

# Checkpoint location.
PATH = "./pretrained_resnet34_my.pth"
# torch.save(pretrained_net.state_dict(), PATH)

# Device used for inference; chosen locally so this cell does not rely on the
# training configuration having been run.
infer_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Rebuild the architecture and load the saved weights.
model = models.resnet34(pretrained=False)
model.fc = nn.Linear(512, 120)
model.load_state_dict(torch.load(PATH, map_location=infer_device))
model = model.to(infer_device)
# Inference mode: BatchNorm must use its running statistics, not per-batch
# statistics (which are meaningless for tiny batches).
model.eval()

preds = []
# No autograd graph is needed for prediction; this also keeps memory bounded.
with torch.no_grad():
    for X, _ in test_iter:
        # Convert logits to per-class probabilities and store them as plain
        # Python floats (one list of 120 numbers per image).
        y_hat = torch.softmax(model(X.to(infer_device)), dim=1)
        preds.extend(y_hat.cpu().tolist())

print(preds[0])

# Column order must match the label indices ImageFolder assigns from the
# class sub-folder names of the full training set.
class_names = IF(train_valid_data_path).classes

# Sorted to match the order in which the un-shuffled test loader yields files.
ids = sorted(os.listdir(os.path.join(data_dir, input_dir, 'test/unknown')))
with open("submission.csv", 'w') as f:
    f.write('id,' + ",".join(class_names) + '\n')
    for i, output in zip(ids, preds):
        # `output` holds plain floats, so str() yields bare numbers rather
        # than "tensor(...)" reprs.
        f.write(i.split('.')[0] + ',' + ','.join([str(num) for num in output]) + '\n')
