### 说明：

在'/home/tpg/Datasets/az_train_val/'文件夹下有train,val两个文件夹，其中train下有az和notaz两类图片。

要做什么？

1. 读取数据，形成train_data_iter和val_data_iter

2. 创建Alex模型(通过torchvision.model_zoo),更改最后一层的分类数和学习率即可。

3. 训练

In [1]:
import collections
import math

import os
import shutil
import time
import zipfile

import sys
sys.path.append('../d2lzh/')
import d2lzh_pytorch as d2l

import torch
import torchvision

In [2]:
### 训练图像增广

transform_train = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(224, 
                                                 scale=(0.08, 1.0),
                                                ratio=(3.0/4.0, 4.0/3.0)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ColorJitter(brightness=0.4, 
                                          contrast=0.4,
                                          saturation=0.4),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
        
    ])


In [3]:
demo = False
data_dir = '/home/tpg/Datasets/az_train_val/'
batch_size = 32

In [4]:
### 读取数据集，并完成封装iter.

from torchvision.datasets import ImageFolder as IF
from torch.utils.data import DataLoader as DL

train_data_path = os.path.join(data_dir, 'train')
train_data = IF(train_data_path, transform=transform_train)

valid_data_path = os.path.join(data_dir, 'valid')
valid_data = IF(valid_data_path, transform=transform_train)

# train_valid_data_path = os.path.join(data_dir, 'train_valid')
# train_valid_data = IF(train_valid_data_path, transform=transform_train)

# test_data_path = os.path.join(data_dir, input_dir, test_dir)
# test_data = IF(test_data_path, transform=transform_test)

# 封装
train_iter = DL(train_data, batch_size, shuffle=True)
valid_iter = DL(valid_data, batch_size, shuffle=True)
# train_valid_iter = DL(train_valid_data, batch_size, shuffle=True)
# test_iter = DL(test_data, batch_size, shuffle=False)

In [5]:
### 定义模型
# 采用fine tuning的思想来进行迁移学习

from torchvision import models
from torch import nn, optim

pretrained_net = models.resnet18(pretrained=True) # 下载pretrained模型

# print(pretrained_net)
# print(pretrained_net.fc)
pretrained_net.fc = nn.Linear(512, 2)  # 参数已经完成初始化，但其他层的参数依旧保持不变
# print(pretrained_net.fc)

In [6]:
### 更改模型的学习率（前面参数的学习率较低、最后一层参数的学习率较高）

# 先列出参数
output_params = list(map(id, pretrained_net.fc.parameters()))
# print(output_params)

# 过滤出其他参数
feature_params = filter(lambda p: id(p) not in output_params, pretrained_net.parameters())
# print(feature_params)

lr = 0.01
optimizer = optim.SGD([
        {'params': feature_params},
        {'params': pretrained_net.fc.parameters(), 'lr':lr * 10}],
        lr=lr, weight_decay=0.001,
    )

In [7]:
### 定义训练函数


def train(net, train_iter, valid_iter, loss, optimizer, num_epochs, device):

    net = net.to(device)
    print("training on:", device)
    batch_count = 0

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, bs = 0.0, 0.0, 0
        start = time.time()

        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            bs += y.shape[0]
            batch_count += 1

        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net)
            acc_str = ("train acc %.7f, valid acc %.7f," %
                       (train_acc_sum / bs, valid_acc))
        else:
            acc_str = ("train acc %.7f," % (train_acc_sum / bs))

        print("epoch %d, train loss %.7f," %
              (epoch + 1, train_l_sum / batch_count) + acc_str + "time %.2f" %
              (time.time() - start))

In [None]:
### 开始训练(暂时先不测试)

loss = torch.nn.CrossEntropyLoss()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_epochs = 200

train(pretrained_net, train_iter, valid_iter, loss, optimizer, num_epochs, device)

# 保存
PATH = "./pretrained_resnet18_my.pth"
torch.save(pretrained_net.state_dict(), PATH)


training on: cuda
epoch 1, train loss 3.0427241,train acc 0.5468750, valid acc 0.6240000,time 33.16
epoch 2, train loss 0.9718658,train acc 0.5183594, valid acc 0.5490000,time 32.96


### 测试阶段

#### 准备数据集 ——  加载模型 —— 预测结果

In [None]:
# 进入测试环节（用所有训练集训练，所有的测试集进行测试）

import os
import torch
import torchvision
from torchvision import models
from torch import nn, optim

demo = False
data_dir = '/home/tpg/Datasets/az_train_val/'
test_dir = "test/"
PATH = "./pretrained_resnet18_my.pth"
batch_size = 32

#### 准备数据集
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])
# 封装
test_data_path = os.path.join(data_dir, test_dir)
test_data = IF(test_data_path, transform=transform_test)
test_iter = DL(test_data, batch_size, shuffle=False)


In [None]:
#### 加载模型

model = models.resnet18(pretrained=False)
model.fc = nn.Linear(512, 2)
model.load_state_dict(torch.load(PATH))
# print(model.state_dict())

In [None]:
#### 预测结果

preds = []
device = 'cuda' if torch.cuda.is_available() else 'cpu'
for X, _ in test_iter:
    X = X.to(device)
    model = model.to(device)
    y_hat = model(X)
    preds.extend(y_hat.argmax(dim=1).cpu())   # 将每个样本的预测值计算出来。预测结果如 0,1,2,...,9
print(preds[0])
# print(preds)

In [None]:
#### 映射到具体类

ids = sorted(os.listdir(os.path.join(data_dir, test_dir,
                                     "unknown")))  # 读取test/uknown文件夹下的所有文件并排序
# print(ids)

preds_str = []
for i in preds:
    if i == 0:
        preds_str.append(train_data.classes[0])
    elif i == 1:
        preds_str.append(train_data.classes[1])            
# print(preds_str)

with open("/home/tpg/Datasets/az_train_val/submission.csv", 'w') as f:
    f.write('id,' + "result" + '\n')
    for i, pred in zip(ids, preds_str):
        img_name = i.split('.')[0]
        f.write(img_name + ',' + pred + '\n')

print("success")
        
# import pandas as pd

# sorted_ids = list(range(1, len(test_data) + 1))   # 生成1,2,3,..., n的数组
# # print(sorted_ids)
# sorted_ids.sort(key=lambda x: str(x))   # 将数组以首数字进行排序：1, 10, 100, 101, 102, ...
# # print(sorted_ids)

# df = pd.DataFrame({'id': sorted_ids, 'label':preds})
# print(df)

# df['label'] = df['label'].apply(lambda x: train_data.classes[x])
# print(df)