# <div align='center'> Test Garbage </div>

In [17]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, models
from torchvision import datasets
from torch import optim
from torch.utils.data import (Dataset, DataLoader)
from k12libs.utils.nb_easy import K12AI_PRETRAINED_ROOT, K12AI_DATASETS_ROOT

In [3]:
dataset = 'rgarbage'
data_root = os.path.join(K12AI_DATASETS_ROOT, 'cv', dataset)

In [4]:
!cat $data_root/info.json

{
    "num_records": 11073,
    "num_classes": 4,
    "label_names": [
        "其他垃圾",
        "厨余垃圾",
        "可回收物",
        "有害垃圾"
    ],
    "mean": [
        0.6535,
        0.6132,
        0.5643
    ],
    "std": [
        0.2165,
        0.2244,
        0.2416
    ]
}

In [7]:
### 
with open(os.path.join(data_root, 'info.json'), 'r') as fr:
    items = json.load(fr)

### 
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),    # 数据增强: 对PIL Image数据做随机水平翻转
    transforms.ToTensor(),                     # PIL Image格式转换为Tensor张量格式               
    transforms.Normalize(items['mean'], items['std']) # 对数据归一化处理
])

In [8]:
class JsonfileDataset(Dataset):
    def __init__(self, data_root, json_file, resize=None, transform=None):
        self.data_root = data_root
        self.json_file = json_file
        self.resize = resize
        self.image_list, self.label_list = self.__read_jsonfile(json_file)
        if transform:
            self.transform = transform
        else:
            self.transform = transforms.Compose([transforms.ToTensor()])

    def __getitem__(self, index):
        img = Image.open(self.image_list[index]).convert('RGB')
        if self.resize:
            img = img.resize(self.resize)
        if self.transform is not None:
            img = self.transform(img)
        return img, self.label_list[index]

    def __len__(self):
        return len(self.image_list)

    def __read_jsonfile(self, jsonfile):
        image_list = []
        label_list = []
        with open(os.path.join(self.data_root, self.json_file)) as f:
            items = json.load(f)
            for item in items:
                image_list.append(os.path.join(self.data_root, item['image_path']))
                label_list.append(item['label'])
        return image_list, label_list

In [13]:
train_data = JsonfileDataset(data_root, 'train.json', transform=transform)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True, drop_last=True) 

valid_data = JsonfileDataset(data_root, 'val.json', transform=transform)
valid_loader = DataLoader(valid_data, batch_size=64, shuffle=True, drop_last=False) 

In [15]:
model = models.resnet50(pretrained=False)
state = torch.load(os.path.join(K12AI_PRETRAINED_ROOT, 'cv', 'resnet50-19c8e357.pth'))
model.load_state_dict(state)
model.fc = nn.Linear(model.fc.in_features, 4)
model = model.to('cuda')

## 超参数调整

In [None]:
### 设置训练轮回(max_epoch)
max_epoch = 50

### 设置损失函数(交叉熵CE)
reduction = 'mean' # 约简方式为mean(张量各个维度上的元素的平均值)
criterion = nn.CrossEntropyLoss(reduction=reduction)

### 设置优化器(随机梯度下降SGD)
# optimizer = SGD(custom_model.parameters(),
#    lr=0.01,           # 基础学习率
#    weight_decay=1e-6, # 权重衰减, 使得模型参数值更小, 有效防止过拟合
#    momentum=0.9,      # 动量因子, 更快局部收敛
#    nesterov=True      # 使用Nesterov动量, 加快收敛速度
# )  

### 亚当
optimizer = Adam(
    filter(lambda p: p.requires_grad, model.parameters()), # 过程出可更新的层(参数)
    lr=0.001,           # 基础学习率
    betas=(0.9, 0.999), # 计算梯度的均值(0.9)和平方(0.999)的系数
    eps=1e-8,           # 为了防止分母除零, 分母加上非常小的值
    weight_decay=0,     # 权重衰减
    amsgrad=False,      # 是否使用AmsGrad变体
)

### 设置学习率衰减策略(可选, 固定步长衰减StepLR)
# scheduler = StepLR(optimizer,
#    step_size=2, # 每间隔2次epoch进行一次LR调整
#    gamma=0.6    # LR调整为原来0.6倍
# )                 
 
scheduler = ReduceLROnPlateau(
    optimizer,   # 优化器
    mode='min',  # 指定指标不再下降
    factor=0.1,  # 衰减因子
    patience=3,  # 容忍多少次(指标不改变)
    eps=1e-6,    # 学习率衰减到的最小值eps时,学习率不再改变
)

## 模型训练及反馈

In [None]:
def train_epoch(model, device, data_loader, criterion, optimizer, epoch):
    ### 模型进入训练状态(启用 BN 和 Dropout)
    model.train()
    for i, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # optimizer.zero_grad()
        if i % 30 == 0:
            print('Epoch:', epoch, ', Training Loss:', loss.item())
    print('Epoch:', epoch, ', Training Loss:', loss.item())
        
def valid_epoch(model, device, data_loader, criterion, epoch):
    ### 模型进入评估模式(禁用 BN 和 Dropou)
    model.eval()
    correct = 0
    with torch.no_grad():
        for i, (data, target) in enumerate(data_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            pred = torch.max(output, 1)[1]
            correct += (pred == target).sum().item()
            if i % 30 == 0:
                print('Epoch:', epoch, ', Validing Loss:', loss.item())
        ### 调整学习率
        scheduler.step(loss)
    ### 计算正确率
    acc = 100.0 * correct / len(data_loader.dataset)
    print('Epoch:', epoch, ', Validing Loss:', loss.item(), 'ACC', acc)
    return acc
    
def train(epoch_num, model, train_loader, valid_loader, criterion, optimizer, scheduler):
    ### 获取模型训练所用设备(cpu或者gpu)
    device = next(model.parameters()).device
    for epoch in range(0, epoch_num): 
        ### 训练模型
        train_epoch(model, device, train_loader, criterion, optimizer, epoch)
        ### 校验模型
        valid_epoch(model, device, valid_loader, criterion, epoch)

    ### 保存模型
    torch.save(model.state_dict(), "last.pt")
        
### 启动训练
train(max_epoch, model, train_loader, valid_loader, criterion, optimizer, scheduler)

Epoch: 0 , Training Loss: 0.33040550351142883
Epoch: 0 , Training Loss: 0.4162968099117279
Epoch: 0 , Training Loss: 0.4626573324203491
Epoch: 0 , Training Loss: 0.6340612173080444
Epoch: 0 , Training Loss: 0.40035709738731384
Epoch: 0 , Validing Loss: 0.5449092984199524
Epoch: 0 , Validing Loss: 0.4412495791912079
Epoch: 0 , Validing Loss: 0.49853211641311646 ACC 76.56391659111515
Epoch: 1 , Training Loss: 0.392650842666626
Epoch: 1 , Training Loss: 0.4689213037490845
Epoch: 1 , Training Loss: 0.41451287269592285
Epoch: 1 , Training Loss: 0.4308861494064331
Epoch: 1 , Training Loss: 0.5866461396217346
Epoch: 1 , Validing Loss: 0.6984360218048096
Epoch: 1 , Validing Loss: 0.5036810636520386
Epoch: 1 , Validing Loss: 0.7667464017868042 ACC 73.52674524025386
Epoch: 2 , Training Loss: 0.5381112098693848
Epoch: 2 , Training Loss: 0.3231768012046814
Epoch: 2 , Training Loss: 0.6217946410179138
Epoch: 2 , Training Loss: 0.4079105257987976
Epoch: 2 , Training Loss: 0.2976001501083374
Epoch: 2

## 模型评估及测试

In [None]:
### 加载测试数据集
test_dataset = k12ai_load_dataset(data_root, 'test.json')
test_loader  = DataLoader(test_dataset, batch_size=64, num_workers=4)

### 加载训练完成的模型
model.load_state_dict(torch.load('last.pt'))

def evaluate(model, data_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in data_loader:
            output = model(data)
            pred = torch.max(output, 1)[1]
            correct += (pred == target).sum().item()
    ### 计算正确率
    acc = 100.0 * correct / len(data_loader.dataset)
    return acc

### 启动评估
acc = evaluate(last_model, test_loader)
print("Acc:", acc)