In [None]:
!pip install git+https://github.com/d2l-ai/d2l-zh@release  # installing d2l

    
    
    

In [None]:
import torch
import pandas as pd
import numpy as np
import os

import torchvision
from torchvision import datasets, models, transforms
import torch.nn as nn
from torch.nn import functional as F
from torch.utils import data
import torch.optim as optim

from torchvision.io import read_image
from PIL import Image
from torch.utils.data import Dataset,DataLoader
from torchvision.transforms import ToTensor, Lambda

import cv2
from matplotlib import pyplot as plt
import wandb

from d2l import torch as d2l
import time



In [None]:
# Dataset location and a fixed seed so the shuffle below is reproducible.
root = "../input/d2lclassifyleaves/"
SEED = 42

test_data = pd.read_csv(root + "test.csv")
all_data = pd.read_csv(root + "train.csv")

# No fixed train/validation split is made here: folds are carved out later by
# get_k_fold_data, so the training table is only shuffled once.
# ignore_index=True discards the old index and renumbers the rows 0..n-1.
all_data = all_data.sample(frac=1, random_state=SEED, ignore_index=True)

# Sorted so the class-name -> index mapping does not depend on row order; a
# checkpoint saved in one session then decodes to the same labels in another.
classes = sorted(all_data['label'].unique().tolist())
print("all_data:", all_data.shape, "test_data shape:", test_data.shape, "\nlabel size:", len(classes))


In [None]:
class MyDataset(Dataset):
    """Image dataset built from parallel sequences of labels and image paths.

    Args:
        labels: Sequence (a pandas Series in this notebook) of class-name
            strings aligned with ``img_dir``. Not read when ``mode == 'pred'``,
            so ``None`` may be passed there.
        img_dir: Sequence of image paths, joined onto the module-level ``root``.
        mode: ``'train'`` (adds random flips), ``'eval'`` or ``'pred'``.
            ``'eval'`` and ``'pred'`` share the same deterministic
            preprocessing; ``'pred'`` items carry no label.

    Raises:
        ValueError: if ``mode`` is not one of the three supported values.
    """

    _MODES = ('train', 'eval', 'pred')
    # Per-channel normalisation statistics taken from ImageNet.
    _MEAN = [0.485, 0.456, 0.406]
    _STD = [0.229, 0.224, 0.225]

    def __init__(self, labels, img_dir, mode=None):
        super().__init__()
        if mode not in self._MODES:
            raise ValueError(f"mode must be one of {self._MODES}, got {mode!r}")
        self.mode = mode
        self.img_labels = labels
        self.img_dir = img_dir

        # Resize, then keep the central 224x224 square.
        steps = [transforms.Resize(256), transforms.CenterCrop(224)]
        if mode == 'train':
            # Augmentation is applied to training samples only.
            steps += [
                transforms.RandomHorizontalFlip(p=.25),
                transforms.RandomVerticalFlip(p=.5),
            ]
        steps += [
            transforms.ToTensor(),
            transforms.Normalize(mean=self._MEAN, std=self._STD),
        ]
        self.transform = transforms.Compose(steps)

    @staticmethod
    def _at(seq, idx):
        """Return the ``idx``-th element of ``seq`` by position.

        A pandas Series indexed with ``[]`` looks up by *label*, which raises
        KeyError whenever the index is not 0..n-1 (e.g. a slice of a larger
        frame). ``.iloc`` is used when available; plain sequences fall back
        to ordinary indexing.
        """
        return seq.iloc[idx] if hasattr(seq, 'iloc') else seq[idx]

    def __len__(self):
        return len(self.img_dir)

    def __getitem__(self, idx):
        """Return ``image`` in 'pred' mode, otherwise ``(image, class_index)``."""
        img_path = os.path.join(root, self._at(self.img_dir, idx))
        with Image.open(img_path) as im:
            # Force three channels so the 3-channel Normalize always applies.
            image = self.transform(im.convert('RGB'))
        if self.mode == 'pred':
            return image
        label = self._at(self.img_labels, idx)
        # Class-name string -> integer index. list(...) keeps this working if
        # `classes` has been turned into a numpy array elsewhere.
        return image, torch.tensor(list(classes).index(label))


In [None]:
def get_k_fold_data(k, i, data, bs):
    """Build the training and validation DataLoaders for fold ``i`` of ``k``.

    Args:
        k: Number of folds; must be greater than 1.
        i: Zero-based index of the fold held out for validation.
        data: DataFrame with ``'label'`` and ``'image'`` columns.
        bs: Batch size for both loaders.

    Returns:
        ``(train_loader, valid_loader)``. The training loader shuffles and
        uses the ``'train'`` dataset mode; the validation loader does not
        shuffle and uses ``'eval'``.

    Each fold holds ``len(data) // k`` rows; any remainder rows at the end of
    ``data`` always stay in the training part.
    """
    assert k > 1
    assert 0 <= i < k, f"fold index {i} is outside range(0, {k})"
    fold_size = data.shape[0] // k
    start, stop = i * fold_size, (i + 1) * fold_size
    # Both parts are renumbered from 0: the datasets are indexed 0..len-1, and
    # a slice that kept its original index would not line up for i > 0.
    valid = data.iloc[start:stop].reset_index(drop=True)
    train = pd.concat([data.iloc[:start], data.iloc[stop:]], ignore_index=True)
    train_set = MyDataset(train['label'], train['image'], 'train')
    valid_set = MyDataset(valid['label'], valid['image'], 'eval')
    return (DataLoader(train_set, batch_size=bs, shuffle=True),
            DataLoader(valid_set, batch_size=bs, shuffle=False))

In [None]:
def k_fold_train(k, all_data, num_epochs, learning_rate, batch_size, weight_decay, device):
    """Run k-fold cross-validation with the module-level ``net``.

    Every fold starts from the weights ``net`` had when this function was
    called. Without that reset the model would keep training across folds and
    each later fold would validate on rows it had already been trained on.

    Args:
        k: Number of folds.
        all_data: DataFrame with ``'label'`` and ``'image'`` columns.
        num_epochs, learning_rate, weight_decay, device: Forwarded to ``train``.
        batch_size: Batch size of the fold DataLoaders.

    Returns:
        ``(mean_train_acc, mean_valid_acc)``, where each fold contributes the
        accuracies of its final epoch.
    """
    import copy  # local import: only needed for the weight snapshot

    # NOTE(review): `train` calls wandb.finish() when it returns, so folds
    # after the first have no active wandb run -- confirm whether a new run
    # has to be started per fold.
    initial_state = copy.deepcopy(net.state_dict())
    train_acc_sum, valid_acc_sum = 0, 0
    for i in range(k):
        net.load_state_dict(initial_state)
        loaders = get_k_fold_data(k, i, all_data, batch_size)
        train_acc, valid_acc = train(net, *loaders, num_epochs, learning_rate, weight_decay, device)
        train_acc_sum += train_acc
        valid_acc_sum += valid_acc
    return train_acc_sum / k, valid_acc_sum / k

In [None]:
def k_fold_train_single(k, all_data, num_epochs, learning_rate, batch_size, weight_decay, device):
    """Train the module-level ``net`` on fold 0 only of a k-fold split.

    Fold 0 is held out for validation and the remaining rows are used for
    training, which gives a quick single train/validation run.

    Args:
        k: Number of folds; only decides the size of the held-out part.
        all_data: DataFrame with ``'label'`` and ``'image'`` columns.
        num_epochs, learning_rate, weight_decay, device: Forwarded to ``train``.
        batch_size: Batch size of the DataLoaders.

    Returns:
        ``(train_acc, valid_acc)`` of the final epoch. Only one fold is run,
        so the values are returned as they are rather than divided by ``k``.
    """
    loaders = get_k_fold_data(k, 0, all_data, batch_size)
    train_acc, valid_acc = train(net, *loaders, num_epochs, learning_rate, weight_decay, device)
    return train_acc, valid_acc


In [None]:
def accuracy(y_hat, y):
    """Count the correctly predicted samples in a batch.

    Args:
        y_hat: Either a 2-D tensor of per-class scores (one row per sample) or
            a 1-D tensor of already-decoded class indices.
        y: 1-D tensor of true class indices.

    Returns:
        A 0-dim tensor holding the number of matches. It is deliberately not
        converted to a Python number here (the original removed that
        conversion because of its cost).
    """
    # Decode scores to class indices whenever there is a class axis. The test
    # is on the number of dimensions, not the batch size, so a batch holding a
    # single sample is decoded as well.
    if y_hat.dim() > 1 and y_hat.shape[1] > 1:
        y_hat = torch.argmax(y_hat, dim=1)

    matches = y_hat.type(y.dtype) == y  # boolean tensor, one entry per sample
    return matches.sum()

def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        net: Model to evaluate. An ``nn.Module`` is switched to evaluation
            mode, and supplies the device when ``device`` is not given.
        data_iter: Iterable of ``(X, y)`` batches; ``X`` may be a list of tensors.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Correct predictions divided by the number of labels seen.
    """
    if isinstance(net, nn.Module):
        net.eval()
        if not device:
            # Use whatever device the model's first parameter lives on.
            device = next(iter(net.parameters())).device
    # Slot 0: correct predictions, slot 1: number of labels.
    totals = d2l.Accumulator(2)
    with torch.no_grad():
        for features, targets in data_iter:
            features = (
                [part.to(device) for part in features]
                if isinstance(features, list)
                else features.to(device)
            )
            targets = targets.to(device)
            totals.add(accuracy(net(features), targets), d2l.size(targets))
    correct, seen = totals[0], totals[1]
    return correct / seen




In [None]:
class Accumulator:
    """Keep running sums for ``n`` quantities.

    Unlike the d2l version, increments are added as they are, without being
    converted to ``float`` first (the original dropped that conversion for speed).
    """

    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        """Add each positional argument to the running sum in the same slot."""
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + increment)
        self.data = updated

    def reset(self):
        """Set every running sum back to 0.0."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]

In [None]:
def train(net, train_iter, test_iter, num_epochs, lr, weight_dacay, device):
    """Train ``net`` with Adam and log per-epoch metrics to wandb.

    Args:
        net: Model to train; moved to ``device``.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches evaluated after every epoch.
        num_epochs: Number of passes over ``train_iter``.
        lr: Adam learning rate.
        weight_dacay: Adam ``weight_decay``. The misspelled name is kept so
            existing callers are unaffected.
        device: Device used for training.

    Returns:
        ``(train_acc, test_acc)`` of the final epoch.

    Raises:
        ValueError: if ``train_iter`` yields no batches.

    Side effects: prints a summary and calls ``wandb.finish()`` before returning.
    """
    print('training on', device)
    net.to(device)
    # Adam with weight decay.
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_dacay)
    loss = nn.CrossEntropyLoss()
    timer = d2l.Timer()
    for epoch in range(num_epochs):
        # Running sums for this epoch: loss weighted by batch size, number of
        # correct predictions, number of samples.
        metric = Accumulator(3)
        net.train()
        for X, y in train_iter:
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(X.shape[0] * l, accuracy(y_hat, y), X.shape[0])
            timer.stop()
        if metric[2] == 0:
            raise ValueError('train_iter produced no batches')
        # Epoch averages are only needed once per epoch, so they are computed
        # after the batch loop instead of on every batch.
        train_l = metric[0] / metric[2]
        train_acc = metric[1] / metric[2]
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        wandb.log({'train_acc':train_acc,'test_acc':test_acc,'loss':train_l})
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')
    wandb.finish()
    return train_acc, test_acc

In [None]:
# Example: start a separate wandb run per iteration (this notebook only logs
# the first fold).
# import wandb
# for x in range(10):
#     run = wandb.init(reinit=True)
#     with run:
#         for y in range(100):
#             run.log({"metric": x + y})


In [None]:
num_epochs, lr, bs, wd = 20, 1e-3, 128, 1e-4
k = 5
# Fall back to the CPU so the cell still runs on a machine without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

wandb.init()
net = models.resnet18(pretrained=True)
# Replace the classification head with one sized for this dataset before
# calling wandb.watch, so the new layer is part of the model being watched.
net.fc = nn.Linear(net.fc.in_features, len(classes))
wandb.watch(net)
# To abort, use the notebook's interrupt button in the toolbar; stopping via
# the cell's own button leaves wandb in a state that errors on the next run.
# Closing the run in the handler below is the safer option.
try:
    k_fold_train_single(k, all_data, num_epochs, lr, bs, weight_decay=wd, device=device)
except BaseException:
    # BaseException on purpose: an interrupt must also close the wandb run.
    wandb.finish()
    raise



In [None]:
def train(net, train_iter, test_iter, num_epochs, lr, weight_dacay, device):
    """Train ``net`` with Adam and cosine warm restarts, checkpointing on improvement.

    Args:
        net: Model to train; moved to ``device``.
        train_iter: Sized iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches evaluated after every epoch.
        num_epochs: Number of passes over ``train_iter``.
        lr: Initial Adam learning rate.
        weight_dacay: Adam ``weight_decay``. The misspelled name is kept so
            existing callers are unaffected.
        device: Device used for training.

    Returns:
        ``(train_acc, test_acc)`` of the final epoch.

    Raises:
        ValueError: if ``train_iter`` yields no batches.

    Side effects: writes a ``state_dict`` file to the working directory
    whenever the test accuracy beats the best saved one by more than 0.01,
    logs to wandb, prints a summary and calls ``wandb.finish()``.
    """
    print('training on', device)
    net.to(device)
    # Adam with weight decay.
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_dacay)
    # T_0=3: the learning rate first returns to its initial value after 3
    # epochs. T_mult=2: every following cycle is twice as long, so the last
    # part of training is one long decay.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 3, 2)
    loss = nn.CrossEntropyLoss()
    timer, num_batches = d2l.Timer(), len(train_iter)
    max_acc = 0
    for epoch in range(num_epochs):
        # Running sums for this epoch: loss weighted by batch size, number of
        # correct predictions, number of samples.
        metric = Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            # Fractional epoch: the learning rate also changes between the
            # batches of one epoch.
            scheduler.step(epoch + i / num_batches)
            with torch.no_grad():
                metric.add(X.shape[0] * l, accuracy(y_hat, y), X.shape[0])
            timer.stop()
        if metric[2] == 0:
            raise ValueError('train_iter produced no batches')
        # Epoch averages are only needed once per epoch, so they are computed
        # after the batch loop instead of on every batch.
        train_l = metric[0] / metric[2]
        train_acc = metric[1] / metric[2]

        test_acc = evaluate_accuracy_gpu(net, test_iter)
        # Save the weights only on a clear improvement (> 0.01); the file name
        # is the accuracy times 10000, truncated to an integer.
        if (test_acc - max_acc) > 0.01:
            max_acc = test_acc
            torch.save(net.state_dict(), str(int(max_acc * 10000)))
        wandb.log({'train_acc':train_acc,'test_acc':test_acc,'loss':train_l})
    print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')
    wandb.finish()
    return train_acc, test_acc

In [None]:
num_epochs, lr, bs, wd = 20, 1e-3, 128, 1e-4
k = 5
# Fall back to the CPU so the cell still runs on a machine without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

wandb.init()
net = models.resnet18(pretrained=True)
# Replace the classification head with one sized for this dataset before
# calling wandb.watch, so the new layer is part of the model being watched.
net.fc = nn.Linear(net.fc.in_features, len(classes))
wandb.watch(net)
# To abort, use the notebook's interrupt button in the toolbar; stopping via
# the cell's own button leaves wandb in a state that errors on the next run.
# Closing the run in the handler below is the safer option.
try:
    k_fold_train_single(k, all_data, num_epochs, lr, bs, weight_decay=wd, device=device)
except BaseException:
    # BaseException on purpose: an interrupt must also close the wandb run.
    wandb.finish()
    raise

In [None]:
# Write the current weights of `net` to a file named '9596' in the working
# directory. NOTE(review): the name presumably encodes this run's validation
# accuracy (x10000) -- confirm.
torch.save(net.state_dict(), '9596')

In [None]:
# List the working directory, e.g. to see which checkpoint files exist.
!ls

In [None]:
# To predict from a saved checkpoint instead of the in-memory model:
# net = models.resnet18(pretrained=True)
# net.fc = nn.Linear(net.fc.in_features, len(classes))
# net.load_state_dict(torch.load('9596'))
net.to('cpu')
net.eval()  # evaluation mode is required for inference

test_data = pd.read_csv(root + "test.csv")
pred_data = MyDataset(None, test_data['image'], 'pred')  # labels are not read in 'pred' mode
pred_loader = DataLoader(pred_data, batch_size=128, shuffle=False)

# A separate array allows indexing by a whole batch of class indices while
# leaving the `classes` list itself untouched for the dataset code.
class_names = np.array(classes)
pred = []
with torch.no_grad():  # no gradients are needed for inference
    for X in pred_loader:
        Y = torch.argmax(net(X), dim=1)
        # extend, not append: one label per image, in file order.
        pred.extend(class_names[Y.cpu().numpy()])

assert len(pred) == len(test_data), "expected exactly one prediction per test image"
test_data['label'] = pred
submission = test_data[['image', 'label']]
submission.to_csv('submission.csv', index=False)

In [None]:
# List the working directory, e.g. to confirm that submission.csv was written.
!ls