In [17]:
import time
from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.nn as nn
import torch

device = "cpu"

print("是否使用GPU训练：{}".format(torch.backends.mps.is_available()))    #打印是否采用gpu训练
if torch.backends.mps.is_available():
    device = torch.device("mps");  #打印相应的gpu信息
#数据增强太多也可能造成训练出不好的结果，而且耗时长，宜增强两三倍即可。
normalize=transforms.Normalize(mean=[.5,.5,.5],std=[.5,.5,.5])  #规范化
transform=transforms.Compose([                                  #数据处理
    transforms.Resize((64,64)),
    transforms.ToTensor(),
    normalize
])
dataset_train=ImageFolder('./data/train_sample',transform=transform)     #训练数据集
# print(dataset_tran[0])
dataset_valid=ImageFolder('./data/test_sample',transform=transform)     #验证或测试数据集
# print(dataset_train.classer)#返回类别
print(dataset_train.class_to_idx)                               #返回类别及其索引
# print(dataset_train.imgs)#返回图片路径
print(dataset_valid.class_to_idx)
train_data_size=len(dataset_train)                              #放回数据集长度
test_data_size=len(dataset_valid)
print("训练数据集的长度为：{}".format(train_data_size))
print("测试数据集的长度为：{}".format(test_data_size))
#torch自带的标准数据集加载函数
dataloader_train=DataLoader(dataset_train,batch_size=4,shuffle=True,num_workers=0,drop_last=True)
dataloader_test=DataLoader(dataset_valid,batch_size=4,shuffle=True,num_workers=0,drop_last=True)

#2.模型加载
model_ft=models.resnet18(pretrained=True)#使用迁移学习，加载预训练权重
print(model_ft)

in_features=model_ft.fc.in_features
model_ft.fc=nn.Sequential(nn.Linear(in_features,36),
                          nn.Linear(36,6))#将最后的全连接改为（36，6），使输出为六个小数，对应六种植物的置信度
#冻结卷积层函数
# for i,para in enumerate(model_ft.parameters()):
#     if i<18:
#         para.requires_grad=False

# print(model_ft)


# model_ft.half()#可改为半精度，加快训练速度，在这里不适用



# model_ft=model_ft.cuda()#将模型迁移到gpu

model_ft = model_ft.to(device) #将模型迁移到gpu

#3.优化器
loss_fn=nn.CrossEntropyLoss()

# loss_fn=loss_fn.cuda()  #将loss迁移到gpu

loss_fn = loss_fn.to(device)  #将loss迁移到gpu

learn_rate=0.01         #设置学习率
optimizer=torch.optim.SGD(model_ft.parameters(),lr=learn_rate,momentum=0.01)#可调超参数

total_train_step=0
total_test_step=0
epoch=50                #迭代次数
writer=SummaryWriter("logs_train_yaopian")
best_acc=-1
ss_time=time.time()

for i in range(epoch):
    start_time = time.time()
    print("--------第{}轮训练开始---------".format(i+1))
    model_ft.train()
    for data in dataloader_train:
        imgs,targets=data
        # if torch.cuda.is_available():
        # imgs.float()
        # imgs=imgs.float()#为上述改为半精度操作，在这里不适用
#         imgs=imgs.cuda()
        imgs = imgs.to(device)
#         targets=targets.cuda()
        targets = targets.to(device)
        # imgs=imgs.half()
        outputs=model_ft(imgs).to(device)
        loss=loss_fn(outputs,targets)

        optimizer.zero_grad()   #梯度归零
        loss.backward()         #反向传播计算梯度
        optimizer.step()        #梯度优化

        total_train_step=total_train_step+1
        if total_train_step%100==0:#一轮时间过长可以考虑加一个
            end_time=time.time()
            print("使用GPU训练100次的时间为：{}".format(end_time-start_time))
            print("训练次数：{},loss:{}".format(total_train_step,loss.item()))
            # writer.add_scalar("valid_loss",loss.item(),total_train_step)
    model_ft.eval()
    total_test_loss=0
    total_accuracy=0
    with torch.no_grad():       #验证数据集时禁止反向传播优化权重
        for data in dataloader_test:
            imgs,targets=data
            # if torch.cuda.is_available():
            # imgs.float()
            # imgs=imgs.float()
            imgs = imgs.to(device)
            targets = targets.to(device)
            # imgs=imgs.half()
            outputs=model_ft(imgs)
            loss=loss_fn(outputs,targets)
            total_test_loss=total_test_loss+loss.item()
            accuracy=(outputs.argmax(1)==targets).sum()
            total_accuracy=total_accuracy+accuracy
        print("整体测试集上的loss：{}(越小越好,与上面的loss无关此为测试集的总loss)".format(total_test_loss))
        print("整体测试集上的正确率：{}(越大越好)".format(total_accuracy / len(dataset_valid)))

        writer.add_scalar("valid_loss",(total_accuracy/len(dataset_valid)),(i+1))#选择性使用哪一个
        total_test_step = total_test_step + 1
        if total_accuracy > best_acc:   #保存迭代次数中最好的模型
            print("已修改模型")
            best_acc = total_accuracy
            torch.save(model_ft, "best_model_yaopian.pth")
ee_time=time.time()
zong_time=ee_time-ss_time
print("训练总共用时:{}h:{}m:{}s".format(int(zong_time//3600),int((zong_time%3600)//60),int(zong_time%60))) #打印训练总耗时
writer.close()


是否使用GPU训练：True
{'n000013': 0, 'n000025': 1, 'n000136': 2, 'n000146': 3, 'n000167': 4, 'n000172': 5, 'n000267': 6, 'n000268': 7, 'n000270': 8, 'n000272': 9, 'n000281': 10, 'n000302': 11, 'n000310': 12, 'n000320': 13, 'n000325': 14, 'n000336': 15, 'n000360': 16, 'n000390': 17, 'n000399': 18, 'n000411': 19, 'n000418': 20, 'n000426': 21, 'n000454': 22, 'n000462': 23, 'n000463': 24, 'n000476': 25, 'n000515': 26, 'n000526': 27, 'n000549': 28, 'n000570': 29, 'n000575': 30, 'n000586': 31, 'n000613': 32, 'n000641': 33, 'n000642': 34, 'n000655': 35, 'n000656': 36, 'n000674': 37, 'n000682': 38, 'n000696': 39, 'n000700': 40, 'n000716': 41, 'n000731': 42, 'n000737': 43, 'n000788': 44, 'n000805': 45, 'n000855': 46, 'n000870': 47, 'n000897': 48, 'n000908': 49, 'n000924': 50, 'n000941': 51, 'n000966': 52, 'n000994': 53, 'n001012': 54, 'n001043': 55, 'n001082': 56, 'n001123': 57, 'n001139': 58, 'n001148': 59, 'n001169': 60, 'n001179': 61, 'n001188': 62, 'n001204': 63, 'n001231': 64, 'n001261': 65, 'n00

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

使用GPU训练100次的时间为：65.24440431594849
训练次数：6200,loss:0.0
使用GPU训练100次的时间为：70.68306040763855
训练次数：6300,loss:0.0
使用GPU训练100次的时间为：76.24047017097473
训练次数：6400,loss:0.0
使用GPU训练100次的时间为：81.64021134376526
训练次数：6500,loss:0.024114787578582764
使用GPU训练100次的时间为：87.71177911758423
训练次数：6600,loss:0.0
使用GPU训练100次的时间为：94.04598021507263
训练次数：6700,loss:0.0
使用GPU训练100次的时间为：100.61912441253662
训练次数：6800,loss:0.0
使用GPU训练100次的时间为：107.16691517829895
训练次数：6900,loss:0.0
使用GPU训练100次的时间为：113.7373161315918
训练次数：7000,loss:0.0
使用GPU训练100次的时间为：120.24310827255249
训练次数：7100,loss:0.0
使用GPU训练100次的时间为：126.58569717407227
训练次数：7200,loss:0.0
使用GPU训练100次的时间为：132.8660752773285
训练次数：7300,loss:0.0
使用GPU训练100次的时间为：139.15867924690247
训练次数：7400,loss:0.0
使用GPU训练100次的时间为：145.46357226371765
训练次数：7500,loss:0.0
整体测试集上的loss：0.2624941846297588(越小越好,与上面的loss无关此为测试集的总loss)
整体测试集上的正确率：0.0(越大越好)
--------第4轮训练开始---------
使用GPU训练100次的时间为：6.296970844268799
训练次数：7600,loss:0.0
使用GPU训练100次的时间为：12.591051816940308
训练次数：7700,loss:0.0
使用GPU训练100次的时间为：18.9173

RuntimeError: MPS backend out of memory (MPS allocated: 189.92 MB, other allocations: 17.94 GB, max allowed: 18.13 GB). Tried to allocate 9.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).