
**<font size=3>导入平台数据集</font>**

**数据集插件 -> 添加数据集 -> 数据集列表 -> 插入代码**

导入平台数据集并调试好代码后，点击代码编辑区的【运行全量】按钮，即可查看代码在全量数据上的运行结果。全量运行结果的调用方式可参考：[《用户手册-全量运行代码调用》](/#/user-manual/experiment/experiment-debug)

全量运行之后，获取全量运行结果的主要流程如下：
**上传模型文件 -> 申报审核文件 -> 审核通过 -> 下载文件**

上传模型文件接口示例：

```python
import wfio
wfio.upload_to_oss(server_path,local_path) 
#server_path是上传到服务器的文件名称，可以自定义
#local_path是本地上传的文件路径, 必须是本地已经存在的文件路径。
```


In [11]:
# Download the training dataset from S3 into the "./traindata" folder under the
# current directory, fetching one listing page at a time.

import wfio

_INPUT1 = '{"type":25,"uri":"awss3442b3cbc7ba141e388f8b5ab770dd51d:/"}'   # location of the files
marker = ""  # start marker of the next listing page
while True:
    ret = wfio.listdir(_INPUT1, marker=marker)
    print("Downloading {0} files...".format(len(ret["file_list"])))
    wfio.download(
        _INPUT1,
        selected_files=ret["file_list"],  # names of the files to download; at least one is required
        local_path="traindata",           # target directory; None means the current directory
        overwrite=False,                  # whether files with the same name are overwritten
    )
    if ret["has_more"]:
        # More pages remain: continue from where this page ended.
        marker = ret["next_marker"]
        continue
    print("All files download completed")
    break

Downloading 1000 files...
Downloading 1000 files...
Downloading 1000 files...
Downloading 1000 files...
Downloading 1000 files...
Downloading 1000 files...
Downloading 1000 files...
Downloading 1000 files...
Downloading 965 files...
All files download completed


In [12]:
# Import the dataset "garbage-test-sample".
# The test dataset is downloaded into the folder named by `dir_name`
# ("./testdata" under the current directory by default); edit `dir_name` to
# download somewhere else.
import wfio

_INPUT = '{"type":25,"uri":"awss3635559b903864399b44f4bd378d93e60:/"}'
dir_name = './testdata'
marker = ""  # start marker of the next listing page
while True:
    ret = wfio.listdir(_INPUT, marker=marker)
    print("Downloading {0} files...".format(len(ret["file_list"])))
    wfio.download(
        _INPUT,
        local_path=dir_name,              # target directory; None means the current directory
        selected_files=ret["file_list"],  # names of the files to download; at least one is required
        overwrite=False                   # whether files with the same name are overwritten
    )
    if not ret["has_more"]:               # any more pages? (the "has_more" field name must not be changed)
        print("All files download completed")
        break
    marker = ret["next_marker"]           # start marker of the next listing page

Downloading 1000 files...
Downloading 1000 files...
Downloading 399 files...
All files download completed


In [13]:
#导入必要的包
import os
import pandas as pd
from PIL import Image
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import time
import matplotlib.pyplot as plt
from wf_analyse.analyse import base
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import time

In [14]:
#定义数据集
class GarbageDataset(Dataset):
    """Image-classification dataset built from a folder of images.

    Every entry of ``root_path`` is treated as one image.  The label file of
    an image is located by replacing the last ``"images"`` in the directory
    part of its path with ``"labels"`` and its file extension with ``.txt``
    (``.../images/a.jpg`` -> ``.../labels/a.txt``).

    Args:
        root_path: Directory that contains the image files.
        transform: Optional callable applied to the loaded PIL image.
        target_transform: Optional callable applied to the integer label.

    Each item is a dict ``{"image": image, "label": label}``.
    """

    def __init__(self, root_path, transform=None, target_transform=None):
        imgs_path = []
        imgs_label = []
        # Sorted so that a given index refers to the same sample on every run
        # (os.listdir returns entries in arbitrary order).
        for file_name in sorted(os.listdir(root_path)):
            img_path = os.path.join(root_path, file_name)
            imgs_path.append(img_path)
            imgs_label.append(self._label_path_for(img_path))

        self.imgs_path = imgs_path
        self.imgs_label = imgs_label
        self.transform = transform
        self.target_transform = target_transform

    @staticmethod
    def _label_path_for(img_path):
        """Return the label-file path that belongs to ``img_path``."""
        directory, file_name = os.path.split(img_path)
        stem = os.path.splitext(file_name)[0]
        # Only touch the directory part, and only its last "images", so that
        # file names containing "images" or "jpg" are left intact.
        head, found, tail = directory.rpartition("images")
        if found:
            directory = head + "labels" + tail
        return os.path.join(directory, stem + ".txt")

    def __len__(self):
        return len(self.imgs_label)

    def __getitem__(self, idx):
        # Convert every image to three-channel RGB.  Skipping to the next
        # index instead would run past the end of the list for a trailing
        # non-RGB image and would return the same sample for several indices.
        # The context manager closes the underlying file once pixels are read.
        with Image.open(self.imgs_path[idx]) as raw_image:
            image = raw_image.convert("RGB")

        # The class id is the last comma-separated field of the label file;
        # strip() accepts it with or without surrounding whitespace.
        with open(self.imgs_label[idx], "r") as f:
            label = int(f.read().split(",")[-1].strip())

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        sample = {"image": image, "label": label}
        return sample


In [15]:
#自定义简单CNN，输入是224*224
class Net(nn.Module):
    """Small two-convolution CNN classifier for 3x224x224 inputs.

    Spatial size through the network for a 224x224 input:
    224 -> conv1 (k=3, s=3) -> 74 -> pool -> 37 -> conv2 (k=3, s=3) -> 12
    -> pool -> 6, which gives the 16 * 6 * 6 features fed to ``fc1``.

    Args:
        classes_num: Number of output classes (size of the final layer).
    """

    def __init__(self, classes_num = 4):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, stride=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=16, kernel_size=3, stride=3)
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=classes_num)

    def forward(self, x):
        """Return the raw class scores (logits) for the image batch ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Flatten the feature maps into one vector per sample.
        flat = features.view(-1, 16 * 6 * 6)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [16]:
class FocalLoss(nn.Module):
    """Multi-class focal loss computed from raw logits.

    For every sample the loss is
    ``-alpha[t] * (1 - p_t) ** gamma * log(p_t)``, where ``p_t`` is the
    softmax probability of the target class ``t``.

    Args:
        class_num: Number of classes.
        alpha: Optional class weighting, a scalar or a tensor that broadcasts
            against shape ``(class_num, 1)``.  ``None`` weights every class
            with 1.
        gamma: Focusing exponent applied to ``(1 - p_t)``.
        size_average: If true, return the mean of the per-sample losses,
            otherwise their sum.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            self.alpha = torch.ones(class_num, 1)
        else:
            self.alpha = torch.ones(class_num, 1) * alpha
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """Return the focal loss of ``inputs`` against ``targets``.

        Args:
            inputs: Logits of shape ``(N, C)``.
            targets: Integer class indices with ``N`` elements.
        """
        ids = targets.view(-1, 1)

        # log_softmax keeps log(p_t) finite even when p_t underflows to 0,
        # where softmax(...).log() would produce -inf.
        log_p = F.log_softmax(inputs, dim=1).gather(1, ids)
        probs = log_p.exp()

        # Keep the class weights on the same device as the logits.
        if self.alpha.device != inputs.device:
            self.alpha = self.alpha.to(inputs.device)
        alpha = self.alpha[ids.view(-1)]

        batch_loss = -alpha * torch.pow(1 - probs, self.gamma) * log_p

        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss

In [17]:
def train_model(model, criterion, optimizer, dataloaders, dataset_sizes, num_epochs=100):
    """Train ``model`` and evaluate it on the test split after every epoch.

    Args:
        model: Network to train; called as ``model(batch_img)``.
        criterion: Loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        dataloaders: Dict with ``'train'`` and ``'test'`` loaders; each batch
            is a dict with the keys ``'image'`` and ``'label'``.
        dataset_sizes: Dict with the sample count of the ``'train'`` and
            ``'test'`` splits, used to average loss and accuracy.
        num_epochs: Number of epochs to run.

    Side effects:
        * Saves the model with the best test accuracy so far to
          ``./models/mycnn.pth``.
        * Reports the per-epoch loss/accuracy of both splits through
          ``base.save_norm_by_step``.
        * Plots the training loss and training accuracy curves.
    """
    best_loss = float("inf")
    best_acc = 0
    train_loss_list = []
    train_acc_list = []
    test_loss_list = []
    test_acc_list = []

    print("\nStart Train......")
    # Create the folder the model is saved to.
    os.makedirs("./models", exist_ok=True)

    for epoch in range(num_epochs):
        start = time.time()
        print('\nEpoch: {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0

            for data in dataloaders[phase]:
                batch_img, batch_labels = data['image'], data['label']
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(batch_img)
                    preds = torch.argmax(outputs, dim=1)
                    loss = criterion(outputs, batch_labels)

                    # Back-propagate and update the parameters only while training.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by the batch size so the
                # epoch average is per sample.
                running_loss += loss.item() * batch_img.size(0)
                # Accumulate as a plain int so the counter also works when a
                # loader yields no batches.
                running_corrects += int(torch.sum(preds.float() == batch_labels.float()).item())
            end = time.time()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep the model with the best test accuracy.
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model, './models/mycnn.pth')

            if phase == 'test' and epoch_loss < best_loss:
                best_loss = epoch_loss

            # Record this epoch's loss and accuracy for the current split.
            if phase == "train":
                train_loss_list.append(epoch_loss)
                train_acc_list.append(epoch_acc)
            else:
                test_loss_list.append(epoch_loss)
                test_acc_list.append(epoch_acc)

        print('Training complete in {:.4f} mins'.format((end - start)/60))
        print('Best val Loss: {:4f}\n'.format(best_loss))
        print('Best val acc: {:4f}\n'.format(best_acc))

    print("End Train......\n")

    # On a full run, store the training-set loss/accuracy of every epoch so the
    # platform can draw the curves.  float() replaces np.float, which was
    # removed from NumPy.
    for step, (loss_value, acc_value) in enumerate(zip(train_loss_list, train_acc_list)):
        base.save_norm_by_step(model_name='train', norm_name='Loss', norm_value=float(loss_value), step=step)
        base.save_norm_by_step(model_name='train', norm_name='Accuracy', norm_value=float(acc_value), step=step)

    # Same for the test-set loss/accuracy.
    for step, (loss_value, acc_value) in enumerate(zip(test_loss_list, test_acc_list)):
        base.save_norm_by_step(model_name='test', norm_name='Loss', norm_value=float(loss_value), step=step)
        base.save_norm_by_step(model_name='test', norm_name='Accuracy', norm_value=float(acc_value), step=step)

    epochs = list(range(num_epochs))  # x axis: epoch index

    # Plot the training-set loss curve.
    plt.plot(epochs, train_loss_list)
    plt.ylabel('train loss')
    plt.xlabel('train epoch')
    plt.title(" Curve of loss(crossentory) function of train set")
    plt.show()

    # Plot the training-set accuracy curve.
    plt.plot(epochs, train_acc_list)
    plt.ylabel('train acc')
    plt.xlabel('train epoch')
    plt.title(" Curve of Accuracy  function of train set")
    plt.show()

In [18]:
def main():
    """Build the datasets, model, optimizer and loss, then run the training."""
    # 1. Hyper-parameters.
    lr = 0.01       # learning rate
    n_epochs = 50   # number of training epochs

    # 2. Location of the downloaded image folders.
    train_dir = '/home/nbuser/work_2347b8d076f44f16a78f622099a9b1e8/traindata/train/images/'
    test_dir = '/home/nbuser/work_2347b8d076f44f16a78f622099a9b1e8/testdata/test/images/'

    # 3. Pre-processing: both splits are resized to 224x224 and normalized;
    #    only the training split is randomly flipped.
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)
    train_pipeline = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    test_pipeline = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    # 4. Datasets and their loaders.
    train_set = GarbageDataset(root_path=train_dir, transform=train_pipeline)
    test_set = GarbageDataset(root_path=test_dir, transform=test_pipeline)
    dataloaders = {
        'train': DataLoader(train_set, batch_size=16, shuffle=True),
        'test': DataLoader(test_set, batch_size=1, shuffle=True),
    }
    dataset_sizes = {'train': len(train_set), 'test': len(test_set)}
    for split in ('train', 'test'):
        print((split + ' samples:{}张').format(dataset_sizes[split]))

    # 5. Model, optimizer and loss.  The original cell also kept the custom
    #    Net(39) and an Adam optimizer as commented-out alternatives.
    model = models.resnet50(pretrained=True, progress=True)
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=0.9,
        weight_decay=1e-4,
    )
    criterion = nn.CrossEntropyLoss()

    # 6. Train.
    train_model(
        model,
        criterion,
        optimizer,
        dataloaders=dataloaders,
        dataset_sizes=dataset_sizes,
        num_epochs=n_epochs,
    )

In [None]:
if __name__ == '__main__':
    # Train.
    main()
    # Submit the model for review: upload the classifier to OSS.  The local
    # path must be the file train_model() actually writes
    # ("./models/mycnn.pth"); "mycnn.model" is only the name given to the
    # file on the server.
    wfio.upload_to_oss("mycnn.model", "./models/mycnn.pth")

train samples:4482张
test samples:1199张

Start Train......

Epoch: 1/50
----------
train Loss: 2.5133 Acc: 0.2992
test Loss: 1.7651 Acc: 0.4287
Training complete in 26.4895 mins
Best val Loss: 1.765090

Best val acc: 0.428691


Epoch: 2/50
----------
train Loss: 1.5451 Acc: 0.5054
test Loss: 1.4157 Acc: 0.5563
Training complete in 26.1127 mins
Best val Loss: 1.415704

Best val acc: 0.556297


Epoch: 3/50
----------
train Loss: 1.1184 Acc: 0.6421
test Loss: 1.2155 Acc: 0.6330
Training complete in 26.1400 mins
Best val Loss: 1.215479

Best val acc: 0.633028


Epoch: 4/50
----------
train Loss: 0.8660 Acc: 0.7218
test Loss: 1.2939 Acc: 0.6539
Training complete in 26.0378 mins
Best val Loss: 1.215479

Best val acc: 0.653878


Epoch: 5/50
----------
train Loss: 0.6996 Acc: 0.7742
test Loss: 1.0401 Acc: 0.7039
Training complete in 26.0621 mins
Best val Loss: 1.040053

Best val acc: 0.703920


Epoch: 6/50
----------
train Loss: 0.5691 Acc: 0.8141
test Loss: 0.8897 Acc: 0.7306
Training complete