## 代码要求：使用五折交叉验证，训练100轮，使用早停技术

# test

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the .npy
# files loaded later in this notebook are read from paths under that mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from torch.autograd import Variable
import torch.nn as nn
from torchvision import datasets,transforms

In [None]:
import torch
import pandas as pd
import numpy as np
import skimage
from torch.utils.data import Dataset, DataLoader
torch.manual_seed(1)  # fix the global RNG seed so runs are reproducible
# Make float64 the default floating-point dtype for newly created tensors and
# module parameters. `torch.set_default_dtype` is the supported replacement
# for the deprecated `torch.set_default_tensor_type(torch.DoubleTensor)`.
torch.set_default_dtype(torch.float64)
from PIL import Image
# Preprocessing pipeline; MyDataset stores it and applies it to every sample.
transform = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a Tensor, scaled to [0, 1] (NOTE(review): torchvision only rescales uint8 input — confirm the dtype of the .npy data)
])


In [None]:
# dataset=MyDataset('/content/drive/MyDrive/100/test1/train.npy',"/content/drive/MyDrive/100/test1/label.npy")
# # dataset_test=MyDataset('/content/drive/MyDrive/100/test1/x_test.npy',"/content/drive/MyDrive/100/test1/y_test.npy")
# train_loader= DataLoader(dataset, batch_size=32, shuffle=True, pin_memory=True)
# # test_loader= DataLoader(dataset_test, batch_size=32, shuffle=False, pin_memory=True)

In [None]:
# for data, target in test_loader:
#     print(data.shape)
#     print("DDDDDDDDD")
#     print(target)
#     break

In [None]:

# Names exported by this model-definition cell: the three factory functions.
__all__ = ['ResNet50', 'ResNet101','ResNet152']

def Conv1(in_planes, places, stride=2):
    """Build the network stem: 7x7 conv -> BatchNorm -> ReLU -> 3x3 max-pool.

    Args:
        in_planes: Number of channels of the input image.
        places: Number of channels produced by the convolution.
        stride: Stride of the 7x7 convolution (the max-pool always uses 2).

    Returns:
        An ``nn.Sequential`` holding the four layers in that order.
    """
    stem_layers = [
        nn.Conv2d(
            in_channels=in_planes,
            out_channels=places,
            kernel_size=7,
            stride=stride,
            padding=3,
            bias=False,  # the following BatchNorm supplies the shift term
        ),
        nn.BatchNorm2d(places),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    ]
    return nn.Sequential(*stem_layers)

class Bottleneck(nn.Module):
    """Residual bottleneck unit: 1x1 reduce -> 3x3 -> 1x1 expand, plus a skip path.

    Args:
        in_places: Number of channels of the incoming feature map.
        places: Channel width of the two inner convolutions; the unit
            outputs ``places * expansion`` channels.
        stride: Stride of the 3x3 convolution (and of the projection
            shortcut when one is built).
        downsampling: When true, the skip path is a 1x1 conv + BatchNorm
            projection; otherwise the input is added back unchanged.
        expansion: Multiplier applied to ``places`` for the output width.
    """

    def __init__(self, in_places, places, stride=1, downsampling=False, expansion=4):
        super().__init__()
        self.expansion = expansion
        self.downsampling = downsampling
        out_places = places * self.expansion

        # Main branch. The last BatchNorm is intentionally not followed by a
        # ReLU: the activation is applied after the residual addition.
        main_branch = [
            nn.Conv2d(in_channels=in_places, out_channels=places,
                      kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=places, out_channels=places,
                      kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(places),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=places, out_channels=out_places,
                      kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_places),
        ]
        self.bottleneck = nn.Sequential(*main_branch)

        if self.downsampling:
            # Projection shortcut so the skip path matches the main branch
            # in both channel count and spatial size.
            projection = [
                nn.Conv2d(in_channels=in_places, out_channels=out_places,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_places),
            ]
            self.downsample = nn.Sequential(*projection)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = self.bottleneck(x)
        shortcut = self.downsample(x) if self.downsampling else x
        out += shortcut
        return self.relu(out)

class ResNet(nn.Module):
    """Bottleneck ResNet for single-channel images with a two-layer linear head.

    The network is: stem (``Conv1``) -> four stages of ``Bottleneck`` units ->
    7x7 average pool -> ``fc`` (200 units) -> ``fc2`` (``num_classes``) ->
    log-softmax. The output is therefore log-probabilities, suitable for
    ``nn.NLLLoss``.

    Args:
        blocks: Sequence of four ints, the number of bottleneck units in
            each stage.
        num_classes: Size of the final output layer.
        expansion: Channel multiplier of every bottleneck unit. It is now
            forwarded to each ``Bottleneck`` and used to derive the stage
            input widths; previously it was only stored, so any value other
            than 4 produced mismatched channel counts.
    """

    def __init__(self,blocks, num_classes=8, expansion = 4):
        super(ResNet,self).__init__()
        self.expansion = expansion

        # The stem is fixed to one input channel (grayscale images).
        self.conv1 = Conv1(in_planes = 1, places= 64)

        # Each stage receives the previous stage's `places * expansion` channels.
        self.layer1 = self.make_layer(in_places=64, places=64, block=blocks[0], stride=1)
        self.layer2 = self.make_layer(in_places=64 * expansion, places=128, block=blocks[1], stride=2)
        self.layer3 = self.make_layer(in_places=128 * expansion, places=256, block=blocks[2], stride=2)
        self.layer4 = self.make_layer(in_places=256 * expansion, places=512, block=blocks[3], stride=2)

        self.avgpool = nn.AvgPool2d(7, stride=1)
        # Flattened feature size: (512 * expansion) channels times a 2x2
        # spatial map, i.e. 8192 for the default expansion of 4.
        # NOTE(review): the 2x2 map assumes 236x236 inputs (the shape the
        # notebook prints for x_train); other input sizes need a different
        # value here.
        self.fc = nn.Linear(512 * expansion * 2 * 2, 200)
        self.fc2 = nn.Linear(200,num_classes)
        self.logsoftmax = nn.LogSoftmax(dim=1)

        # He initialisation for convolutions; BatchNorm starts as identity.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, in_places, places, block, stride):
        """Build one stage of ``block`` bottleneck units.

        The first unit carries the stride and a projection shortcut; the
        remaining ``block - 1`` units keep the resolution and use identity
        shortcuts.

        Args:
            in_places: Channels entering the stage.
            places: Inner width of every unit in the stage.
            block: Number of units in the stage.
            stride: Stride of the first unit.

        Returns:
            An ``nn.Sequential`` of the units.
        """
        layers = [
            Bottleneck(in_places, places, stride, downsampling=True,
                       expansion=self.expansion)
        ]
        for _ in range(1, block):
            layers.append(
                Bottleneck(places * self.expansion, places,
                           expansion=self.expansion)
            )

        return nn.Sequential(*layers)


    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        x = self.conv1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = self.fc(x)
        x = self.fc2(x)

        return self.logsoftmax(x)

def ResNet50():
    """Return a ResNet with the ResNet-50 stage layout (3, 4, 6, 3 units)."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(stage_depths)

def ResNet101():
    """Return a ResNet with the ResNet-101 stage layout (3, 4, 23, 3 units)."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(stage_depths)

def ResNet152():
    """Return a ResNet with the ResNet-152 stage layout (3, 8, 36, 3 units)."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(stage_depths)

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Display which device was selected (notebook cell output).
device

In [None]:
# Stand-alone model instance (left on the CPU). The cross-validation cells
# further down rebind `model` to a fresh ResNet50 for every fold.
model = ResNet50()

In [None]:
# NLLLoss expects log-probabilities, which is what the network's final
# LogSoftmax produces. The cross-validation cells create their own loss and
# optimizer per fold (with lr=0.005), so these objects are not the ones used there.
loss_fn = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.0015,momentum=0.5)

In [None]:
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold as kFold

In [None]:
# Load the complete image array and the matching label array from Drive.
# Both are split into train/validation folds by the cross-validation cells.
x_train=np.load("/content/drive/MyDrive/summer_start/data_generate/train_1channel.npy")
y_train=np.load("/content/drive/MyDrive/summer_start/data_generate/label.npy")

In [None]:
# 5-fold splitter with shuffling. The fixed random_state is meant to make the
# fold membership reproducible, so a later cell can resume at a given fold.
kfold =kFold(n_splits=5,shuffle=True,random_state=1)

In [None]:
class MyDataset(Dataset):
    """Dataset over an in-memory image array and a parallel label array.

    Args:
        data: Array indexed as ``data[i, :, :, :]`` (so it must be 4-D);
            its first axis enumerates the samples.
        label: Indexable container with one label per sample.
    """

    def __init__(self, data, label):
        self.data = data  # already-loaded npy image array
        self.label = label
        self.transforms = transform  # module-level pipeline that converts to a tensor

    def __len__(self):
        """Return the number of samples (length of the first axis of ``data``)."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for sample ``index``."""
        # Drop all size-1 axes (i.e. the channel axis) before the transform.
        sample = np.squeeze(self.data[index, :, :, :])
        return self.transforms(sample), self.label[index]

In [None]:
# Display the shape of the loaded image array (notebook cell output).
x_train.shape

(13887, 1, 236, 236)

In [None]:
# 5-fold cross-validation: for every fold, train a fresh ResNet50 with SGD,
# validate after each epoch, and stop early once validation accuracy has not
# improved for `patience` consecutive epochs. The best validation accuracy
# of each fold is collected in `list_score`.
labels = [0, 1, 2, 3, 4, 5, 6, 7]  # class ids handed to sklearn's log_loss
list_score = []                    # best validation accuracy of each fold
# NOTE(review): the notebook header asks for 100 epochs; this cell has always
# run 30, so 30 is kept here — confirm which one is intended.
num_epochs = 30
patience = 4  # epochs without a new best accuracy tolerated before stopping

for fold, (train_index, test_index) in enumerate(kfold.split(x_train, y_train)):
    print("train_index",train_index)
    print("test_index",test_index)
    ### Dividing data into folds
    x_train_fold = x_train[train_index]
    x_test_fold = x_train[test_index]
    y_train_fold = y_train[train_index]
    y_test_fold = y_train[test_index]
    train = MyDataset(x_train_fold, y_train_fold)
    test = MyDataset(x_test_fold, y_test_fold)
    train_loader = DataLoader(train, batch_size=16, shuffle=True)
    test_loader = DataLoader(test, batch_size=16, shuffle=False)
    print("fold:",fold)

    # Every fold starts from a newly initialised model and optimizer.
    best_acc = 0
    es = 0  # epochs since the last improvement; initialised so `es += 1` can never hit an unbound name
    model = ResNet50().to(device)
    loss_fn = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.5)

    for i in range(num_epochs):
        # ---- training pass ----
        model.train()  # BatchNorm uses batch statistics and updates its running stats
        list1 = []
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data.double())
            loss = loss_fn(pred, target)

            list1.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("netloss",i,np.mean(list1))

        # ---- validation pass ----
        # eval() makes BatchNorm use its running statistics and stops the
        # validation data from leaking into them.
        model.eval()
        with torch.no_grad():
            list_loss = []
            correct = 0
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data.double())
                # get the index of the max log-probability
                pred = output.max(1, keepdim=True)[1]
                # The network outputs log-probabilities; log_loss wants probabilities.
                pred_loss = output.exp().cpu().numpy()
                logsloss = log_loss(target.cpu().numpy(), pred_loss, labels=labels)
                list_loss.append(logsloss)
                # .item() keeps `correct` a plain int so the accuracies are plain floats.
                correct += pred.eq(target.view_as(pred)).sum().item()
        val_acc = correct / len(test_loader.dataset)
        print('accuracy:{:.3f}%'.format(100. * val_acc))
        # Mean of the per-batch log losses (each batch weighted equally).
        print("log_loss",sum(list_loss)/len(list_loss))
        print("*"*20)

        # ---- early stopping on validation accuracy ----
        if val_acc > best_acc:
            best_acc = val_acc
            es = 0
        else:
            es += 1
            print("Counter {} of {}".format(es, patience))
            if es >= patience:
                print("Early stopping with best_acc: ", best_acc, "and val_acc for this epoch: ", val_acc, "...")
                break
    list_score.append(best_acc)
    print("*"*40)

train_index [    0     1     2 ... 13884 13885 13886]
test_index [    4     5     6 ... 13861 13872 13877]
fold: 0
netloss 0 1.4436160927729498
accuracy:55.796%
log_loss 1.260478154318904
********************
netloss 1 1.1786964462347498
accuracy:61.123%
log_loss 1.130843280862045
********************
netloss 2 0.9535876831880313
accuracy:73.866%
log_loss 0.8064599028190419
********************
netloss 3 0.7717215712190462
accuracy:79.194%
log_loss 0.6893394520743852
********************
netloss 4 0.6856610141950833
accuracy:79.230%
log_loss 0.6771290050752404
********************
netloss 5 0.6282891765465771
accuracy:76.854%
log_loss 0.7327031703433012
********************
Counter 1 of 5
netloss 6 0.585313420565014
accuracy:79.590%
log_loss 0.6225047644819209
********************
netloss 7 0.5372883328062648
accuracy:81.605%
log_loss 0.5898319164818648
********************
netloss 8 0.508268842584863
accuracy:83.909%
log_loss 0.5275673408763185
********************
netloss 9 0.4771059

In [None]:
# Display the best validation accuracy collected for each fold (notebook cell output).
list_score

In [None]:
# Same cross-validation procedure as the cell above, but folds 0-2 are
# skipped so that only folds 3 and 4 are trained (presumably resuming an
# interrupted run — confirm). `list_score` is reset here, so after this cell
# it only holds the scores of the folds trained in this cell.
labels = [0, 1, 2, 3, 4, 5, 6, 7]  # class ids handed to sklearn's log_loss
list_score = []                    # best validation accuracy of each trained fold
# NOTE(review): the notebook header asks for 100 epochs; this cell has always
# run 30, so 30 is kept here — confirm which one is intended.
num_epochs = 30
patience = 4  # epochs without a new best accuracy tolerated before stopping

for fold, (train_index, test_index) in enumerate(kfold.split(x_train, y_train)):
    print(fold)
    if fold in (0, 1, 2):
        continue  # these folds are not retrained in this cell

    print("train_index",train_index)
    print("test_index",test_index)
    ### Dividing data into folds
    x_train_fold = x_train[train_index]
    x_test_fold = x_train[test_index]
    y_train_fold = y_train[train_index]
    y_test_fold = y_train[test_index]
    train = MyDataset(x_train_fold, y_train_fold)
    test = MyDataset(x_test_fold, y_test_fold)
    train_loader = DataLoader(train, batch_size=16, shuffle=True)
    test_loader = DataLoader(test, batch_size=16, shuffle=False)
    print("fold:",fold)

    # Every fold starts from a newly initialised model and optimizer.
    best_acc = 0
    es = 0  # epochs since the last improvement; initialised so `es += 1` can never hit an unbound name
    model = ResNet50().to(device)
    loss_fn = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.5)

    for i in range(num_epochs):
        # ---- training pass ----
        model.train()  # BatchNorm uses batch statistics and updates its running stats
        list1 = []
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data.double())
            loss = loss_fn(pred, target)

            list1.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("netloss",i,np.mean(list1))

        # ---- validation pass ----
        # eval() makes BatchNorm use its running statistics and stops the
        # validation data from leaking into them.
        model.eval()
        with torch.no_grad():
            list_loss = []
            correct = 0
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data.double())
                # get the index of the max log-probability
                pred = output.max(1, keepdim=True)[1]
                # The network outputs log-probabilities; log_loss wants probabilities.
                pred_loss = output.exp().cpu().numpy()
                logsloss = log_loss(target.cpu().numpy(), pred_loss, labels=labels)
                list_loss.append(logsloss)
                # .item() keeps `correct` a plain int so the accuracies are plain floats.
                correct += pred.eq(target.view_as(pred)).sum().item()
        val_acc = correct / len(test_loader.dataset)
        print('accuracy:{:.3f}%'.format(100. * val_acc))
        # Mean of the per-batch log losses (each batch weighted equally).
        print("log_loss",sum(list_loss)/len(list_loss))
        print("*"*20)

        # ---- early stopping on validation accuracy ----
        if val_acc > best_acc:
            best_acc = val_acc
            es = 0
        else:
            es += 1
            print("Counter {} of {}".format(es, patience))
            if es >= patience:
                print("Early stopping with best_acc: ", best_acc, "and val_acc for this epoch: ", val_acc, "...")
                break
    list_score.append(best_acc)
    print("*"*40)

0
1
2
3
train_index [    0     1     2 ... 13883 13884 13886]
test_index [    8     9    13 ... 13876 13882 13885]
fold: 3
netloss 0 1.4401101317171838
accuracy:48.866%
log_loss 1.4097851139436115
********************
netloss 1 1.1433220537672881
accuracy:65.610%
log_loss 1.0362045695129596
********************
netloss 2 0.8734471747672347
accuracy:70.292%
log_loss 0.9619548853816423
********************
netloss 3 0.7309643796966312
accuracy:75.225%
log_loss 0.7991901153662166
********************
netloss 4 0.6483293746570215
accuracy:75.333%
log_loss 0.7508976219543856
********************
netloss 5 0.5956460678112849
accuracy:80.555%
log_loss 0.6195242620036859
********************
netloss 6 0.5533276149565337
accuracy:78.286%
log_loss 0.6969063879094269
********************
Counter 1 of 5
netloss 7 0.540152804887891
accuracy:81.743%
log_loss 0.5990256399895432
********************
netloss 8 0.500087855772052
accuracy:82.571%
log_loss 0.5651195205878135
********************
netloss 9

In [None]:
# Hand-computed average of five accuracy values (presumably the per-fold best
# accuracies copied from the runs above — confirm).
(0.8516+0.8531+0.8549+0.8474+0.8575)/5

0.8529

In [None]:
# No `mean` function is imported or defined in the visible cells, so the bare
# call raised NameError; use NumPy's mean instead. float() lets this work
# whether the scores are plain floats or 0-dim tensors.
print(np.mean([float(score) for score in list_score]))