In [1]:
!nvidia-smi

Sun Oct 23 22:45:52 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.141.03   Driver Version: 470.141.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
| 30%   37C    P8    34W / 350W |   2615MiB / 24259MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn.functional as F
from torchsummary import summary
from torch.utils.data import Dataset,DataLoader
from datetime import datetime
from tqdm import tqdm
import sys
import torch.optim as optim
import argparse
import  os, glob
import  random, csv
from    PIL import Image
import time
import matplotlib.pyplot as plt
import numpy as np
from utils import train2
from utils import plot_history
import torch.optim as optim
torch.__version__

  from .autonotebook import tqdm as notebook_tqdm


'1.7.1'

In [3]:
class PCGload(Dataset):
    def __init__(self, root, resize, label):
        super(PCGload, self).__init__()

        self.root = root
        self.resize = resize
        self.name2label = label # "n":0  "abn":1
        self.images, self.labels = self.load_csv('images.csv')

    def load_csv(self, filename):

        if not os.path.exists(os.path.join(self.root, filename)):
            images = []
            for name in self.name2label.keys():
                # 'pokemon\\mewtwo\\00001.png
                images += glob.glob(os.path.join(self.root, name, '*.png'))
                # images += glob.glob(os.path.join(self.root, name, '*.jpg'))
                # images += glob.glob(os.path.join(self.root, name, '*.jpeg'))

            # 1167, 'pokemon\\bulbasaur\\00000000.png'
            print(len(images), images)

            random.shuffle(images)
            with open(os.path.join(self.root, filename), mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images: # 'pokemon\\bulbasaur\\00000000.png'
                    name = img.split(os.sep)[-2]
                    label = self.name2label[name]
                    # 'pokemon\\bulbasaur\\00000000.png', 0
                    writer.writerow([img, label])
                print('writen into csv file:', filename)

        # read from csv file
        images, labels = [], []
        with open(os.path.join(self.root, filename)) as f:
            reader = csv.reader(f)
            for row in reader:
                # 'pokemon\\bulbasaur\\00000000.png', 0
                img, label = row
                label = int(label)

                images.append(img)
                labels.append(label)

        assert len(images) == len(labels)

        return images, labels



    def __len__(self):

        return len(self.images)

    def __getitem__(self, idx):
        # idx~[0~len(images)]
        # self.images, self.labels
        # img: 'pokemon\\bulbasaur\\00000000.png'
        # label: 0
        img, label = self.images[idx], self.labels[idx]

        tf = transforms.Compose([
            lambda x:Image.open(x).convert('RGB'), # string path= > image data
            transforms.Resize((int(self.resize*1.25), int(self.resize*1.25))),
            transforms.RandomRotation(15),
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        img = tf(img)
        label = torch.tensor(label)


        return  img,label

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' # 判断是否用GPU
device

'cuda'

In [5]:
class BasicBlock(nn.Module):
    """
    对于浅层网络，如ResNet-18/34等，用基本的Block
    基础模块没有压缩,所以expansion=1
    """
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock,self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
            nn.Conv2d(out_channels,out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        # 如果输入输出维度不等，则使用1x1卷积层来改变维度
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels),
            )
    def forward(self, x):
        out = self.features(x)
#         print(out.shape)
        out += self.shortcut(x)
        out = torch.relu(out)
        return out

In [6]:
class Bottleneck(nn.Module):
    """
    对于深层网络，我们使用BottleNeck，论文中提出其拥有近似的计算复杂度，但能节省很多资源
    zip_channels: 压缩后的维数，最后输出的维数是 expansion * zip_channels
    针对ResNet50/101/152的网络结构,主要是因为第三层是第二层的4倍的关系所以expansion=4
    """
    expansion = 4
    
    def __init__(self, in_channels, zip_channels, stride=1):
        super(Bottleneck, self).__init__()
        out_channels = self.expansion * zip_channels
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, zip_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(zip_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(zip_channels, zip_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(zip_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(zip_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
            
    def forward(self, x):
        out = self.features(x)
#         print(out.shape)
        out += self.shortcut(x)
        out = torch.relu(out)
        return out

In [7]:
class ResNet(nn.Module):
    """
    不同的ResNet架构都是统一的一层特征提取、四层残差，不同点在于每层残差的深度。
    对于cifar10，feature map size的变化如下：
    (32, 32, 3) -> [Conv2d] -> (32, 32, 64) -> [Res1] -> (32, 32, 64) -> [Res2] 
 -> (16, 16, 128) -> [Res3] -> (8, 8, 256) ->[Res4] -> (4, 4, 512) -> [AvgPool] 
 -> (1, 1, 512) -> [Reshape] -> (512) -> [Linear] -> (10)
    """
    def __init__(self, block, num_blocks, num_classes=5, verbose = False):
        super(ResNet, self).__init__()
        self.verbose = verbose
        self.in_channels = 64
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        #使用_make_layer函数生成上表对应的conv2_x, conv3_x, conv4_x, conv5_x的结构
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        # cifar10经过上述结构后，到这里的feature map size是 4 x 4 x 512 x expansion
        # 所以这里用了 4 x 4 的平均池化
        self.avg_pool = nn.AvgPool2d(kernel_size=4)
        self.classifer = nn.Linear(512 * block.expansion, num_classes)
        
    def _make_layer(self, block, out_channels, num_blocks, stride):
        # 第一个block要进行降采样
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            # 如果是Bottleneck Block的话需要对每层输入的维度进行压缩，压缩后再增加维数
            # 所以每层的输入维数也要跟着变
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.features(x)
        if self.verbose:
            print('block 1 output: {}'.format(out.shape))
        out = self.layer1(out)        
        if self.verbose:
            print('block 2 output: {}'.format(out.shape))
        out = self.layer2(out)
        if self.verbose:
            print('block 3 output: {}'.format(out.shape))
        out = self.layer3(out)
        if self.verbose:
            print('block 4 output: {}'.format(out.shape))
        out = self.layer4(out)
        if self.verbose:
            print('block 5 output: {}'.format(out.shape))
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.classifer(out)
        return out

In [8]:
def ResNet18(verbose=False):
    return ResNet(BasicBlock, [2,2,2,2],verbose=verbose)

def ResNet34(verbose=False):
    return ResNet(BasicBlock, [3,4,6,3],verbose=verbose)

def ResNet50(verbose=False):
    return ResNet(Bottleneck, [3,4,6,3],verbose=verbose)

def ResNet101(verbose=False):
    return ResNet(Bottleneck, [3,4,23,3],verbose=verbose)

def ResNet152(verbose=False):
    return ResNet(Bottleneck, [3,8,36,3],verbose=verbose)

In [9]:
def ResNet10fold(n):
    epoch = 200
    device = 'cuda' if torch.cuda.is_available() else 'cpu' # 判断是否用GPU
    net = ResNet18().to(device)
    if device == 'cuda':
        net = nn.DataParallel(net)
    # 当计算图不会改变的时候（每次输入形状相同，模型不改变）的情况下可以提高性能，反之则降低性能
        torch.backends.cudnn.benchmark = True
    optimizer = optim.SGD(net.parameters(), lr=1e-1, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5,verbose=True,patience = 5,min_lr = 0.000001) # 动态更新学习率
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 15], gamma=0.1)
    label = {'as': 0, 'mr': 1,'ms': 2, 'mvp': 3,'n': 4}
    Batch_Size = 256
    train_root = './dataset_mfcc/train_'+str(n)+'/train'
    test_root = './dataset_mfcc/train_'+str(n)+'/test'
    train_db = PCGload(train_root, 32, label)
    test_db = PCGload(test_root, 32, label)
    trainloader = DataLoader(train_db, batch_size=Batch_Size, shuffle=True,num_workers=0)
    testloader = DataLoader(test_db, batch_size=Batch_Size, num_workers=0)
    save_path = './model/ResNet-' + str(n) + '.pth'
    Acc, Loss, Lr = train2(net, trainloader, testloader, epoch, optimizer, criterion, scheduler, save_path, verbose = True)
    return Acc, Loss, Lr

In [10]:
Acc_5, Loss_5, Lr_5 = ResNet10fold(5)

Epoch [  1/200]  Train Loss:3.642552  Train Acc:26.02% Test Loss:293.099335  Test Acc:27.00%  Learning Rate:0.100000	Time 00:02
Epoch [  2/200]  Train Loss:3.115183  Train Acc:22.84% Test Loss:1485.097656  Test Acc:27.00%  Learning Rate:0.100000	Time 00:01
Epoch [  3/200]  Train Loss:2.394714  Train Acc:23.30% Test Loss:102.606941  Test Acc:15.00%  Learning Rate:0.100000	Time 00:01
Epoch [  4/200]  Train Loss:1.916988  Train Acc:22.20% Test Loss:158.229996  Test Acc:15.00%  Learning Rate:0.100000	Time 00:01
Epoch [  5/200]  Train Loss:1.692104  Train Acc:28.50% Test Loss:89.324181  Test Acc:15.00%  Learning Rate:0.100000	Time 00:01
Epoch [  6/200]  Train Loss:1.523916  Train Acc:31.70% Test Loss:16.807068  Test Acc:16.00%  Learning Rate:0.100000	Time 00:01
Epoch [  7/200]  Train Loss:1.499337  Train Acc:33.34% Test Loss:8.795376  Test Acc:21.00%  Learning Rate:0.100000	Time 00:01
Epoch [  8/200]  Train Loss:1.467312  Train Acc:39.77% Test Loss:2.719419  Test Acc:26.00%  Learning Rate:0

In [11]:
Acc_9, Loss_9, Lr_9 = ResNet10fold(9)
Acc_10, Loss_10, Lr_10 = ResNet10fold(10)

Epoch [  1/200]  Train Loss:3.159491  Train Acc:21.26% Test Loss:67.442284  Test Acc:18.00%  Learning Rate:0.100000	Time 00:01
Epoch [  2/200]  Train Loss:3.446357  Train Acc:24.23% Test Loss:81353.414062  Test Acc:18.00%  Learning Rate:0.100000	Time 00:01
Epoch [  3/200]  Train Loss:2.497972  Train Acc:21.31% Test Loss:9624.108398  Test Acc:18.00%  Learning Rate:0.100000	Time 00:01
Epoch [  4/200]  Train Loss:2.325742  Train Acc:28.88% Test Loss:281.095032  Test Acc:28.00%  Learning Rate:0.100000	Time 00:01
Epoch [  5/200]  Train Loss:1.757418  Train Acc:30.35% Test Loss:18.571445  Test Acc:18.00%  Learning Rate:0.100000	Time 00:01
Epoch [  6/200]  Train Loss:1.668092  Train Acc:39.24% Test Loss:4.659865  Test Acc:24.00%  Learning Rate:0.100000	Time 00:01
Epoch [  7/200]  Train Loss:1.431378  Train Acc:45.46% Test Loss:4.117309  Test Acc:17.00%  Learning Rate:0.100000	Time 00:01
Epoch [  8/200]  Train Loss:1.232979  Train Acc:50.71% Test Loss:4.494325  Test Acc:18.00%  Learning Rate:0

In [22]:
Acc_1, Loss_1, Lr_1 = ResNet10fold(1)
Acc_2, Loss_2, Lr_2 = ResNet10fold(2)
Acc_3, Loss_3, Lr_3 = ResNet10fold(3)
Acc_4, Loss_4, Lr_4 = ResNet10fold(4)
Acc_5, Loss_5, Lr_5 = ResNet10fold(5)
Acc_6, Loss_6, Lr_6 = ResNet10fold(6)
Acc_7, Loss_7, Lr_7 = ResNet10fold(7)
Acc_8, Loss_8, Lr_8 = ResNet10fold(8)
Acc_9, Loss_9, Lr_9 = ResNet10fold(9)
Acc_10, Loss_10, Lr_10 = ResNet10fold(10)

900 ['./dataset_mfcc/train_2/train/as/New_AS_001.png', './dataset_mfcc/train_2/train/as/New_AS_002.png', './dataset_mfcc/train_2/train/as/New_AS_005.png', './dataset_mfcc/train_2/train/as/New_AS_006.png', './dataset_mfcc/train_2/train/as/New_AS_007.png', './dataset_mfcc/train_2/train/as/New_AS_008.png', './dataset_mfcc/train_2/train/as/New_AS_009.png', './dataset_mfcc/train_2/train/as/New_AS_010.png', './dataset_mfcc/train_2/train/as/New_AS_011.png', './dataset_mfcc/train_2/train/as/New_AS_012.png', './dataset_mfcc/train_2/train/as/New_AS_013.png', './dataset_mfcc/train_2/train/as/New_AS_014.png', './dataset_mfcc/train_2/train/as/New_AS_015.png', './dataset_mfcc/train_2/train/as/New_AS_016.png', './dataset_mfcc/train_2/train/as/New_AS_018.png', './dataset_mfcc/train_2/train/as/New_AS_019.png', './dataset_mfcc/train_2/train/as/New_AS_020.png', './dataset_mfcc/train_2/train/as/New_AS_021.png', './dataset_mfcc/train_2/train/as/New_AS_022.png', './dataset_mfcc/train_2/train/as/New_AS_023.p

In [31]:
Loss_1

{'train_loss': [3.5494699478149414,
  6.134739398956299,
  3.794572591781616,
  3.3339171409606934,
  2.893717050552368,
  2.28751802444458,
  1.7380825281143188,
  1.5944995880126953,
  1.4700541496276855,
  1.298728585243225,
  1.2044198513031006,
  1.1492228507995605,
  1.0860313177108765,
  1.071069359779358,
  1.0810062885284424,
  1.0872877836227417,
  1.0532915592193604,
  1.1065585613250732,
  1.0469012260437012,
  1.0508215427398682,
  1.082825779914856,
  1.0359331369400024,
  1.0333058834075928,
  1.0268430709838867,
  1.0889983177185059,
  1.0311956405639648,
  1.0357800722122192,
  1.0445901155471802,
  1.059631109237671,
  1.026297688484192,
  1.0214073657989502,
  1.0031522512435913,
  1.015625238418579,
  1.0231740474700928,
  1.0330766439437866,
  1.0120478868484497,
  1.0170117616653442,
  1.0152535438537598,
  1.022308349609375,
  1.0054600238800049,
  1.044729471206665,
  1.005223035812378,
  1.028275489807129,
  0.9980553388595581,
  1.0088666677474976,
  0.9993336

In [None]:
train_acc1 = Acc_1['test_acc']
test_acc1 = Loss_1['test_loss']
train_loss1 = []
test_loss1 = []
train_acc2=[]
test_acc2=[]
train_loss2=[]
test_loss2=[]
train_acc3=[]
test_acc3=[]
train_loss3=[]
test_loss3=[]
train_acc4=[]
test_acc4=[]
train_loss4=[]
test_loss4=[]
train_acc5=[]
test_acc5=[]
train_loss5=[]
test_loss5=[]
train_acc6=[]
test_acc6=[]
train_loss6=[]
test_loss6=[]
train_acc7=[]
test_acc7=[]
train_loss7=[]
test_loss7=[]
train_acc8=[]
test_acc8=[]
train_loss8=[]
test_loss8=[]
train_acc9=[]
test_acc9=[]
train_loss9=[]
test_loss9=[]
train_acc10=[]
test_acc10=[]
train_loss10=[]
test_loss10=[]

In [None]:
train_acc=train_acc1
test_acc=test_acc1
train_loss=train_loss1
test_loss=test_loss1
for i in range(len(train_acc1)):
    train_acc[i]=train_acc[i]/10+train_acc2[i]/10+train_acc3[i]/10+train_acc4[i]/10+train_acc5[i]/10+train_acc6[i]/10+train_acc7[i]/10+train_acc8[i]/10+train_acc9[i]/10+train_acc10[i]/10
for i in range(len(test_acc1)):
    test_acc[i]=test_acc[i]/10+test_acc2[i]/10+test_acc3[i]/10+test_acc4[i]/10+test_acc5[i]/10+test_acc6[i]/10+test_acc7[i]/10+test_acc8[i]/10+test_acc9[i]/10+test_acc10[i]/10
for i in range(len(train_loss1)):
    train_loss[i] = train_loss[i]/10 +train_loss2[i]/10+train_loss3[i]/10+train_loss4[i]/10+train_loss5[i]/10+train_loss6[i]/10+train_loss7[i]/10+train_loss8[i]/10+train_loss9[i]/10+train_loss10[i]/10
for i in range(len(test_loss1)):
    test_loss[i]=test_loss[i]/10+test_loss2[i]/10+test_loss3[i]/10+test_loss4[i]/10+test_loss5[i]/10+test_loss5[i]/10+test_loss6[i]/10+test_loss8[i]/10+test_loss9[i]/10+test_loss10[i]/10