## 掛載雲端硬碟

In [None]:
# Mount Google Drive at /content/drive; the checkpoint and output paths
# configured later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 更改檔案所在路徑


In [None]:
# Change to your own folder !!!
# Relative paths used later (e.g. './data' for the dataset) resolve against this directory.
# NOTE(review): presumably this folder also holds the `models` package imported below — confirm.
%cd /content/drive/MyDrive/Colab Notebooks/AI_lab4

/content/drive/MyDrive/Colab Notebooks/AI_lab4


## 載入函式庫


In [None]:
import os

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np

from models import vgg

## 超參數設定 PRUNE=0.9

In [None]:
# Configuration for the first pruning run (90% of BatchNorm channels targeted).
DATASET = 'cifar10'  # dataset label; not referenced by the cells visible in this notebook
TEST_BATCH_SIZE = 1000  # batch size of the evaluation DataLoader built in test()
CUDA = True  # request GPU execution; downgraded later if no GPU is available
PRUNE_PERCENT = 0.9 # Fraction of BatchNorm channels to prune -- change your prune ratio here!
WEIGHT_PATH = '/content/drive/MyDrive/Colab Notebooks/model_best.pth'  # checkpoint to load
PRUNE_PATH = '/content/drive/MyDrive/Colab Notebooks/model_prune.pth'  # where the pruned model is saved


##載入模型

In [None]:
# Use the GPU only when it was requested AND one is actually available.
CUDA = CUDA and torch.cuda.is_available()

# Build the unpruned network and move it to the GPU when enabled.
model = vgg()
if CUDA:
    model.cuda()

if WEIGHT_PATH:
    if os.path.isfile(WEIGHT_PATH):
        # map_location keeps the load working on CPU-only runtimes even when the
        # checkpoint tensors were saved from a GPU.
        checkpoint = torch.load(WEIGHT_PATH, map_location='cuda' if CUDA else 'cpu')
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print('LOADING CHECKPOINT {} @EPOCH={}, BEST_PREC1={}'.format(WEIGHT_PATH,checkpoint['epoch'],best_prec1))

    else:
        # The model keeps its freshly initialised weights in this case, so any
        # pruning done afterwards operates on an untrained network.
        print("NO CHECKPOINT FOUND")

print(model)

LOADING CHECKPOINT /content/drive/MyDrive/Colab Notebooks/model_best.pth @EPOCH=50, BEST_PREC1=0.9271000027656555
vgg(
  (feature): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, trac

##測試函數(觀察模型精確度)


In [None]:
def test(model):
    """Evaluate ``model`` on the CIFAR-10 test split and print its top-1 accuracy.

    The test split is downloaded to ``./data`` when missing. Images are converted
    to tensors and normalised with mean 0.5 / std 0.5 per channel.
    NOTE(review): this normalisation must match the one used when the checkpoint
    was trained -- confirm against the training script.

    Args:
        model: network to evaluate; it is switched to eval mode. When the global
            ``CUDA`` flag is set, batches are moved to the GPU, so the model is
            expected to live there too.

    Returns:
        Fraction of correctly classified test images, as a 0-d tensor.
    """
    kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
    # Accuracy is order-independent, so the evaluation loader is not shuffled.
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data', train=False, download=True, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])),
        batch_size=TEST_BATCH_SIZE, shuffle=False, **kwargs)
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if CUDA:
                data, target = data.cuda(), target.cuda()
            # Tensors are used directly: the deprecated Variable wrapper is a no-op.
            output = model(data)
            # Index of the highest score per sample, shaped (batch, 1).
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).cpu().sum()

    num_samples = len(test_loader.dataset)
    print('\nTest set: Accuracy: {}/{} ({:.1f}%)\n'.format(
        correct, num_samples, 100. * correct / num_samples))
    return correct / float(num_samples)

## 進行剪枝
#### 計算所有 Batch Normalization 中的 scale factor 絕對值大小並排序
#### 利用 PRUNE_PERCENT 取得閾值

In [None]:
# Count every BatchNorm2d channel (one scale factor, gamma, per channel).
total = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        total += m.weight.data.shape[0]

if total == 0:
    raise ValueError('model contains no BatchNorm2d layers; nothing to prune')

# Gather |gamma| of every BatchNorm2d channel into one flat CPU tensor.
bn = torch.zeros(total)
index = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        size = m.weight.data.shape[0]
        bn[index:(index+size)] = m.weight.data.abs().clone()
        index += size

y, i = torch.sort(bn)  # ascending order

# The value at this position of the sorted tensor becomes the pruning threshold.
# Clamp the index so PRUNE_PERCENT == 1.0 does not read past the end.
threshold_index = min(int(total * PRUNE_PERCENT), total - 1)
threshold = y[threshold_index]
print(threshold)


tensor(0.5365)


##建立CONFIG，之後建立剪枝後網路時需要用到此CONFIG

In [None]:
pruned = 0  # running count of channels removed across all BatchNorm2d layers
cfg = []  # CONFIG used to build the pruned network
cfg_mask = [] # per-layer keep masks used when pruning and copying weights

## 根據 Batch Normalization Layer 資訊建立 CONFIG
#### 1. 複製 Batch Normalization Layer 的 weight（也就是 scale factor）
#### 2. 建立 mask：絕對值大於 threshold 的位置設成 1，小於或等於 threshold 的位置設成 0
#### 3. mask 中為 1 的個數，即為剪枝後該 Layer 對應的輸出 channel 數
#### 4. 最後得到用來建立剪枝模型的 CONFIG

In [None]:
# Start from clean accumulators so re-running this cell does not append
# duplicate entries to cfg / cfg_mask or double-count pruned channels.
pruned = 0
cfg = []
cfg_mask = []
min_channels = 3  # never leave a layer with fewer channels than this

for k, m in enumerate(model.modules()):
    if isinstance(m, nn.BatchNorm2d):
        weight_copy = m.weight.data.clone()
        # 1.0 where |gamma| is strictly above the threshold, else 0.0.
        # The mask stays on the weight's device, so this also works without a GPU.
        mask = weight_copy.abs().gt(threshold).float()

        # Guard against a layer being pruned to (almost) nothing: force-keep the
        # channels with the largest |gamma|, capped at the layer's channel count.
        if torch.sum(mask) < min_channels:
            keep = min(min_channels, mask.shape[0])
            _, indices = torch.topk(weight_copy.abs(), keep)
            mask[indices] = 1

        remaining = int(torch.sum(mask))  # number of channels kept in this layer
        pruned += mask.shape[0] - remaining
        cfg.append(remaining)
        cfg_mask.append(mask.clone())
        print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
            format(k, mask.shape[0], remaining))
    elif isinstance(m, nn.MaxPool2d):
        # 'M' marks a max-pooling position in the config.
        cfg.append('M')

pruned_ratio = pruned/total

print(f'PRUNE RATIO={pruned_ratio}')
print('PREPROCESSING SUCCESSFUL!')

print(cfg)


layer index: 3 	 total channel: 64 	 remaining channel: 28
layer index: 6 	 total channel: 64 	 remaining channel: 61
layer index: 10 	 total channel: 128 	 remaining channel: 111
layer index: 13 	 total channel: 128 	 remaining channel: 121
layer index: 17 	 total channel: 256 	 remaining channel: 148
layer index: 20 	 total channel: 256 	 remaining channel: 68
layer index: 23 	 total channel: 256 	 remaining channel: 8
layer index: 26 	 total channel: 256 	 remaining channel: 3
layer index: 30 	 total channel: 512 	 remaining channel: 3
layer index: 33 	 total channel: 512 	 remaining channel: 3
layer index: 36 	 total channel: 512 	 remaining channel: 3
layer index: 39 	 total channel: 512 	 remaining channel: 3
layer index: 43 	 total channel: 512 	 remaining channel: 3
layer index: 46 	 total channel: 512 	 remaining channel: 3
layer index: 49 	 total channel: 512 	 remaining channel: 3
layer index: 52 	 total channel: 512 	 remaining channel: 5
PRUNE RATIO=0.8957121968269348
PREP

###建立剪枝模型

In [None]:
# Build the slimmer network described by cfg. Honour the CUDA flag instead of
# calling .cuda() unconditionally, which fails on a CPU-only runtime.
newmodel = vgg(cfg=cfg)
if CUDA:
    newmodel.cuda()
newmodel

vgg(
  (feature): Sequential(
    (0): Conv2d(3, 28, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(28, 61, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(61, 111, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(111, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(111, 121, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(121, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilatio

###將原本的模型權重複製到剪枝的模型
####1.決定該層的輸入與輸出Channel
####2.根據不同層決定要複製什麼權重
######Batch Normalization Layer
1.   scale factor
2.   bias
3.   running mean
4.   running variance

###### Convolutional Layer
1.   weight
2.   bias（本模型的卷積層為 bias=False，因此實際上不需要複製）

######Linear Layer
1.   weight
2.   bias



In [None]:
# Copy the surviving weights from the original model into the pruned model.
# start_mask selects the input channels of the current layer, end_mask the
# output channels; both advance each time a BatchNorm2d layer is passed.
# Indices are built with torch.nonzero(...).view(-1), which always yields a 1-D
# index (np.squeeze would collapse a single surviving channel to a 0-d index and
# drop a weight dimension).
layer_id_in_cfg = 0
start_mask = torch.ones(3)  # 3 input channels (R, G, B)
end_mask = cfg_mask[layer_id_in_cfg]
for m0, m1 in zip(model.modules(), newmodel.modules()):
    if isinstance(m0, nn.BatchNorm2d):
        device = m0.weight.device
        keep_mask = end_mask.to(device)

        # Zero the pruned channels in the ORIGINAL model in place (weight x 1 or 0).
        m0.weight.data.mul_(keep_mask)
        m0.bias.data.mul_(keep_mask)

        # Indices of the channels that survive pruning.
        nonzero_index = torch.nonzero(keep_mask).view(-1)

        # Copy scale factor and bias of the surviving channels.
        m1.weight.data = m0.weight.data[nonzero_index].clone()
        m1.bias.data = m0.bias.data[nonzero_index].clone()

        # Copy the running statistics of the surviving channels.
        m1.running_mean = m0.running_mean[nonzero_index].clone()
        m1.running_var = m0.running_var[nonzero_index].clone()

        # This layer's outputs become the next layer's inputs.
        layer_id_in_cfg += 1
        start_mask = end_mask.clone()

        # After the last BatchNorm2d there is no further mask to advance to.
        if layer_id_in_cfg < len(cfg_mask):
            end_mask = cfg_mask[layer_id_in_cfg]
    elif isinstance(m0, nn.Conv2d):
        device = m0.weight.device
        idx0 = torch.nonzero(start_mask).view(-1).to(device)  # kept input channels
        idx1 = torch.nonzero(end_mask).view(-1).to(device)    # kept output channels

        # Conv weight layout is (out_channels, in_channels, kH, kW):
        # select inputs first, then outputs. No bias is copied here.
        w = m0.weight.data[:, idx0, :, :]
        m1.weight.data = w[idx1, :, :, :].clone()
    elif isinstance(m0, nn.Linear):
        # Linear weight layout is (out_features, in_features): only the input
        # features are pruned, so every output row and the whole bias are kept.
        # NOTE(review): assumes the classifier's in_features equals the channel
        # count of the last BatchNorm2d layer -- confirm against models.vgg.
        idx0 = torch.nonzero(start_mask).view(-1).to(m0.weight.device)

        m1.weight.data = m0.weight.data[:, idx0].clone()
        m1.bias.data = m0.bias.data.clone()



####儲存模型並印出結果


In [None]:
# Save the pruned architecture description (cfg) together with the pruned
# model's weights to PRUNE_PATH.
torch.save({'cfg': cfg, 'state_dict': newmodel.state_dict()}, PRUNE_PATH)

print(newmodel)
# Rebind `model` to the pruned network; from here on the global `model` is no
# longer the original unpruned network.
model = newmodel
# NOTE(review): the recorded output of this cell is tensor(0.1000), i.e. 10%
# accuracy on a 10-class dataset -- verify whether that is expected before
# fine-tuning or points to a problem in the weight copy / preprocessing.
test(newmodel)

vgg(
  (feature): Sequential(
    (0): Conv2d(3, 28, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(28, 61, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(61, 111, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(111, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(111, 121, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(121, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilatio

tensor(0.1000)

## 超參數設定 PRUNE=0.5

In [None]:
# Configuration for the second pruning run (50% of BatchNorm channels targeted).
DATASET = 'cifar10'  # dataset label; not referenced by the cells visible in this notebook
TEST_BATCH_SIZE = 1000  # batch size of the evaluation DataLoader built in test()
CUDA = True  # request GPU execution; downgraded later if no GPU is available
PRUNE_PERCENT = 0.5 # Fraction of BatchNorm channels to prune -- change your prune ratio here!
WEIGHT_PATH = '/content/drive/MyDrive/Colab Notebooks/model_best.pth'  # checkpoint to load
PRUNE_PATH = '/content/drive/MyDrive/Colab Notebooks/model_prune2.pth'  # separate output file for this run


In [None]:
# Use the GPU only when it was requested AND one is actually available.
CUDA = CUDA and torch.cuda.is_available()

# Rebuild the unpruned network from scratch (the global `model` may have been
# rebound to a pruned network by an earlier cell) and move it to the GPU when enabled.
model = vgg()
if CUDA:
    model.cuda()

if WEIGHT_PATH:
    if os.path.isfile(WEIGHT_PATH):
        # map_location keeps the load working on CPU-only runtimes even when the
        # checkpoint tensors were saved from a GPU.
        checkpoint = torch.load(WEIGHT_PATH, map_location='cuda' if CUDA else 'cpu')
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print('LOADING CHECKPOINT {} @EPOCH={}, BEST_PREC1={}'.format(WEIGHT_PATH,checkpoint['epoch'],best_prec1))

    else:
        # The model keeps its freshly initialised weights in this case, so any
        # pruning done afterwards operates on an untrained network.
        print("NO CHECKPOINT FOUND")

print(model)

LOADING CHECKPOINT /content/drive/MyDrive/Colab Notebooks/model_best.pth @EPOCH=52, BEST_PREC1=0.9265999794006348
vgg(
  (feature): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, trac

In [None]:
def test(model):
    """Evaluate ``model`` on the CIFAR-10 test split and print its top-1 accuracy.

    The test split is downloaded to ``./data`` when missing. Images are converted
    to tensors and normalised with mean 0.5 / std 0.5 per channel.
    NOTE(review): this normalisation must match the one used when the checkpoint
    was trained -- confirm against the training script.

    Args:
        model: network to evaluate; it is switched to eval mode. When the global
            ``CUDA`` flag is set, batches are moved to the GPU, so the model is
            expected to live there too.

    Returns:
        Fraction of correctly classified test images, as a 0-d tensor.
    """
    kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
    # Accuracy is order-independent, so the evaluation loader is not shuffled.
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data', train=False, download=True, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])),
        batch_size=TEST_BATCH_SIZE, shuffle=False, **kwargs)
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if CUDA:
                data, target = data.cuda(), target.cuda()
            # Tensors are used directly: the deprecated Variable wrapper is a no-op.
            output = model(data)
            # Index of the highest score per sample, shaped (batch, 1).
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).cpu().sum()

    num_samples = len(test_loader.dataset)
    print('\nTest set: Accuracy: {}/{} ({:.1f}%)\n'.format(
        correct, num_samples, 100. * correct / num_samples))
    return correct / float(num_samples)

In [None]:
# Count every BatchNorm2d channel (one scale factor, gamma, per channel).
total = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        total += m.weight.data.shape[0]

if total == 0:
    raise ValueError('model contains no BatchNorm2d layers; nothing to prune')

# Gather |gamma| of every BatchNorm2d channel into one flat CPU tensor.
bn = torch.zeros(total)
index = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        size = m.weight.data.shape[0]
        bn[index:(index+size)] = m.weight.data.abs().clone()
        index += size

y, i = torch.sort(bn)  # ascending order

# The value at this position of the sorted tensor becomes the pruning threshold.
# Clamp the index so PRUNE_PERCENT == 1.0 does not read past the end.
threshold_index = min(int(total * PRUNE_PERCENT), total - 1)
threshold = y[threshold_index]
print(threshold)


tensor(0.0221)


In [None]:
pruned = 0  # running count of channels removed across all BatchNorm2d layers
cfg = []  # CONFIG used to build the pruned network
cfg_mask = [] # per-layer keep masks used when pruning and copying weights

In [None]:
# Start from clean accumulators so re-running this cell does not append
# duplicate entries to cfg / cfg_mask or double-count pruned channels.
pruned = 0
cfg = []
cfg_mask = []
min_channels = 3  # never leave a layer with fewer channels than this

for k, m in enumerate(model.modules()):
    if isinstance(m, nn.BatchNorm2d):
        weight_copy = m.weight.data.clone()
        # 1.0 where |gamma| is strictly above the threshold, else 0.0.
        # The mask stays on the weight's device, so this also works without a GPU.
        mask = weight_copy.abs().gt(threshold).float()

        # Guard against a layer being pruned to (almost) nothing: force-keep the
        # channels with the largest |gamma|, capped at the layer's channel count.
        if torch.sum(mask) < min_channels:
            keep = min(min_channels, mask.shape[0])
            _, indices = torch.topk(weight_copy.abs(), keep)
            mask[indices] = 1

        remaining = int(torch.sum(mask))  # number of channels kept in this layer
        pruned += mask.shape[0] - remaining
        cfg.append(remaining)
        cfg_mask.append(mask.clone())
        print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
            format(k, mask.shape[0], remaining))
    elif isinstance(m, nn.MaxPool2d):
        # 'M' marks a max-pooling position in the config.
        cfg.append('M')

pruned_ratio = pruned/total

print(f'PRUNE RATIO={pruned_ratio}')
print('PREPROCESSING SUCCESSFUL!')

print(cfg)


layer index: 3 	 total channel: 64 	 remaining channel: 55
layer index: 6 	 total channel: 64 	 remaining channel: 64
layer index: 10 	 total channel: 128 	 remaining channel: 128
layer index: 13 	 total channel: 128 	 remaining channel: 128
layer index: 17 	 total channel: 256 	 remaining channel: 256
layer index: 20 	 total channel: 256 	 remaining channel: 255
layer index: 23 	 total channel: 256 	 remaining channel: 253
layer index: 26 	 total channel: 256 	 remaining channel: 246
layer index: 30 	 total channel: 512 	 remaining channel: 308
layer index: 33 	 total channel: 512 	 remaining channel: 188
layer index: 36 	 total channel: 512 	 remaining channel: 171
layer index: 39 	 total channel: 512 	 remaining channel: 114
layer index: 43 	 total channel: 512 	 remaining channel: 89
layer index: 46 	 total channel: 512 	 remaining channel: 106
layer index: 49 	 total channel: 512 	 remaining channel: 142
layer index: 52 	 total channel: 512 	 remaining channel: 248
PRUNE RATIO=0.5

In [None]:
# Build the slimmer network described by cfg. Honour the CUDA flag instead of
# calling .cuda() unconditionally, which fails on a CPU-only runtime.
newmodel = vgg(cfg=cfg)
if CUDA:
    newmodel.cuda()
newmodel

vgg(
  (feature): Sequential(
    (0): Conv2d(3, 55, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(55, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(55, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilatio

### 將原本的模型權重複製到剪枝的模型
#### 1. 決定該層的輸入與輸出 Channel
#### 2. 根據不同層決定要複製什麼權重
###### Batch Normalization Layer
1.   scale factor
2.   bias
3.   running mean
4.   running variance

###### Convolutional Layer
1.   weight
2.   bias（本模型的卷積層為 bias=False，因此實際上不需要複製）

###### Linear Layer
1.   weight
2.   bias

In [None]:
# Copy the surviving weights from the original model into the pruned model.
# start_mask selects the input channels of the current layer, end_mask the
# output channels; both advance each time a BatchNorm2d layer is passed.
# Indices are built with torch.nonzero(...).view(-1), which always yields a 1-D
# index (np.squeeze would collapse a single surviving channel to a 0-d index and
# drop a weight dimension).
layer_id_in_cfg = 0
start_mask = torch.ones(3)  # 3 input channels (R, G, B)
end_mask = cfg_mask[layer_id_in_cfg]
for m0, m1 in zip(model.modules(), newmodel.modules()):
    if isinstance(m0, nn.BatchNorm2d):
        device = m0.weight.device
        keep_mask = end_mask.to(device)

        # Zero the pruned channels in the ORIGINAL model in place (weight x 1 or 0).
        m0.weight.data.mul_(keep_mask)
        m0.bias.data.mul_(keep_mask)

        # Indices of the channels that survive pruning.
        nonzero_index = torch.nonzero(keep_mask).view(-1)

        # Copy scale factor and bias of the surviving channels.
        m1.weight.data = m0.weight.data[nonzero_index].clone()
        m1.bias.data = m0.bias.data[nonzero_index].clone()

        # Copy the running statistics of the surviving channels.
        m1.running_mean = m0.running_mean[nonzero_index].clone()
        m1.running_var = m0.running_var[nonzero_index].clone()

        # This layer's outputs become the next layer's inputs.
        layer_id_in_cfg += 1
        start_mask = end_mask.clone()

        # After the last BatchNorm2d there is no further mask to advance to.
        if layer_id_in_cfg < len(cfg_mask):
            end_mask = cfg_mask[layer_id_in_cfg]
    elif isinstance(m0, nn.Conv2d):
        device = m0.weight.device
        idx0 = torch.nonzero(start_mask).view(-1).to(device)  # kept input channels
        idx1 = torch.nonzero(end_mask).view(-1).to(device)    # kept output channels

        # Conv weight layout is (out_channels, in_channels, kH, kW):
        # select inputs first, then outputs. No bias is copied here.
        w = m0.weight.data[:, idx0, :, :]
        m1.weight.data = w[idx1, :, :, :].clone()
    elif isinstance(m0, nn.Linear):
        # Linear weight layout is (out_features, in_features): only the input
        # features are pruned, so every output row and the whole bias are kept.
        # NOTE(review): assumes the classifier's in_features equals the channel
        # count of the last BatchNorm2d layer -- confirm against models.vgg.
        idx0 = torch.nonzero(start_mask).view(-1).to(m0.weight.device)

        m1.weight.data = m0.weight.data[:, idx0].clone()
        m1.bias.data = m0.bias.data.clone()



#### 儲存模型並印出結果

In [None]:
# Save the pruned architecture description (cfg) together with the pruned
# model's weights to PRUNE_PATH.
torch.save({'cfg': cfg, 'state_dict': newmodel.state_dict()}, PRUNE_PATH)

print(newmodel)
# Rebind `model` to the pruned network; from here on the global `model` is no
# longer the original unpruned network.
model = newmodel
# NOTE(review): the recorded output of this cell is tensor(0.1000), i.e. 10%
# accuracy on a 10-class dataset -- verify whether that is expected before
# fine-tuning or points to a problem in the weight copy / preprocessing.
test(newmodel)

vgg(
  (feature): Sequential(
    (0): Conv2d(3, 55, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(55, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(55, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilatio

tensor(0.1000)