## 掛載雲端硬碟

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

## 更改檔案所在路徑


In [2]:
# Change to your own folder !!!
# %cd /content/drive/MyDrive/your own folder/

## 載入函式庫


In [3]:
import os

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np

from models.resnet import ResNet50

## 超參數設定

In [4]:
DATASET = 'cifar10'
TEST_BATCH_SIZE = 1000
CUDA = True
PRUNE_PERCENT = 0.9 # Change your prune ratio!
WEIGHT_PATH = 'model_best.pth' # Change to your own folder !!!
PRUNE_PATH = 'model_prune.pth' # Change to your own folder !!!

## 載入模型

In [5]:
import sys
print(sys.executable)

!pip show torch | grep Location
!python -c "import torch; print(torch.__version__)"
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
x = torch.randn(5, 5).cuda()
print("✅ CUDA OK:", x.device, x.sum().item())


/home/n26141826/EAI_Lab2/.venv/bin/python
Location: /home/n26141826/EAI_Lab2/.venv/lib/python3.12/site-packages
2.6.0+cu124
2.6.0+cu124
12.4
True
✅ CUDA OK: cuda:0 0.7797485589981079


In [6]:
CUDA = CUDA and torch.cuda.is_available()

model = ResNet50(num_classes=10)
for name, param in model.named_parameters():
    if param.numel() == 0:
        print(f"[ERROR] Empty tensor detected at {name}, shape={param.shape}")

if CUDA:
    model.cuda()

if WEIGHT_PATH:
    if os.path.isfile(WEIGHT_PATH):
        checkpoint = torch.load(WEIGHT_PATH)
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print('LOADING CHECKPOINT {} @EPOCH={}, BEST_PREC1={}'.format(WEIGHT_PATH,checkpoint['epoch'],best_prec1))

    else:
        print("NO CHECKPOINT FOUND")

print(model)

LOADING CHECKPOINT model_best.pth @EPOCH=36, BEST_PREC1=0.9119
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequentia

## 進行剪枝
#### 計算所有Batch Normalizaiton中的scale factor絕對值大小並排序
#### 利用設定好的PRUNE_PERCENT來取得閥值

In [7]:
total = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        total += m.weight.data.shape[0]

bn = torch.zeros(total)
index = 0
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        size = m.weight.data.shape[0]
        bn[index:(index+size)] = m.weight.data.abs().clone()
        index += size

y, i = torch.sort(bn)

threshold_index = int(total * PRUNE_PERCENT)
threshold = y[threshold_index]


## 根據Batch Normalization Layer資訊建立CONFIG
#### 1. 複製Batch Normalization Layer的weight(也就是scale factor γ)
#### 2. 建立mask，大於threshold的index的值會設成1,小於threshold的值會設成0
#### 3. mask的值加總後，會是剪枝後Layer對應的輸出channel數
#### 4. 最後得到要建立剪枝模型的CONFIG

In [8]:
pruned = 0
cfg = []  #用來建立剪枝網路的CONFIG
cfg_mask = [] #用來幫助剪枝的遮罩

In [9]:
cfg = []
cfg_mask = []
preserved_cfg_idx = []

cfg_idx = 0
for name, m in model.named_modules():
    if isinstance(m, nn.BatchNorm2d):
        if name.endswith("bn3") or "downsample.1" in name:
            if name.endswith("downsample.1"):
                continue
            # preserve shortcut
            mask = torch.ones_like(m.weight.data)
            preserved_cfg_idx.append(cfg_idx)
        else:
            weight_copy = m.weight.data.clone()
            if torch.any(torch.isnan(weight_copy)):
                print(f"[NaN Detected] in {name}")
            mask = weight_copy.abs().gt(threshold).float().cuda()
            if int(torch.sum(mask)) == 0:
                _, sorted_idx = torch.topk(weight_copy.abs(), 3)
                mask[sorted_idx] = 1.0

        # cfg_mask.append(mask.clone())
        cfg_mask.append(mask.clone().cuda())
        cfg.append(int(torch.sum(mask)))
        cfg_idx += 1
        print(f"cfg_index: {cfg_idx} -> layer index: {name}, preserved={cfg_idx in preserved_cfg_idx}, total channel: {mask.shape[0]}, remaining channel: {int(torch.sum(mask))}")


pruned_ratio = pruned/total

print(f'PRUNE RATIO={pruned_ratio}')
print('PREPROCESSING SUCCESSFUL!')

print(f'cfg: {cfg}')

cfg_index: 1 -> layer index: bn1, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 2 -> layer index: layer1.0.bn1, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 3 -> layer index: layer1.0.bn2, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 4 -> layer index: layer1.0.bn3, preserved=False, total channel: 256, remaining channel: 256
cfg_index: 5 -> layer index: layer1.1.bn1, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 6 -> layer index: layer1.1.bn2, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 7 -> layer index: layer1.1.bn3, preserved=False, total channel: 256, remaining channel: 256
cfg_index: 8 -> layer index: layer1.2.bn1, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 9 -> layer index: layer1.2.bn2, preserved=False, total channel: 64, remaining channel: 64
cfg_index: 10 -> layer index: layer1.2.bn3, preserved=False, total channel: 256, remaining chann

In [10]:
# preserved_cfg_idx = [2,3,6,7,9,10,12,13,16,17,18,19,22,23,25,26,29,30,31,32,35,36,38,39,42,43,45,46,49,50]
# bn_index = 0

# for k, m in enumerate((model.modules())):
#     if isinstance(m, nn.BatchNorm2d):
#         weight_copy = m.weight.data.clone()
#         # mask = weight_copy.abs().gt(threshold).float().cuda() # 大於 threshold 的設為 True (1.0)，其餘為 False(0.0)

#         # 注意: 需自行設計處理剩下channel數為0的情況 (e.g. 至少保留3個channel)
#         ################################################
#         #          請填空          #
#         ################################################
#         if bn_index in preserved_cfg_idx:
#             # 保留這些BN層（通常是Bottleneck中的最後一層）
#             mask = torch.ones_like(weight_copy).cuda()
#         else:
#             # 否則就做 threshold-based pruning
#             mask = weight_copy.abs().gt(threshold).float().cuda()

#             # 若全部被剪掉，強制保留前三大（可調成至少3個）
#             if int(torch.sum(mask)) == 0:
#                 _, sorted_idx = torch.topk(weight_copy.abs(), 3)
#                 mask[sorted_idx] = 1.0
#         bn_index += 1


#         # 處理剪枝後的權重
#         m.weight.data.mul_(mask)
#         m.bias.data.mul_(mask)
#         pruned = pruned + mask.shape[0] - torch.sum(mask)
#         cfg.append(int(torch.sum(mask)))    # 記錄每一層 BN 剩下幾個通道
#         cfg_mask.append(mask.clone())     # 儲存每層對應的 mask
#         # print('layer index: {:d} \t total channel: {:d} \t remaining channel: {:d}'.
#         #     format(k, mask.shape[0], int(torch.sum(mask))))
#         print(f"cfg_index: {bn_index} -> layer index: {k}, preserved={bn_index in preserved_cfg_idx}, total channel: {mask.shape[0]}, remaining channel: {int(torch.sum(mask))}")
# pruned_ratio = pruned/total

# print(f'PRUNE RATIO={pruned_ratio}')
# print('PREPROCESSING SUCCESSFUL!')

# print(f'cfg: {cfg}')


## 建立剪枝模型

In [11]:
newmodel = ResNet50(num_classes=10, cfg=cfg, )
newmodel.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### 將原本的模型權重複製到剪枝的模型
#### 根據不同層決定要複製什麼權重
###### Batch Normalization Layer
1.   scale factor
2.   bias
3.   running mean
4.   running variance

###### Convolutional Layer
1.   weight

###### Linear Layer
1.   weight
2.   bias




In [14]:
import numpy as np
import torch

def transfer_pruned_weights(model, newmodel, cfg_mask):
    model.eval()
    newmodel.eval()
    layer_id_in_cfg = 0      # 遮罩列表索引
    prev_mask = torch.ones(1)  # 初始化前一層輸出遮罩，對於第一個卷積層的輸入（如RGB 3通道），用全1
    for (name, m0) in model.named_modules():
        m1 = dict(newmodel.named_modules()).get(name)
        if m1 is None:
            continue  # 新模型可能沒有對應模組（通常不會發生，因架構相同）
        # 處理 BatchNorm2d 層
        if isinstance(m0, torch.nn.BatchNorm2d):
            if "downsample.1" in name:
                # 捷徑 BN 層，不使用遮罩，直接複製
                m1.weight.data.copy_(m0.weight.data)
                m1.bias.data.copy_(m0.bias.data)
                m1.running_mean.copy_(m0.running_mean)
                m1.running_var.copy_(m0.running_var)
            else:
                # 非捷徑 BN 層，使用 cfg_mask 遮罩進行通道選擇
                mask = cfg_mask[layer_id_in_cfg]  # 取出對應遮罩
                idx = np.squeeze(np.argwhere(np.asarray(mask.cpu().numpy())))
                if idx.size == 1:
                    idx = np.resize(idx, (1,))
                # 複製選定通道的 BN 參數
                m1.weight.data = m0.weight.data[idx.tolist()].clone()
                m1.bias.data   = m0.bias.data[idx.tolist()].clone()
                m1.running_mean = m0.running_mean[idx.tolist()].clone()
                m1.running_var  = m0.running_var[idx.tolist()].clone()
                # 更新 prev_mask 為當前遮罩，供下一層卷積使用
                prev_mask = mask.clone()
                layer_id_in_cfg += 1
        # 處理 Conv2d 層
        elif isinstance(m0, torch.nn.Conv2d):
            if "downsample.0" in name or m0.weight.data.shape == m1.weight.data.shape:
                # 捷徑卷積層或未剪枝的卷積，直接完整拷貝權重（以及偏置）
                m1.weight.data.copy_(m0.weight.data)
                if m0.bias is not None:
                    m1.bias.data.copy_(m0.bias.data)
            else:
                # 剪枝過的卷積層，按照 prev_mask 和下一遮罩進行權重拷貝
                curr_mask = cfg_mask[layer_id_in_cfg]  # 注意：Conv緊隨上一個BN，layer_id_in_cfg尚未增1，此時取當前索引即下一層BN遮罩
                # 提取索引列表
                in_idx = np.squeeze(np.argwhere(np.asarray(prev_mask.cpu().numpy())))
                out_idx = np.squeeze(np.argwhere(np.asarray(curr_mask.cpu().numpy())))
                if in_idx.size == 1:
                    in_idx = np.resize(in_idx, (1,))
                if out_idx.size == 1:
                    out_idx = np.resize(out_idx, (1,))
                # 根據索引篩選權重張量並複製
                w = m0.weight.data[:, in_idx.tolist(), :, :].clone()
                w = w[out_idx.tolist(), :, :, :].clone()
                m1.weight.data = w.clone()
                # 複製偏置（若有）
                if m0.bias is not None:
                    b = m0.bias.data[out_idx.tolist()].clone()
                    m1.bias.data = b.clone()
                # **注意**：此處不增 layer_id_in_cfg，因為對應遮罩會在隨後的 BN 處理時增1
        # 處理 Linear 層
        elif isinstance(m0, torch.nn.Linear):
            # 線性層未剪枝，直接拷貝權重和偏置
            m1.weight.data.copy_(m0.weight.data)
            m1.bias.data.copy_(m0.bias.data)

# 執行範例
transfer_pruned_weights(model, newmodel, cfg_mask)


In [None]:
# old_modules = list(model.modules())
# new_modules = list(newmodel.modules())

# layer_id_in_cfg = 0
# start_mask = torch.ones(3) #3為input channel(R,G,B)
# end_mask = cfg_mask[layer_id_in_cfg]
# bn_count = 0
# for layer_id in range(len(old_modules)):

#     m0 = old_modules[layer_id]
#     m1 = new_modules[layer_id]

#     if isinstance(m0, nn.BatchNorm2d):
#         bn_count += 1

#         #### 找出遮罩中非零元素的index ####
#         ################################################
#         #          請填空          #
#         ################################################
#         idx = np.squeeze(np.argwhere(np.asarray(mask.cpu().numpy())))
#         if idx.size == 1:  # 若僅剩一個通道，確保維度正確
#             idx = np.resize(idx, (1,))
        
#         # #### 複製weight, bias, running mean,and running variance ####
#         # ################################################
#         # #          請填空          #
#         # ################################################
#         new_bn.weight.data = old_bn.weight.data[idx.tolist()].clone()
#         new_bn.bias.data   = old_bn.bias.data[idx.tolist()].clone()
#         new_bn.running_mean = old_bn.running_mean[idx.tolist()].clone()
#         new_bn.running_var  = old_bn.running_var[idx.tolist()].clone()

#         layer_id_in_cfg += 1
#         start_mask = end_mask.clone()

#         #最後一層連接層不做修改
#         if layer_id_in_cfg < len(cfg_mask):
#             end_mask = cfg_mask[layer_id_in_cfg]

#     elif isinstance(m0, nn.Conv2d):
#         if isinstance(old_modules[layer_id + 1], nn.BatchNorm2d):
#             print(f"m0: {m0}")
#             print(f"m1: {m1}")
#             print(f"\n[Layer {layer_id}] Conv2d pruning")
#             print(f"start_mask: {start_mask.shape}, sum={start_mask.sum().item()}")
#             print(f"type(end_mask)={type(end_mask)} | device={end_mask.device} | dtype={end_mask.dtype}")
#             try:
#                 end_mask_cpu = end_mask.detach().cpu()
#                 print(f"end_mask (cpu): {end_mask_cpu}")
#                 print(f"end_mask shape: {end_mask_cpu.shape}, sum={end_mask_cpu.sum().item()}")
#             except Exception as e:
#                 print("[ERROR] Moving end_mask to CPU failed")
#                 raise e

#             print(f"end_mask: {end_mask.shape}, sum={end_mask.sum().item()}")
#             idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
#             idx1 = np.squeeze(np.argwhere(np.asarray(end_mask.cpu().numpy())))
#             #### 複製weight ####
#             ################################################
#             #          請填空          #
#             ################################################
#             if idx0.ndim == 0:
#                 idx0 = np.expand_dims(idx0, 0)
#             if idx1.ndim == 0:
#                 idx1 = np.expand_dims(idx1, 0)
            
#             w = m0.weight.data[idx1.tolist(), :, :, :].clone()
#             w = w[:, idx0.tolist(), :, :].clone()

#             m1.weight.data = w.clone()  # 這行很重要！

#               # downsample 層不用prune
#         else:
#             m1.weight.data = m0.weight.data.clone()

#     elif isinstance(m0, nn.Linear):

#         idx0 = np.squeeze(np.argwhere(np.asarray(start_mask.cpu().numpy())))
#         if idx0.ndim == 0:
#             idx0 = np.expand_dims(idx0, 0)

#         #### 複製weight ####
#         ################################################
#         #          請填空          #
#         ################################################
#         m1.weight.data = m0.weight.data[:, idx0].clone()

#         #### 複製bias ####
#         m1.bias.data = m0.bias.data.clone()


m0: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
m1: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

[Layer 1] Conv2d pruning
start_mask: torch.Size([3]), sum=3.0
type(end_mask)=<class 'torch.Tensor'> | device=cuda:0 | dtype=torch.float32
end_mask (cpu): tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
end_mask shape: torch.Size([64]), sum=64.0
end_mask: torch.Size([64]), sum=64.0


NameError: name 'old_bn' is not defined

## 測試函數




In [15]:
def test(model):
    kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('./data', train=False, download=True, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])),
        batch_size=TEST_BATCH_SIZE, shuffle=True, **kwargs)

    model.eval()
    correct = 0
    with torch.no_grad():
      for data, target in test_loader:
          if CUDA:
              data, target = data.cuda(), target.cuda()
          data, target = Variable(data), Variable(target)
          output = model(data)
          pred = output.data.max(1, keepdim=True)[1]
          correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    print('\nTest set: Accuracy: {}/{} ({:.1f}%)\n'.format(
        correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
    return correct / float(len(test_loader.dataset))

## 儲存模型並印出結果，以及剪枝後的test acc


In [16]:
print("cfg: ", cfg)
torch.save({'cfg': cfg, 'state_dict': newmodel.state_dict()}, PRUNE_PATH)

print(newmodel)
model = newmodel.cuda()
test(model)

cfg:  [64, 64, 64, 256, 64, 64, 256, 64, 64, 256, 117, 121, 512, 92, 121, 512, 69, 75, 512, 69, 71, 512, 66, 63, 1024, 14, 24, 1024, 1, 8, 1024, 1, 8, 1024, 3, 2, 1024, 3, 3, 1024, 4, 2, 2048, 3, 3, 2048, 3, 3, 2048]
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
  

tensor(0.2608)