In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init as init
import torch.nn.functional as F
import visdom
import copy
import torch.nn.utils.prune as prune
from tqdm.notebook import tqdm
import numpy as np
import timeit

# custom librarys (model, parameters...) Lottery_Ticket_Prac/custom/utils.py
import custom.utils as cu

In [2]:
torch.manual_seed(55)
torch.cuda.manual_seed_all(55)
torch.backends.cudnn.enabled = False

In [3]:
# cuda setting
GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

Available devices  2
Current cuda device  1
GeForce RTX 2080 Ti
cpu와 cuda 중 다음 기기로 학습함: cuda:1 



In [4]:
# set model type
model_type = 'LeNet300'
#model_type = 'Conv6'

best_accu = []

# model, parameter get
param = cu.parameters()

if model_type == 'LeNet300':
    model = cu.LeNet300().to(device)
elif model_type == 'Conv6':
    model = cu.Conv6().to(device)
#elif ...
    
param.type(model_type)    
model_init = copy.deepcopy(model)
criterion = nn.CrossEntropyLoss().to(device)

# parameter check
print('\n'.join("%s: %s" % item for item in param.__dict__.items()))

model_type: LeNet300
lr: 0.0012
epochs: 50
batch_size: 60
weight_decay: 0.0012
iteration: 0
prune_per_c: 1
prune_per_f: 0.2
prune_per_o: 0.1
noi: 12
trainset: Dataset MNIST
    Number of datapoints: 60000
    Root location: ../MNIST_data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )
valset: empty
testset: Dataset MNIST
    Number of datapoints: 10000
    Root location: ../MNIST_data/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.1307,), std=(0.3081,))
           )
train_loader: <torch.utils.data.dataloader.DataLoader object at 0x7f80d416bed0>
val_loader: empty
test_loader: <torch.utils.data.dataloader.DataLoader object at 0x7f80d416be10>


In [5]:
# visdom setting
vis = visdom.Visdom()
vis.close(env="main")

Tracker_type = "Accuracy_Tracker"
title = model_type + "_" + Tracker_type

# make plot
vis_plt = vis.line(X=torch.Tensor(1).zero_(), Y=torch.Tensor(1).zero_(), 
                    opts=dict(title = title,
                              legend=['100.0'],
                             showlegend=True,
                              xtickmin = 0,
                              xtickmax = 20000,
                              ytickmin = 0.95,
                              ytickmax = 0.99
                             )
                   )

def visdom_plot(loss_plot, num, loss_value, name):
    vis.line(X = num,
            Y = loss_value,
            win = loss_plot,
            name = name,
            update = 'append'
            )

Setting up a new session...


In [6]:
# change parameter for test (class에 직접 접근하여 변경)
param.epochs = 5
param.noi = 5
"""
"""

'\n'

In [7]:
# train, test, prune function
def train(model, dataloader, optimizer, criterion, cp_mask):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, label) in enumerate(dataloader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()
        running_loss += loss / len(dataloader)
    return running_loss

def test(model, dataloader, criterion):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            loss = criterion(outputs, label)

            test_loss += loss / len(dataloader)
            total += label.size(0)
            correct += (predicted == label).sum().item()
        # 로더 -> 배치 개수 로더.dataset -> 전체 길이, 
    return (correct/total), test_loss

# prune function
# pruning mask 생성 -> mask 복사 -> weight initialize -> prune 진행
def weight_init(model1, model2, c_rate, f_rate, o_rate):
    # layer별로 지정된 rate만큼 prune mask 생성
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name = 'weight', amount = c_rate)
        if isinstance(module, nn.Linear):
            if name != 'fc3':
                prune.l1_unstructured(module, name = 'weight', amount = f_rate)
            else:
                prune.l1_unstructured(module, name = 'weight', amount = o_rate)
                        
    # mask 복사
    cp_mask = {}
    for name, mask in model1.named_buffers():
        cp_mask[name[:(len(name)-12)]] = mask
    
    # weight initialize
    for name, p in model1.named_parameters():
        if 'weight_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
        if 'bias_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
                    
    # prune 진행
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.remove(module, name = 'weight')
        elif isinstance(module, nn.Linear):
            prune.remove(module, name = 'weight')            
    
    # gradient hook
    for name, module in model.named_modules():
        if 'fc' in name:
            module.weight.register_hook(lambda grad, name=name : grad.mul_(cp_mask[name]))
    
    optimizer = optim.Adam(model.parameters(), lr = param.lr, weight_decay = param.weight_decay)
    
    
    # copy된 mask return
    return cp_mask, optimizer

# weight count function
# dict type ['Layer name' : [all, non_zero, zero, ratio]]
def weight_counter(model):
    layer_weight = {'all.weight':[0, 0, 0, 0]}
    
    for name, p in model.named_parameters():
        if 'weight' in name:
            remain, pruned = (p != 0).sum().item(), (p == 0).sum().item()
            layer_weight[name] = [remain+pruned, remain, pruned, round((remain/(remain+pruned))*100, 2)]
            
    for i in layer_weight.keys():
        for j in range(0, 3):
            layer_weight['all.weight'][j] += layer_weight[i][j]
    layer_weight['all.weight'][3] = round(layer_weight['all.weight'][1]/layer_weight['all.weight'][0]*100, 2)
    #print("-----------------------------------------------------")
    print("------------------------------------------------------------\n",
          "Layer".center(12), "Weight".center(39), "Ratio(%)".rjust(7), sep='')
    for i in layer_weight.keys():
        
        print("%s" % i.ljust(13), ":",
              ("%s (%s | %s)" % (layer_weight[i][0], layer_weight[i][1], layer_weight[i][2])).center(36),
              ("%.2f" % layer_weight[i][3]).rjust(7),
              sep=''
             )
    print("------------------------------------------------------------")
    return layer_weight

# print best accuracy in each iteration
def best_accuracy(best_accu):
    print("Maximum accuracy per weight remaining")
    for i in range(len(best_accu)):
        print("Remaining weight %.1f %% " % (best_accu[i][0] * 100),
             "Epoch %d" % best_accu[i][1],
             "Accu %.4f %%" % best_accu[i][2])

In [8]:
for i in range(param.noi):
    best_accu.append(0)
    best_accu[i] = [0, 0, 0]
    cp_mask = []
    # pruning weight, mask 복사, optimizer 재설정
    # layer별 prune rate를 입력
    cp_mask, optimizer = weight_init(model, model_init, 
                           (1 - ((1-param.prune_per_c) ** i)),
                           (1 - ((1-param.prune_per_f) ** i)),
                           (1 - ((1-param.prune_per_o) ** i))
                          )
    #print(model.fc1.weight[0][300:325])
    # prune 진행 후 남은 weight 수 확인
    weight_counts = weight_counter(model)
    # 총 weight 중 남은 weight의 수 저장 (visdom plot시 사용하기 위함)
    remaining_weight = weight_counts['all.weight'][3]
    print("\n Learning start! (Round : %d, Remaining weight : %s %%) \n" % (i+1 , remaining_weight))
    # 시작 시간 check
    start_time = timeit.default_timer()
    
    for epoch in tqdm(range(param.epochs)):
        # 최초 정확도 확인
        if epoch == 0:
            accuracy, test_loss = test(model, param.test_loader, criterion)
            visdom_plot(vis_plt,torch.Tensor([accuracy]), torch.Tensor([0]),
                        str(remaining_weight)
                       )
            print('[epoch : %d]' % (epoch),
             '(r_loss: x.xxxxx)',
             '(t_loss: x.xxxxx)',
             '(accu: %.4f)' % (accuracy)
             )
        # model training    
        running_loss = train(model, param.train_loader, optimizer, criterion, cp_mask)
        
        # val_set이 있을 경우 val_set을 통해 loss, accu를 구한다.
        if param.valset == 'empty':
            accuracy, test_loss = test(model, param.test_loader, criterion)
        else:
            accuracy, test_loss = test(model, param.val_loader, criterion)
        
        # visdom plot (plot window, x-axis, y-axis, label name)
        visdom_plot(vis_plt, torch.Tensor([(epoch+1) * 1000]), torch.Tensor([accuracy]),
                    str(remaining_weight)
                   )
        
        # best accuracy list (weight_remain, epoch, accuracy)
        if best_accu[i][2] <= accuracy:
            best_accu[i] = [remaining_weight, epoch, accuracy]
        
        print('[epoch : %d]' % (epoch+1),
             '(r_loss: %.5f)' % (running_loss),
             '(t_loss: %.5f)' % (test_loss),
             '(accu: %.4f)' % (accuracy)
             )
        
    stop_time = timeit.default_timer()    
    #print(model.fc1.weight[0][300:325])
    print("Finish!",
          "(Best accu: %.4f)" % best_accu[i][2],
          "(Time taken(sec) : %.2f)" % (stop_time - start_time),
          "\n\n\n\n\n\n\n")

# iteration별 최고 정확도 확인
best_accuracy(best_accu)

------------------------------------------------------------
   Layer                     Weight                Ratio(%)
all.weight   :        266200 (266200 | 0)          100.00
fc1.weight   :        235200 (235200 | 0)          100.00
fc2.weight   :         30000 (30000 | 0)           100.00
fc3.weight   :          1000 (1000 | 0)            100.00
------------------------------------------------------------

 Learning start! (Round : 1, Remaining weight : 100.0 %) 



HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.0735)
[epoch : 1] (r_loss: 0.20805) (t_loss: 0.14887) (accu: 0.9502)
[epoch : 2] (r_loss: 0.10718) (t_loss: 0.11349) (accu: 0.9627)
[epoch : 3] (r_loss: 0.09354) (t_loss: 0.10031) (accu: 0.9679)
[epoch : 4] (r_loss: 0.08629) (t_loss: 0.09715) (accu: 0.9680)
[epoch : 5] (r_loss: 0.08111) (t_loss: 0.08932) (accu: 0.9716)

Finish! (Best accu: 0.9716) (Time taken(sec) : 58.78) 







------------------------------------------------------------
   Layer                     Weight                Ratio(%)
all.weight   :      266200 (213060 | 53140)         80.04
fc1.weight   :      235200 (188160 | 47040)         80.00
fc2.weight   :        30000 (24000 | 6000)          80.00
fc3.weight   :          1000 (900 | 100)            90.00
------------------------------------------------------------

 Learning start! (Round : 2, Remaining weight : 80.04 %) 



HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.1287)
[epoch : 1] (r_loss: 0.19947) (t_loss: 0.11859) (accu: 0.9628)
[epoch : 2] (r_loss: 0.10571) (t_loss: 0.10958) (accu: 0.9642)
[epoch : 3] (r_loss: 0.08939) (t_loss: 0.12707) (accu: 0.9587)
[epoch : 4] (r_loss: 0.08335) (t_loss: 0.09968) (accu: 0.9676)



KeyboardInterrupt: 