In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init as init
import torch.nn.functional as F
import visdom
import copy
import torch.nn.utils.prune as prune
from tqdm.notebook import tqdm
import numpy as np
import timeit

# custom libraries
import custom.model as cm # 저장된 model

ModuleNotFoundError: No module named 'custom.model'

In [None]:
# Optional reproducibility switches (currently disabled): uncomment to fix the
# CPU / CUDA RNG seeds and to turn cuDNN off.
#torch.manual_seed(55)
#torch.cuda.manual_seed_all(55)
#torch.backends.cudnn.enabled = False

In [None]:
# Preferred CUDA device index. It is only honoured when CUDA is available and
# the index actually exists on this machine.
GPU_NUM = 1

if torch.cuda.is_available():
    # Fall back to GPU 0 when the preferred index is not present (e.g. single-GPU hosts).
    gpu_index = GPU_NUM if GPU_NUM < torch.cuda.device_count() else 0
    device = torch.device(f'cuda:{gpu_index}')
    # torch.cuda.set_device / current_device / get_device_name raise on CPU-only
    # machines, so every CUDA-specific call stays inside this branch.
    torch.cuda.set_device(device)

    print ('Available devices ', torch.cuda.device_count())
    print ('Current cuda device ', torch.cuda.current_device())
    print(torch.cuda.get_device_name(device))
else:
    device = torch.device('cpu')

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

In [None]:
# Visdom setup: connect to the server and close the "main" environment.
vis = visdom.Visdom()
vis.close(env="main")

# Create the accuracy-tracker window, seeded with a single (0, 0) point.
vis_plt = vis.line(
    X=torch.zeros(1),
    Y=torch.zeros(1),
    opts={
        'title': 'LeNet300_Accuracy_Tracker',
        'legend': ['100'],
        'showlegend': True,
        # Axis tick ranges of the tracker window.
        'xtickmin': 0,
        'xtickmax': 20000,
        'ytickmin': 0.95,
        'ytickmax': 0.99,
    },
)

def visdom_plot(loss_plot, loss_value, num, name):
    """Append one point to a trace of an existing visdom line window.

    Args:
        loss_plot: window handle passed to ``vis.line`` as ``win``.
        loss_value: Y value(s) to append.
        num: X value(s) to append.
        name: name of the trace that receives the point.
    """
    append_kwargs = {
        'X': num,
        'Y': loss_value,
        'win': loss_plot,
        'name': name,
        'update': 'append',
    }
    vis.line(**append_kwargs)

In [None]:
# ---- Hyper-parameters -------------------------------------------------------
# Optimizer settings (passed to Adam in the training loop).
lr, weight_decay = 0.0012, 1.2e-3

# Training schedule. Alternative epoch counts kept for reference: 50, 20.
epochs, batch_size = 30, 60

# Pruning schedule.
prune_per = 0.2        # fraction of the remaining weights removed per pruning round
noi = 11               # number of rounds executed by the training loop (range(noi))
remaining_weight = 1   # fraction of weights still present; shrinks every round

# Run-time state.
iteration = 0          # not referenced elsewhere in the visible notebook
switch = 0             # set to 1 by the training loop once a mask exists; enables gradient masking in train()
best_accu = []         # filled per round with [remaining weight, epoch, accuracy]

# The last layer's pruning rate is half of the regular rate (applied inside weight_init).
# prune_per_ll = prune_per/2

In [None]:
# NOTE: the pipeline is deliberately NOT stored under the name `transforms`.
# Rebinding that name shadows the imported torchvision module, so running this
# cell a second time would fail on `transforms.Compose`.
# The Normalize constants are presumably the MNIST mean / std — TODO confirm.
mnist_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

mnist_train = dsets.MNIST(root='../MNIST_data/',
                          train=True,
                          transform=mnist_transform,
                          download=True)
mnist_test = dsets.MNIST(root='../MNIST_data/',
                         train=False,
                         transform=mnist_transform,
                         download=True)

# Training batches: shuffled, incomplete last batch dropped.
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           drop_last=True)

# Evaluation batches. The original loader fell back to the default batch size
# of 1 (one forward pass per test image); a larger batch evaluates the same
# samples far faster. drop_last=False guarantees that every test sample is
# counted regardless of the batch size chosen here.
test_batch_size = 1000
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=test_batch_size,
                                          shuffle=False,
                                          drop_last=False)

In [None]:
def train(model, dataloader, optimizer, criterion, cp_mask, switch):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: network to optimise. Batches are moved to the device of its
            first parameter.
        dataloader: iterable of ``(data, label)`` batches that supports ``len()``.
        optimizer: optimizer that steps ``model``'s parameters.
        criterion: loss callable invoked as ``criterion(outputs, label)``.
        cp_mask: sequence of masks, one per parameter whose name contains
            ``'weight'``, in ``model.named_parameters()`` order. Only read
            when ``switch == 1``.
        switch: when ``1``, every weight gradient is multiplied by its mask
            before the optimizer step, so entries masked with 0 get a zero
            gradient.

    Returns:
        The mean of the per-batch losses as a detached 0-dim tensor, or
        ``0.0`` when ``dataloader`` yields no batches.
    """
    model.train()
    target_device = next(model.parameters()).device
    num_batches = len(dataloader)

    # The set of weight parameters does not change during the epoch, so it is
    # collected once instead of on every batch.
    masked_params = []
    if switch == 1:
        masked_params = [p for name, p in model.named_parameters() if 'weight' in name]

    running_loss = 0.0
    for data, label in dataloader:
        data, label = data.to(target_device), label.to(target_device)
        optimizer.zero_grad()
        loss = criterion(model(data), label)
        loss.backward()

        # Keep pruned (zeroed) weights from being trained again.
        for idx, param in enumerate(masked_params):
            param.grad.mul_(cp_mask[idx])

        optimizer.step()

        # Accumulate (the original overwrote the value, reporting only the
        # last batch) and detach so the autograd graph is not kept alive.
        running_loss = running_loss + loss.detach() / num_batches
    return running_loss

def test(model, dataloader, criterion):
    model.eval()
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            
            predicted = torch.argmax(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
            accuracy = (correct/total)

    return accuracy

# prune function
# build pruning masks -> copy the masks -> restore the initial values -> make the pruning permanent
def weight_init(model1, model2, rate):
    """Prune ``model1`` by weight magnitude and rewind it to ``model2``'s values.

    Steps:
      1. Attach an L1-unstructured pruning mask to the ``weight`` of every
         ``nn.Linear`` in ``model1``. The layer named ``'fc3'`` is pruned at
         ``rate / 2``, every other layer at ``rate``.
      2. Collect the resulting masks.
      3. Copy the parameter values of ``model2`` into ``model1``.
      4. Remove the pruning re-parametrization, which bakes the mask into
         ``weight``.

    Unlike the original body, step 3 also resets biases: the original only
    looked for ``bias_orig``, which never exists because biases are not
    pruned, so biases silently kept their trained values.

    Args:
        model1: model to prune and rewind; modified in place.
        model2: model holding the initial parameter values; parameters are
            matched to ``model1`` by name.
        rate: pruning amount handed to ``prune.l1_unstructured``.

    Returns:
        List with one mask tensor per ``nn.Linear`` layer of ``model1``, in
        ``named_modules()`` order.
    """
    linear_layers = [(layer_name, layer)
                     for layer_name, layer in model1.named_modules()
                     if isinstance(layer, nn.Linear)]

    # 1) build the pruning masks ('fc3' gets half the rate to avoid a bottleneck)
    for layer_name, layer in linear_layers:
        amount = (rate / 2) if layer_name == 'fc3' else rate
        prune.l1_unstructured(layer, name='weight', amount=amount)

    # 2) copy the masks (only pruning masks, not unrelated buffers)
    cp_mask = [layer.weight_mask for _, layer in linear_layers]

    # 3) restore the initial values; a pruned tensor is exposed as '<name>_orig'
    suffix = '_orig'
    init_params = dict(model2.named_parameters())
    with torch.no_grad():
        for param_name, param in model1.named_parameters():
            source_name = param_name[:-len(suffix)] if param_name.endswith(suffix) else param_name
            if source_name in init_params:
                param.copy_(init_params[source_name])

    # 4) make the pruning permanent
    for _, layer in linear_layers:
        prune.remove(layer, name='weight')

    return cp_mask

In [None]:
# Build the network on the selected device and keep a copy of its freshly
# initialised parameters; weight_init() copies values back from that copy.
model = cm.LeNet300()
model = model.to(device)
model_init = copy.deepcopy(model)

# Classification loss used by train().
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
# The optimizer itself is created inside the pruning loop.
#optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = 1.2e-3)

In [None]:
# Count the non-zero (a) and zero (b) entries of the fc1 / fc2 / fc3 weights.
a = sum((fc.weight != 0).sum() for fc in (model.fc1, model.fc2, model.fc3))
b = sum((fc.weight == 0).sum() for fc in (model.fc1, model.fc2, model.fc3))

# Total number of weights in the three layers.
now = a + b

In [None]:
def calc_now(model):
    """Print weight counts for ``fc1``, ``fc2`` and ``fc3`` of ``model``.

    Four lines are printed, each formatted as ``total (non-zero | zero)``:
    first the sum over the three layers, then one line per layer in the order
    fc1, fc2, fc3.

    Args:
        model: object exposing ``fc1``, ``fc2`` and ``fc3``, each with a
            ``weight`` tensor.
    """
    per_layer = []
    for layer in (model.fc1, model.fc2, model.fc3):
        nonzero = int((layer.weight != 0).sum())
        zero = int((layer.weight == 0).sum())
        per_layer.append((nonzero + zero, nonzero, zero))

    # Column-wise sums give the whole-network line printed first.
    overall = tuple(sum(column) for column in zip(*per_layer))

    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    for total, nonzero, zero in [overall] + per_layer:
        print('%d' % total,
              '(%d |' % nonzero,
              '%d)' % zero
              )

# --- Scratch sanity checks for the pruning / rewinding steps ------------------
# The original snippets used undefined names (`weight_remaining`, `modelinit`),
# had inconsistent indentation, and read `weight_orig` after the pruning
# re-parametrization had already been removed. They are reordered here so they
# run top to bottom, and they operate on a throw-away copy so that `model`
# reaches the training loop unpruned.

# Non-zero / zero weight counts computed earlier in the notebook.
print(a)
print(b)

scratch_model = copy.deepcopy(model)

# Full prune-and-rewind helper at the current remaining-weight fraction.
scratch_mask = weight_init(scratch_model, model_init, 1 - remaining_weight)

# Manual walk-through of the same steps with a fixed 90% pruning amount.
for name, module in scratch_model.named_modules():
    if isinstance(module, nn.Linear):
        prune.l1_unstructured(module, name = 'weight', amount = 0.9)

# While the re-parametrization is attached, the pruned tensor is stored as
# `weight_orig` (plus a `weight_mask` buffer).
print(scratch_model.state_dict().keys())
print(scratch_model.fc3.weight_orig)

# Copy the initial values into the re-parametrized tensors.
for name, p in scratch_model.named_parameters():
    if 'weight_orig' in name or 'bias_orig' in name:
        for name2, p2 in model_init.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break

# Make the pruning permanent.
for name, module in scratch_model.named_modules():
    if isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

# Compare one entry of the pruned-and-rewound layer with its initial value.
print(scratch_model.fc3.weight[0][0])

print(model_init.fc3.weight[0][0])

In [None]:
# Iterative pruning experiment: round 0 trains the dense network; every later
# round prunes, rewinds to the initial values and retrains.
for i in range(noi):
    # One entry per round: [remaining weight fraction, best epoch, best accuracy].
    # Appending and addressing the last entry keeps this correct even if
    # best_accu already holds entries from an earlier run of this cell.
    best_accu.append([0, 0, 0])
    cp_mask = []
    if i != 0:
        remaining_weight = remaining_weight * (1-prune_per)
        cp_mask = weight_init(model, model_init, 1 - remaining_weight)
        switch = 1
    # A fresh optimizer per round, so no optimizer state carries over.
    optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = weight_decay)
    # %.1f instead of %d: the original truncated e.g. 51.2 to 51.
    print("Learning start(remaining weight : %.1f%%)" % (remaining_weight * 100))
    start_time = timeit.default_timer()
    pw = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)
    print('pruned weight (All | Pruned) %d |' % now,'%d' % pw)
    calc_now(model)

    # Name of this round's trace in the visdom window.
    trace_name = str(round(remaining_weight*100, 1))
    # Iterations per epoch; replaces the hard-coded 1000 (= 60000 / 60) so the
    # x-axis stays correct if batch_size changes.
    steps_per_epoch = len(train_loader)

    for epoch in tqdm(range(epochs)):
        if epoch == 0:
            # Accuracy before any training in this round.
            accuracy = test(model, test_loader, criterion)
            visdom_plot(vis_plt, torch.Tensor([accuracy]), torch.Tensor([0]), trace_name)
            print('[epoch : %d]' % (epoch),
             '(loss: x.xxxxx)',
             '(accu: %.4f)' % (accuracy)
             )
        running_loss = train(model, train_loader, optimizer, criterion, cp_mask, switch)
        accuracy = test(model, test_loader, criterion)
        visdom_plot(vis_plt, torch.Tensor([accuracy]), torch.Tensor([(epoch+1) * steps_per_epoch]), trace_name)

        # best accuracy list (weight_remain, epoch, accuracy)
        # The epoch is stored 1-based so it matches the '[epoch : N]' log line
        # below (the original stored the 0-based loop index).
        if best_accu[-1][2] <= accuracy:
            best_accu[-1] = [remaining_weight, epoch + 1, accuracy]

        print('[epoch : %d]' % (epoch+1),
             '(loss: %.5f)' % (running_loss),
             '(accu: %.4f)' % (accuracy)
             )
    stop_time = timeit.default_timer()
    print("Finish!",
          "(Best accu: %.4f)" % best_accu[-1][2],
          "(Time taken(sec) : %.2f)" % (stop_time - start_time),
          "\n")
    calc_now(model)
    print("\n\n\n\n\n\n")
    

In [None]:
# Inspect the bias of the fc3 layer.
print(model.fc3.bias)

In [None]:
# Summary table: best result of every pruning round.
# Each best_accu entry is [remaining weight fraction, epoch, accuracy fraction].
print("Maximum accuracy per weight remaining")
for remaining, best_epoch, best_accuracy in best_accu:
    # The accuracy is stored as a fraction, so it is scaled by 100 before being
    # printed next to a '%' sign (the original printed e.g. "0.9812 %").
    print("Remaining weight %.1f %% " % (remaining * 100),
         "Epoch %d" % best_epoch,
         "Accu %.2f %%" % (best_accuracy * 100))

In [None]:
print(model.fc3.weight)

# Zero the gradient of every (near-)zero weight so pruned weights stay pruned.
EPS = 1e-6
for name, p in model.named_parameters():
    # Only weights with an existing gradient can be masked.
    if 'weight' not in name or p.grad is None:
        continue
    # Compare the magnitude: the original `tensor < EPS` was also true for
    # every negative weight and therefore froze all of them as well.
    pruned = p.data.abs() < EPS
    # Masking in place on the parameter's device avoids the numpy round trip.
    p.grad.data.masked_fill_(pruned, 0)
    print(p.grad.data)

데이터 개수: 60000
배치 크기: 60
epoch당 배치 개수: 60000 / 60 = 1000
epoch = 50

총 이터레이션 횟수: 1000 × 50 = 50000 (현재 설정인 epochs = 30 기준으로는 1000 × 30 = 30000)