In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init as init
import torch.nn.functional as F
import visdom
import copy
import torch.nn.utils.prune as prune
from tqdm.notebook import tqdm
import numpy as np
import timeit

# custom librarys
# model, parameters
import custom.utils as cu

In [2]:
torch.manual_seed(55)
torch.cuda.manual_seed_all(55)
torch.backends.cudnn.enabled = False

In [3]:
GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

Available devices  2
Current cuda device  1
GeForce RTX 2080 Ti
cpu와 cuda 중 다음 기기로 학습함: cuda:1 



In [4]:
# visdom setting
vis = visdom.Visdom()
vis.close(env="main")

# make plot
vis_plt = vis.line(X=torch.Tensor(1).zero_(), Y=torch.Tensor(1).zero_(), 
                    opts=dict(title = 'LeNet300_Accuracy_Tracker',
                              legend=['100.0'],
                             showlegend=True,
                              xtickmin = 0,
                              xtickmax = 20000,
                              ytickmin = 0.95,
                              ytickmax = 0.99
                             )
                   )

def visdom_plot(loss_plot, loss_value, num, name):
    vis.line(X = num,
            Y = loss_value,
            win = loss_plot,
            name = name,
            update = 'append'
            )

Setting up a new session...


In [5]:
#switch = 0
best_accu = []

In [6]:
model_type = 'LeNet300'
#model_type = 'Conv6'

In [7]:
param = cu.parameters()

if model_type == 'LeNet300':
    model = cu.LeNet300().to(device)
elif model_type == 'Conv6':
    model = cu.Conv6().to(device)
    
param.type(model_type)    
model_init = copy.deepcopy(model)
criterion = nn.CrossEntropyLoss().to(device)

In [8]:
param.__dict__

{'lr': 0.0012,
 'epochs': 50,
 'batch_size': 60,
 'weight_decay': 0.0012,
 'iteration': 0,
 'remaining_weight_c': 'empty',
 'remaining_weight_f': 1,
 'remaining_weight_o': 1,
 'prune_per_c': 'empty',
 'prune_per_f': 0.2,
 'prune_per_o': 0.1,
 'noi': 12,
 'trainset': Dataset MNIST
     Number of datapoints: 60000
     Root location: ../MNIST_data/
     Split: Train
     StandardTransform
 Transform: Compose(
                ToTensor()
                Normalize(mean=(0.1307,), std=(0.3081,))
            ),
 'valset': 'empty',
 'testset': Dataset MNIST
     Number of datapoints: 10000
     Root location: ../MNIST_data/
     Split: Test
     StandardTransform
 Transform: Compose(
                ToTensor()
                Normalize(mean=(0.1307,), std=(0.3081,))
            ),
 'train_loader': <torch.utils.data.dataloader.DataLoader at 0x7f44736a6150>,
 'val_loader': 'empty',
 'test_loader': <torch.utils.data.dataloader.DataLoader at 0x7f43fadcae90>,
 'transforms': Compose(
     ToTensor()

In [9]:
param.__dict__

{'lr': 0.0012,
 'epochs': 50,
 'batch_size': 60,
 'weight_decay': 0.0012,
 'iteration': 0,
 'remaining_weight_c': 'empty',
 'remaining_weight_f': 1,
 'remaining_weight_o': 1,
 'prune_per_c': 'empty',
 'prune_per_f': 0.2,
 'prune_per_o': 0.1,
 'noi': 12,
 'trainset': Dataset MNIST
     Number of datapoints: 60000
     Root location: ../MNIST_data/
     Split: Train
     StandardTransform
 Transform: Compose(
                ToTensor()
                Normalize(mean=(0.1307,), std=(0.3081,))
            ),
 'valset': 'empty',
 'testset': Dataset MNIST
     Number of datapoints: 10000
     Root location: ../MNIST_data/
     Split: Test
     StandardTransform
 Transform: Compose(
                ToTensor()
                Normalize(mean=(0.1307,), std=(0.3081,))
            ),
 'train_loader': <torch.utils.data.dataloader.DataLoader at 0x7f44736a6150>,
 'val_loader': 'empty',
 'test_loader': <torch.utils.data.dataloader.DataLoader at 0x7f43fadcae90>,
 'transforms': Compose(
     ToTensor()

In [10]:
param.epochs = 1

In [11]:
param.epochs

1

In [12]:
param.test_loader

<torch.utils.data.dataloader.DataLoader at 0x7f43fadcae90>

# parameter
lr = 0.0012
#epochs = 50
#epochs = 20
epochs = 30
batch_size = 60
weight_decay = 1.2e-3
iteration = 0
remaining_weight = 1
prune_per = 0.2
# number of iteration
noi = 11

switch = 0
best_accu = []
# 마지막 layer의 Pruning rate는 기존의 1/2
# prune_per_ll = prune_per/2

cp_mask

transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

mnist_train = dsets.MNIST(root='../MNIST_data/',
                         train=True,
                         transform=transforms,
                         download=True)
mnist_test = dsets.MNIST(root='../MNIST_data/',
                        train=False,
                        transform=transforms,
                        download=True)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                         shuffle=False,
                                         drop_last=True)

In [13]:
# train, test, prune function
def train(model, dataloader, optimizer, criterion, cp_mask):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, label) in enumerate(dataloader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label)
        loss.backward()
        # 0-weight 학습 방지
        if cp_mask:
            i = 0
            for name, p in model.named_parameters():
                if 'weight' in name:
                    p.grad.data *= cp_mask[i]
                    i += 1
        optimizer.step()
        running_loss += loss / len(dataloader)
    return running_loss

def test(model, dataloader, criterion):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            #test_loss += F.nll_loss(outputs, label, reduction='sum').item() # sum up batch loss
            loss = criterion(outputs, label)
            #predicted = outputs.data.max(1, keepdim=True)[1]
            #correct += predicted.eq(label.data.view_as(predicted)).sum().item()
            
            test_loss += loss / len(dataloader)
            total += label.size(0)
            correct += (predicted == label).sum().item()
        #accuracy =  correct / len(dataloader)
        # 로더 -> 배치 개수 로더.dataset -> 전체 길이, 
    return (correct/total), test_loss

# prune function
# pruning mask 생성 -> mask 복사 -> init값 복사 -> prune 진행
def weight_init(model1, model2, c_rate, f_rate, o_rate):
    # layer별로 지정된 rate만큼 prune mask 생성
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name = 'weight', amount = c_rate)
        if isinstance(module, nn.Linear):
            if name != 'fc3':
                prune.l1_unstructured(module, name = 'weight', amount = f_rate)
            else:
                prune.l1_unstructured(module, name = 'weight', amount = o_rate)
                        
    # mask 복사
    cp_mask = []
    for name, mask in model1.named_buffers():
        cp_mask.append(mask)
    
    # init 값을 model에 복사
    for name, p in model1.named_parameters():
        if 'weight_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
        if 'bias_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
                    
    # prune 진행
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.remove(module, name = 'weight')
        elif isinstance(module, nn.Linear):
            prune.remove(module, name = 'weight')
            
    # copy된 mask return
    return cp_mask

# weight count function
# list type[[name, all, non_zero, zero, per]]
def weight_counter(model):
    layer_weight = []
    all_weight = ['All_weight',0 ,0 ,0, 0]
    for name, p in model.named_parameters():
        if 'weight' in name:
            none_zero_w = (p != 0).sum().item()
            zero_w = (p == 0).sum().item()
            all_w = none_zero_w + zero_w

            all_weight[1] += all_w
            all_weight[2] += none_zero_w
            all_weight[3] += zero_w

            layer_weight.append([name, all_w, none_zero_w, zero_w, round(none_zero_w/all_w, 1)])

    all_weight[4] = round((all_weight[2]/all_weight[1]), 3)
    layer_weight.insert(0, all_weight)
    for i in range(len(layer_weight)):
        print(layer_weight[i])
    
    return layer_weight

print(len(param.train_loader), len(param.val_loader))

def test(model, dataloader, criterion):
    model.eval()
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            
            predicted = torch.argmax(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
            accuracy = (correct/total)

    return accuracy

In [14]:

#optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = 1.2e-3)

In [15]:
#EPS = 1e-6
# number of weight
a = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0)
#b = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)
b = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)

now = (a + b)

def calc_now(model):
    fc1_1 = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc1_0 = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc1 = fc1_1 + fc1_0
    fc1_p = fc1_0 / fc1_1
    fc2_1 = ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc2_0 = ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc2 = fc2_1 + fc2_0
    fc3_1 = ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc3_0 = ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc3 = fc3_1 + fc3_0
    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    print("Remaining weight %.1f %%" %(((fc1_1+fc2_1+fc3_1)/(fc1+fc2+fc3))*100))
    print('total weight :',
        '%d' % (fc1+fc2+fc3),
         '(%d |' % (fc1_1+fc2_1+fc3_1),
         '%d)' % (fc1_0+fc2_0+fc3_0)
         )
    print('fc1 :',
        '%d' % fc1,
         '(%d |' % fc1_1,
         '%d)' % fc1_0
         )
    print('fc2 :',
        '%d' % fc2,
         '(%d |' % fc2_1,
         '%d)' % fc2_0
         )
    print('fc3 :',
        '%d' % fc3,
         '(%d |' % fc3_1,
         '%d)' % fc3_0
         )

a

b

weight_init(model, model_init, 1 - weight_remaining)

weight_init(model, model_init, 1 - weight_remaining)

model.state_dict().keys()

print(model.fc3.weight_orig)

for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        prune.l1_unstructured(module, name = 'weight', amount = 0.9)

    # init 값 복사
for name, p in model.named_parameters():
     if 'weight_orig' in name:
        for name2, p2 in model_init.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break
    if 'bias_orig' in name:
        for name2, p2 in modelinit.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break

for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

print(model.fc3.weight[0][0])

print(model_init.fc3.weight[0][0])

In [16]:
for i in range(param.noi):
    best_accu.append(0)
    best_accu[i] = [0, 0, 0]
    cp_mask = []
    
    if i != 0:
        # x1 = 1 * (1-0.2)
        # x2 = 1 * (1-0.2) * (1-0.2)
        # ...
        # xn = 1 * (1-0.2) ** n -> 남은 weight
        # pruning weight 1 - (1-0.2)**n
        
        # 필요한 값은 pruning weight 
        # c = conv f = fc o = output layer
        if  model_type == 'LeNet300':
             param.remaining_weight_c = 1
        else:
            param.remaining_weight_c = (1-param.prune_per_c) ** i
        param.remaining_weight_f = (1-param.prune_per_f) ** i
        param.remaining_weight_o = (1-param.prune_per_o) ** i
        #remaining_weight = param.remaining_weight_f
        #1- 남은 웨이트 -> prune 할 비율
        # pruning 및 mask 복사
        cp_mask = weight_init(model, model_init,
                              1 - param.remaining_weight_c,
                              1 - param.remaining_weight_f,
                              1 - param.remaining_weight_o
                             )
        #switch = 1
    optimizer = optim.Adam(model.parameters(), lr = param.lr, weight_decay = param.weight_decay)
    print("Learning start!\n")
    #calc_now(model)
    num_of_weight = weight_counter(model)
    #print(model.fc3.weight[0])
    
    start_time = timeit.default_timer()
    #pw = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)
    #print('pruned weight (All | Pruned) %d |' % now,'%d' % pw)
    #print(model.fc3.weight[0][0])
    #print(model_init.fc3.weight[0][0])
    
    
    
    
    for epoch in tqdm(range(param.epochs)):
        # epoch가 0일때 정확도 계산
        if epoch == 0:
            accuracy, test_loss = test(model, param.test_loader, criterion)
            visdom_plot(vis_plt,torch.Tensor([accuracy]), torch.Tensor([0]),
                        str(round(num_of_weight[0][4]*100, 1))
                       )
            print('[epoch : %d]' % (epoch),
             '(loss: x.xxxxx)',
             '(accu: %.4f)' % (accuracy)
             )
        # model training    
        running_loss = train(model, param.train_loader, optimizer, criterion, cp_mask)
        
        # val_set이 있을 경우 val_set을 통해 loss, accu를 구한다.
        if param.valset == 'empty':
            accuracy, test_loss = test(model, param.test_loader, criterion)
        else:
            accuracy, test_loss = test(model, param.val_loader, criterion)
        
        # visdom plot
        visdom_plot(vis_plt, torch.Tensor([accuracy]), torch.Tensor([(epoch+1) * 1000]),
                    str(round(num_of_weight[0][4]*100, 1))
                   )
        
        # best accuracy list (weight_remain, epoch, accuracy)
        if best_accu[i][2] <= accuracy:
            best_accu[i] = [round(num_of_weight[0][4]*100, 1), epoch, accuracy]
        
        print('[epoch : %d]' % (epoch+1),
             '(r_loss: %.5f)' % (running_loss),
             '(t_loss: %.5f)' % (test_loss),
             '(accu: %.4f)' % (accuracy)
             )
    stop_time = timeit.default_timer()
    #print(model.fc3.weight[0][0])
    #print(model_init.fc3.weight[0][0])
    
    #print(model.fc3.weight[0])
    
    print("Finish!",
          "(Best accu: %.4f)" % best_accu[i][2],
          "(Time taken(sec) : %.2f)" % (stop_time - start_time),
          "\n\n\n\n\n\n\n")
    #calc_now(model)

    

Learning start!

['All_weight', 266200, 266200, 0, 1.0]
['fc1.weight', 235200, 235200, 0, 1.0]
['fc2.weight', 30000, 30000, 0, 1.0]
['fc3.weight', 1000, 1000, 0, 1.0]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.1053)
[epoch : 1] (r_loss: 0.22417) (t_loss: 0.12337) (accu: 0.9617)

Finish! (Best accu: 0.9617) (Time taken(sec) : 14.86) 







Learning start!

['All_weight', 266200, 213060, 53140, 0.8]
['fc1.weight', 235200, 188160, 47040, 0.8]
['fc2.weight', 30000, 24000, 6000, 0.8]
['fc3.weight', 1000, 900, 100, 0.9]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.1267)
[epoch : 1] (r_loss: 0.21979) (t_loss: 0.12898) (accu: 0.9585)

Finish! (Best accu: 0.9585) (Time taken(sec) : 14.30) 







Learning start!

['All_weight', 266200, 170538, 95662, 0.641]
['fc1.weight', 235200, 150528, 84672, 0.6]
['fc2.weight', 30000, 19200, 10800, 0.6]
['fc3.weight', 1000, 810, 190, 0.8]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.1806)
[epoch : 1] (r_loss: 0.20324) (t_loss: 0.11282) (accu: 0.9650)

Finish! (Best accu: 0.9650) (Time taken(sec) : 14.42) 







Learning start!

['All_weight', 266200, 136511, 129689, 0.513]
['fc1.weight', 235200, 120422, 114778, 0.5]
['fc2.weight', 30000, 15360, 14640, 0.5]
['fc3.weight', 1000, 729, 271, 0.7]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.1790)
[epoch : 1] (r_loss: 0.19153) (t_loss: 0.12398) (accu: 0.9629)

Finish! (Best accu: 0.9629) (Time taken(sec) : 14.86) 







Learning start!

['All_weight', 266200, 109282, 156918, 0.411]
['fc1.weight', 235200, 96338, 138862, 0.4]
['fc2.weight', 30000, 12288, 17712, 0.4]
['fc3.weight', 1000, 656, 344, 0.7]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.2020)
[epoch : 1] (r_loss: 0.17964) (t_loss: 0.10236) (accu: 0.9712)

Finish! (Best accu: 0.9712) (Time taken(sec) : 14.88) 







Learning start!

['All_weight', 266200, 87490, 178710, 0.329]
['fc1.weight', 235200, 77070, 158130, 0.3]
['fc2.weight', 30000, 9830, 20170, 0.3]
['fc3.weight', 1000, 590, 410, 0.6]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.2890)
[epoch : 1] (r_loss: 0.16418) (t_loss: 0.09958) (accu: 0.9721)

Finish! (Best accu: 0.9721) (Time taken(sec) : 14.86) 







Learning start!

['All_weight', 266200, 70051, 196149, 0.263]
['fc1.weight', 235200, 61656, 173544, 0.3]
['fc2.weight', 30000, 7864, 22136, 0.3]
['fc3.weight', 1000, 531, 469, 0.5]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.2959)
[epoch : 1] (r_loss: 0.15752) (t_loss: 0.09388) (accu: 0.9699)

Finish! (Best accu: 0.9699) (Time taken(sec) : 14.76) 







Learning start!

['All_weight', 266200, 56094, 210106, 0.211]
['fc1.weight', 235200, 49325, 185875, 0.2]
['fc2.weight', 30000, 6291, 23709, 0.2]
['fc3.weight', 1000, 478, 522, 0.5]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.2299)
[epoch : 1] (r_loss: 0.15345) (t_loss: 0.08650) (accu: 0.9732)

Finish! (Best accu: 0.9732) (Time taken(sec) : 14.89) 







Learning start!

['All_weight', 266200, 44923, 221277, 0.169]
['fc1.weight', 235200, 39460, 195740, 0.2]
['fc2.weight', 30000, 5033, 24967, 0.2]
['fc3.weight', 1000, 430, 570, 0.4]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.2112)
[epoch : 1] (r_loss: 0.15490) (t_loss: 0.08849) (accu: 0.9741)

Finish! (Best accu: 0.9741) (Time taken(sec) : 14.54) 







Learning start!

['All_weight', 266200, 35982, 230218, 0.135]
['fc1.weight', 235200, 31568, 203632, 0.1]
['fc2.weight', 30000, 4027, 25973, 0.1]
['fc3.weight', 1000, 387, 613, 0.4]


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (loss: x.xxxxx) (accu: 0.3336)



KeyboardInterrupt: 

In [None]:
a = []

In [None]:
a.append(weight_counter(model))

In [None]:
b = weight_counter(model)

In [None]:
b

In [None]:
print(b)

In [None]:
round(num_of_weight[0][4]*100,3)

In [None]:
num_of_weight[0][4]*100

In [None]:
cp_mask

In [None]:
model.state_dict().keys()

In [None]:
a = [[1, 2], [10, 20]]

#배열이름.inset(위치, 넣을 값)
print(a[0][1])

#name, all, non_zero, zero, per
def weight_counter(model):
    layer_weight = []
    all_weight = ['All_weight',0 ,0 ,0, 0]
    for name, p in model.named_parameters():
        if 'weight' in name:
            none_zero_w = (p != 0).sum().item()
            zero_w = (p == 0).sum().item()
            all_w = none_zero_w + zero_w

            all_weight[1] += all_w
            all_weight[2] += none_zero_w
            all_weight[3] += zero_w

            layer_weight.append([name, all_w, none_zero_w, zero_w, round(none_zero_w/all_w,4)])

    all_weight[4] = round((all_weight[2]/all_weight[1]), 4)
    layer_weight.insert(0, all_weight)
    for i in range(len(layer_weight)):
        print(layer_weight[i])
    
    return layer_weight

In [None]:
weight_counter(model)

In [None]:
(model.conv1.weight != 0).sum()

In [None]:
(model.conv1.weight != 0).sum()

In [None]:
(model.conv1.weight == 0).sum()

In [None]:
(model.fc1.weight != 0).sum(dim=1).sum(dim=0)

In [None]:
(model.fc1.weight != 0).sum()

def calc_now(model):
    fc1_1 = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc1_0 = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc1 = fc1_1 + fc1_0
    fc1_p = fc1_0 / fc1_1
    fc2_1 = ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc2_0 = ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc2 = fc2_1 + fc2_0
    fc3_1 = ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc3_0 = ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc3 = fc3_1 + fc3_0
    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    print("Remaining weight %.1f %%" %(((fc1_1+fc2_1+fc3_1)/(fc1+fc2+fc3))*100))
    print('total weight :',
        '%d' % (fc1+fc2+fc3),
         '(%d |' % (fc1_1+fc2_1+fc3_1),
         '%d)' % (fc1_0+fc2_0+fc3_0)
         )
    print('fc1 :',
        '%d' % fc1,
         '(%d |' % fc1_1,
         '%d)' % fc1_0
         )
    print('fc2 :',
        '%d' % fc2,
         '(%d |' % fc2_1,
         '%d)' % fc2_0
         )
    print('fc3 :',
        '%d' % fc3,
         '(%d |' % fc3_1,
         '%d)' % fc3_0
         )

In [None]:
for name, module in model1.named_modules():
    if isinstance(module, nn.Conv2d):
        prune.remove(module, name = 'weight')
    elif isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

[epoch : 0] (loss: x.xxxxx) (accu: 0.1053)
[epoch : 1] (r_loss: 0.00008) (t_loss: -8.30549) (accu: 0.9617)
[epoch : 2] (r_loss: 0.00012) (t_loss: -8.05022) (accu: 0.9668)
[epoch : 3] (r_loss: 0.00021) (t_loss: -8.19537) (accu: 0.9714)
[epoch : 4] (r_loss: 0.00009) (t_loss: -7.94015) (accu: 0.9688)
[epoch : 5] (r_loss: 0.00006) (t_loss: -8.75366) (accu: 0.9603)


1
[epoch : 0] (loss: x.xxxxx) (accu: 0.1053)
[epoch : 1] (loss: 0.00008) (accu: 0.9617)
[epoch : 2] (loss: 0.00012) (accu: 0.9668)
[epoch : 3] (loss: 0.00021) (accu: 0.9714)
[epoch : 4] (loss: 0.00009) (accu: 0.9688)
[epoch : 5] (loss: 0.00006) (accu: 0.9603)
2
[epoch : 0] (loss: x.xxxxx) (accu: 0.0980)
[epoch : 1] (loss: 0.00002) (accu: 0.9651)
[epoch : 2] (loss: 0.00004) (accu: 0.9611)
[epoch : 3] (loss: 0.00003) (accu: 0.9703)
[epoch : 4] (loss: 0.00007) (accu: 0.9706)
[epoch : 5] (loss: 0.00002) (accu: 0.9663)

In [None]:
print(model.fc3.weight[0])

In [None]:
print("Maximum accuracy per weight remaining")
for i in range(len(best_accu)):
    print("Remaining weight %.1f %% " % (best_accu[i][0] * 100),
         "Epoch %d" % best_accu[i][1],
         "Accu %.4f %%" % best_accu[i][2])

In [None]:
print(model.fc3.weight)

for name, p in model.named_parameters():
    EPS = 1e-6
    if 'weight' in name:
        tensor = p.data.cpu().numpy()
        grad_tensor = p.grad.data.cpu().numpy()
        grad_tensor = np.where(tensor < EPS, 0, grad_tensor)
        p.grad.data = torch.from_numpy(grad_tensor).to(device)
        print(p.grad.data)

데이터 숫자 60000
배치 길이 60
배치 개수 1000
epoch = 50

이터레이션 횟수 50000