In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init as init
import torch.nn.functional as F
import visdom
import copy
import torch.nn.utils.prune as prune
from tqdm.notebook import tqdm
import numpy as np
import timeit

# custom librarys (model, parameters...)
import custom.utils as cu

In [2]:
torch.manual_seed(55)
torch.cuda.manual_seed_all(55)
torch.backends.cudnn.enabled = False

In [3]:
GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

Available devices  2
Current cuda device  1
GeForce RTX 2080 Ti
cpu와 cuda 중 다음 기기로 학습함: cuda:1 



In [4]:
#switch = 0
best_accu = []

In [5]:
#model_type = 'LeNet300'
model_type = 'Conv6'

In [6]:
param = cu.parameters()

if model_type == 'LeNet300':
    model = cu.LeNet300().to(device)
elif model_type == 'Conv6':
    model = cu.Conv6().to(device)
    
param.type(model_type)    
model_init = copy.deepcopy(model)
criterion = nn.CrossEntropyLoss().to(device)

In [7]:
# parameter check
print('\n'.join("%s: %s" % item for item in param.__dict__.items()))

lr: 0.0003
epochs: 50
batch_size: 60
weight_decay: 0.003
iteration: 0
prune_per_c: 0.15
prune_per_f: 0.2
prune_per_o: 0.1
noi: 12
trainset: Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ../CIFAR10/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
           )
valset: Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ../CIFAR10/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
           )
testset: Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ../CIFAR10/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
           )
train_loader: <torch.utils.data.dataloader.DataLoader object at 

print('\n'.join("%s: %s" % item for item in sorted(param.__dict__.items(), key=lambda i: i[0])) )

print('\n'.join("%s: %s" % item for item in sorted(param.__dict__.items(), key=lambda i: i[0])) )

print('\n'.join("%s: %s" % item for item in param.__dict__.items()))

In [8]:
len(param.train_loader.sampler)

45000

In [9]:
# visdom setting
vis = visdom.Visdom()
vis.close(env="main")

Tracker_type = "Accuracy_Tracker"
title = model_type + "_" + Tracker_type

# make plot
vis_plt = vis.line(X=torch.Tensor(1).zero_(), Y=torch.Tensor(1).zero_(), 
                    opts=dict(title = title,
                              legend=['100.0'],
                             showlegend=True,
                              xtickmin = 0,
                              xtickmax = 20000,
                              ytickmin = 0.95,
                              ytickmax = 0.99
                             )
                   )

def visdom_plot(loss_plot, loss_value, num, name):
    vis.line(X = num,
            Y = loss_value,
            win = loss_plot,
            name = name,
            update = 'append'
            )

Setting up a new session...


In [10]:
param.epochs = 1

In [11]:
param.epochs

1

In [12]:
len(param.train_loader)

750

In [13]:
len(param.val_loader)

83

# parameter
lr = 0.0012
#epochs = 50
#epochs = 20
epochs = 30
batch_size = 60
weight_decay = 1.2e-3
iteration = 0
remaining_weight = 1
prune_per = 0.2
# number of iteration
noi = 11

switch = 0
best_accu = []
# 마지막 layer의 Pruning rate는 기존의 1/2
# prune_per_ll = prune_per/2

cp_mask

transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

mnist_train = dsets.MNIST(root='../MNIST_data/',
                         train=True,
                         transform=transforms,
                         download=True)
mnist_test = dsets.MNIST(root='../MNIST_data/',
                        train=False,
                        transform=transforms,
                        download=True)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                         shuffle=False,
                                         drop_last=True)

In [22]:
# train, test, prune function
def train(model, dataloader, optimizer, criterion, cp_mask):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, label) in enumerate(dataloader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label)
        loss.backward()
        # 0-weight 학습 방지
        if cp_mask:
            i = 0
            for name, p in model.named_parameters():
                if 'weight' in name:
                    p.grad.data *= cp_mask[i]
                    i += 1
        optimizer.step()
        running_loss += loss / len(dataloader)
    return running_loss

def test(model, dataloader, criterion):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            #test_loss += F.nll_loss(outputs, label, reduction='sum').item() # sum up batch loss
            loss = criterion(outputs, label)
            #predicted = outputs.data.max(1, keepdim=True)[1]
            #correct += predicted.eq(label.data.view_as(predicted)).sum().item()
            
            test_loss += loss / len(dataloader)
            total += label.size(0)
            correct += (predicted == label).sum().item()
        #accuracy =  correct / len(dataloader)
        # 로더 -> 배치 개수 로더.dataset -> 전체 길이, 
    return (correct/total), test_loss

# prune function
# pruning mask 생성 -> mask 복사 -> init값 복사 -> prune 진행
def weight_init(model1, model2, c_rate, f_rate, o_rate):
    # layer별로 지정된 rate만큼 prune mask 생성
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name = 'weight', amount = c_rate)
        if isinstance(module, nn.Linear):
            if name != 'fc3':
                prune.l1_unstructured(module, name = 'weight', amount = f_rate)
            else:
                prune.l1_unstructured(module, name = 'weight', amount = o_rate)
                        
    # mask 복사
    cp_mask = []
    for name, mask in model1.named_buffers():
        cp_mask.append(mask)
    
    # init 값을 model에 복사
    for name, p in model1.named_parameters():
        if 'weight_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
        if 'bias_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
                    
    # prune 진행
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.remove(module, name = 'weight')
        elif isinstance(module, nn.Linear):
            prune.remove(module, name = 'weight')
            
    # copy된 mask return
    return cp_mask

# weight count function
# dict type['name' : [all, non_zero, zero, ratio]]
def weight_counter(model):
    layer_weight = {'all.weight':[0, 0, 0, 0]}
    
    for name, p in model.named_parameters():
        if 'weight' in name:
            remain, pruned = (p != 0).sum().item(), (p == 0).sum().item()
            layer_weight[name] = [remain+pruned, remain, pruned, round((remain/(remain+pruned))*100, 2)]
            
    for i in layer_weight.keys():
        for j in range(0, 3):
            layer_weight['all.weight'][j] += layer_weight[i][j]
    layer_weight['all.weight'][3] = round(layer_weight['all.weight'][1]/layer_weight['all.weight'][0]*100, 2)

    print("Layer".center(12), "Weight".center(39), "Ratio(%)".rjust(7), sep='')
    for i in layer_weight.keys():
        print("%s" % i.ljust(13), ":",
              ("%s (%s | %s)" % (layer_weight[i][0], layer_weight[i][1], layer_weight[i][2])).center(36),
              ("%.2f" % layer_weight[i][3]).rjust(7),
              sep=''
             )
        
    return layer_weight

In [24]:
aaa = weight_counter(model)

   Layer                     Weight                Ratio(%)
all.weight   :     2261184 (1275164 | 986020)       56.39
conv1.weight :         1728 (1061 | 667)            61.40
conv2.weight :       36864 (22639 | 14225)          61.41
conv3.weight :       73728 (45278 | 28450)          61.41
conv4.weight :       147456 (90556 | 56900)         61.41
conv5.weight :      294912 (181113 | 113799)        61.41
conv6.weight :      589824 (362226 | 227598)        61.41
fc1.weight   :     1048576 (536871 | 511705)        51.20
fc2.weight   :       65536 (33554 | 31982)          51.20
fc3.weight   :         2560 (1866 | 694)            72.89


In [48]:
z = {}

In [36]:
z[0] = [0]

In [38]:
z[i] = 1

In [40]:
z[i] = aaa

In [49]:
z[1] =aaa

In [50]:
z

{1: {'all.weight': [2261184, 1275164, 986020, 56.39],
  'conv1.weight': [1728, 1061, 667, 61.4],
  'conv2.weight': [36864, 22639, 14225, 61.41],
  'conv3.weight': [73728, 45278, 28450, 61.41],
  'conv4.weight': [147456, 90556, 56900, 61.41],
  'conv5.weight': [294912, 181113, 113799, 61.41],
  'conv6.weight': [589824, 362226, 227598, 61.41],
  'fc1.weight': [1048576, 536871, 511705, 51.2],
  'fc2.weight': [65536, 33554, 31982, 51.2],
  'fc3.weight': [2560, 1866, 694, 72.89]}}

In [42]:
z.keys()

dict_keys([0])

In [45]:
z[0]['all.weight'][0]

2261184

In [32]:
i = 0

In [34]:
z[i] += [aaa]

KeyError: 0

weight_counter2(model)

    layer_weight = {'all.weight':[0, 0, 0, 0]}
    for name, p in model.named_parameters():
        if 'weight' in name:
            remain, pruned = (p != 0).sum().item(), (p == 0).sum().item()
            layer_weight[name] = [remain+pruned, remain, pruned, round((remain/remain+pruned), 1)]
    for i in layer_weight.keys():
        for j in range(0, 3):
            layer_weight['all.weight'][j] += layer_weight[i][j]
    layer_weight['all.weight'][3] = round(layer_weight['all.weight'][1]/layer_weight['all.weight'][0], 1)

    print("Layer".center(12), "Weight".center(38), "Ratio".rjust(7), sep='')
    for i in layer_weight.keys():
        print("%s" % i.ljust(13), ":",
              ("%s (%s | %s)" % (layer_weight[i][0], layer_weight[i][1], layer_weight[i][2])).center(35),
              ("%.2f" % layer_weight[i][3]).rjust(7),
              sep=''
             )
    return layer_weight

abc = weight_counter(model)

print(len(param.train_loader), len(param.val_loader))

def test(model, dataloader, criterion):
    model.eval()
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            
            predicted = torch.argmax(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
            accuracy = (correct/total)

    return accuracy

In [16]:

#optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = 1.2e-3)

#EPS = 1e-6
# number of weight
a = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0)
#b = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)
b = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)

now = (a + b)

def calc_now(model):
    fc1_1 = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc1_0 = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc1 = fc1_1 + fc1_0
    fc1_p = fc1_0 / fc1_1
    fc2_1 = ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc2_0 = ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc2 = fc2_1 + fc2_0
    fc3_1 = ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc3_0 = ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc3 = fc3_1 + fc3_0
    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    print("Remaining weight %.1f %%" %(((fc1_1+fc2_1+fc3_1)/(fc1+fc2+fc3))*100))
    print('total weight :',
        '%d' % (fc1+fc2+fc3),
         '(%d |' % (fc1_1+fc2_1+fc3_1),
         '%d)' % (fc1_0+fc2_0+fc3_0)
         )
    print('fc1 :',
        '%d' % fc1,
         '(%d |' % fc1_1,
         '%d)' % fc1_0
         )
    print('fc2 :',
        '%d' % fc2,
         '(%d |' % fc2_1,
         '%d)' % fc2_0
         )
    print('fc3 :',
        '%d' % fc3,
         '(%d |' % fc3_1,
         '%d)' % fc3_0
         )

a

b

weight_init(model, model_init, 1 - weight_remaining)

weight_init(model, model_init, 1 - weight_remaining)

model.state_dict().keys()

print(model.fc3.weight_orig)

for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        prune.l1_unstructured(module, name = 'weight', amount = 0.9)

    # init 값 복사
for name, p in model.named_parameters():
     if 'weight_orig' in name:
        for name2, p2 in model_init.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break
    if 'bias_orig' in name:
        for name2, p2 in modelinit.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break

for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

print(model.fc3.weight[0][0])

print(model_init.fc3.weight[0][0])

In [17]:
param.prune_per_c

0.15

In [18]:
for i in range(param.noi):
    best_accu.append(0)
    best_accu[i] = [0, 0, 0]
    cp_mask = []

    # pruning 및 mask 복사
    # layer별 prune rate를 입력
    cp_mask = weight_init(model, model_init, 
                           (1 - ((1-param.prune_per_c) ** i)),
                           (1 - ((1-param.prune_per_f) ** i)),
                           (1 - ((1-param.prune_per_o) ** i))
                          )

    optimizer = optim.Adam(model.parameters(), lr = param.lr, weight_decay = param.weight_decay)
    print("Learning start!\n")
    # weight 개수 계산 및 저장
    
    #iteration 횟수 = i
    
    weight_counts = weight_counter(model)
    
    #print(model.conv1.weight[0])
    #print(model.fc3.weight[0])
    
    
    remaining_weight = weight_counts['all.weight'][3]
    
    start_time = timeit.default_timer()
    
    for epoch in tqdm(range(param.epochs)):
        # epoch가 0일때 정확도 계산
        if epoch == 0:
            accuracy, test_loss = test(model, param.test_loader, criterion)
            visdom_plot(vis_plt,torch.Tensor([accuracy]), torch.Tensor([0]),
                        str(remaining_weight)
                       )
            print('[epoch : %d]' % (epoch),
             '(r_loss: x.xxxxx)',
             '(t_loss: x.xxxxx)',
             '(accu: %.4f)' % (accuracy)
             )
        # model training    
        running_loss = train(model, param.train_loader, optimizer, criterion, cp_mask)
        
        # val_set이 있을 경우 val_set을 통해 loss, accu를 구한다.
        if param.valset == 'empty':
            accuracy, test_loss = test(model, param.test_loader, criterion)
        else:
            accuracy, test_loss = test(model, param.val_loader, criterion)
        
        # visdom plot
        visdom_plot(vis_plt, torch.Tensor([accuracy]), torch.Tensor([(epoch+1) * 1000]),
                    str(remaining_weight)
                   )
        
        # best accuracy list (weight_remain, epoch, accuracy)
        if best_accu[i][2] <= accuracy:
            best_accu[i] = [remaining_weight, epoch, accuracy]
        
        print('[epoch : %d]' % (epoch+1),
             '(r_loss: %.5f)' % (running_loss),
             '(t_loss: %.5f)' % (test_loss),
             '(accu: %.4f)' % (accuracy)
             )
    stop_time = timeit.default_timer()
    #print(model.fc3.weight[0][0])
    #print(model_init.fc3.weight[0][0])
    
    #print(model.fc3.weight[0])
    
    #print(model.conv1.weight[0])
    #print(model.fc3.weight[0])
    
    print("Finish!",
          "(Best accu: %.4f)" % best_accu[i][2],
          "(Time taken(sec) : %.2f)" % (stop_time - start_time),
          "\n\n\n\n\n\n\n")

Learning start!

   Layer                    Weight                  Ratio
all.weight   :       2261184 (2261184 | 0)         100.00
conv1.weight :          1728 (1728 | 0)            100.00
conv2.weight :         36864 (36864 | 0)           100.00
conv3.weight :         73728 (73728 | 0)           100.00
conv4.weight :        147456 (147456 | 0)          100.00
conv5.weight :        294912 (294912 | 0)          100.00
conv6.weight :        589824 (589824 | 0)          100.00
fc1.weight   :       1048576 (1048576 | 0)         100.00
fc2.weight   :         65536 (65536 | 0)           100.00
fc3.weight   :          2560 (2560 | 0)            100.00


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.1000)
[epoch : 1] (r_loss: 1.98209) (t_loss: 1.74106) (accu: 0.3386)

Finish! (Best accu: 0.3386) (Time taken(sec) : 31.67) 







Learning start!

   Layer                    Weight                  Ratio
all.weight   :     2261184 (1866429 | 394755)       82.54
conv1.weight :         1728 (1469 | 259)            85.01
conv2.weight :        36864 (31334 | 5530)          85.00
conv3.weight :       73728 (62669 | 11059)          85.00
conv4.weight :      147456 (125338 | 22118)         85.00
conv5.weight :      294912 (250675 | 44237)         85.00
conv6.weight :      589824 (501350 | 88474)         85.00
fc1.weight   :     1048576 (838861 | 209715)        80.00
fc2.weight   :       65536 (52429 | 13107)          80.00
fc3.weight   :         2560 (2304 | 256)            90.00


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.1000)
[epoch : 1] (r_loss: 1.94358) (t_loss: 1.72280) (accu: 0.3398)

Finish! (Best accu: 0.3398) (Time taken(sec) : 31.13) 







Learning start!

   Layer                    Weight                  Ratio
all.weight   :     2261184 (1542015 | 719169)       68.20
conv1.weight :         1728 (1248 | 480)            72.22
conv2.weight :       36864 (26634 | 10230)          72.25
conv3.weight :       73728 (53268 | 20460)          72.25
conv4.weight :      147456 (106537 | 40919)         72.25
conv5.weight :      294912 (213074 | 81838)         72.25
conv6.weight :      589824 (426148 | 163676)        72.25
fc1.weight   :     1048576 (671089 | 377487)        64.00
fc2.weight   :       65536 (41943 | 23593)          64.00
fc3.weight   :         2560 (2074 | 486)            81.02


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.1000)
[epoch : 1] (r_loss: 1.92014) (t_loss: 1.75277) (accu: 0.3329)

Finish! (Best accu: 0.3329) (Time taken(sec) : 31.84) 







Learning start!

   Layer                    Weight                  Ratio
all.weight   :     2261184 (1275164 | 986020)       56.39
conv1.weight :         1728 (1061 | 667)            61.40
conv2.weight :       36864 (22639 | 14225)          61.41
conv3.weight :       73728 (45278 | 28450)          61.41
conv4.weight :       147456 (90556 | 56900)         61.41
conv5.weight :      294912 (181113 | 113799)        61.41
conv6.weight :      589824 (362226 | 227598)        61.41
fc1.weight   :     1048576 (536871 | 511705)        51.20
fc2.weight   :       65536 (33554 | 31982)          51.20
fc3.weight   :         2560 (1866 | 694)            72.89


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.1000)



KeyboardInterrupt: 

In [None]:
print(model.conv1.weight[0])

#name, all, non_zero, zero, per
def weight_counter(model):
    layer_weight = []
    all_weight = ['All_weight',0 ,0 ,0, 0]
    for name, p in model.named_parameters():
        if 'weight' in name:
            none_zero_w = (p != 0).sum().item()
            zero_w = (p == 0).sum().item()
            all_w = none_zero_w + zero_w

            all_weight[1] += all_w
            all_weight[2] += none_zero_w
            all_weight[3] += zero_w

            layer_weight.append([name, all_w, none_zero_w, zero_w, round(none_zero_w/all_w,4)])

    all_weight[4] = round((all_weight[2]/all_weight[1]), 4)
    layer_weight.insert(0, all_weight)
    for i in range(len(layer_weight)):
        print(layer_weight[i])
    
    return layer_weight

In [None]:
weight_counter(model)

In [None]:
(model.conv1.weight != 0).sum()

In [None]:
(model.conv1.weight != 0).sum()

In [None]:
(model.conv1.weight == 0).sum()

In [None]:
(model.fc1.weight != 0).sum(dim=1).sum(dim=0)

In [None]:
(model.fc1.weight != 0).sum()

def calc_now(model):
    fc1_1 = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc1_0 = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc1 = fc1_1 + fc1_0
    fc1_p = fc1_0 / fc1_1
    fc2_1 = ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc2_0 = ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc2 = fc2_1 + fc2_0
    fc3_1 = ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc3_0 = ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc3 = fc3_1 + fc3_0
    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    print("Remaining weight %.1f %%" %(((fc1_1+fc2_1+fc3_1)/(fc1+fc2+fc3))*100))
    print('total weight :',
        '%d' % (fc1+fc2+fc3),
         '(%d |' % (fc1_1+fc2_1+fc3_1),
         '%d)' % (fc1_0+fc2_0+fc3_0)
         )
    print('fc1 :',
        '%d' % fc1,
         '(%d |' % fc1_1,
         '%d)' % fc1_0
         )
    print('fc2 :',
        '%d' % fc2,
         '(%d |' % fc2_1,
         '%d)' % fc2_0
         )
    print('fc3 :',
        '%d' % fc3,
         '(%d |' % fc3_1,
         '%d)' % fc3_0
         )

In [None]:
for name, module in model1.named_modules():
    if isinstance(module, nn.Conv2d):
        prune.remove(module, name = 'weight')
    elif isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

In [None]:
print(model.fc3.weight[0])

In [None]:
print("Maximum accuracy per weight remaining")
for i in range(len(best_accu)):
    print("Remaining weight %.1f %% " % (best_accu[i][0] * 100),
         "Epoch %d" % best_accu[i][1],
         "Accu %.4f %%" % best_accu[i][2])

model2 = cu.LeNet300().to(device)

model2.state_dict().keys()

param.type("LeNet300")

model2.fc3.weight

cp_mask = weight_init(model, model_init,
                              0,
                              0,
                              0
                             )

model2.fc3.weight

class abc:
    def __init__(self):
        self.a = 1
        self.b = 2
        d = 3
        c = self.a

aa = abc()

aa.d

print(aa)

for name, p in model.named_parameters():
    EPS = 1e-6
    if 'weight' in name:
        tensor = p.data.cpu().numpy()
        grad_tensor = p.grad.data.cpu().numpy()
        grad_tensor = np.where(tensor < EPS, 0, grad_tensor)
        p.grad.data = torch.from_numpy(grad_tensor).to(device)
        print(p.grad.data)

데이터 숫자 60000
배치 길이 60
배치 개수 1000
epoch = 50

이터레이션 횟수 50000