In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init as init
import torch.nn.functional as F
import visdom
import copy
import torch.nn.utils.prune as prune
from tqdm.notebook import tqdm
import numpy as np
import timeit

# custom librarys (model, parameters...)
import custom.utils as cu

In [7]:
torch.manual_seed(55)
torch.cuda.manual_seed_all(55)
torch.backends.cudnn.enabled = False

In [8]:
GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

Available devices  2
Current cuda device  1
GeForce RTX 2080 Ti
cpu와 cuda 중 다음 기기로 학습함: cuda:1 



In [9]:
#switch = 0
best_accu = []

In [10]:
model_type = 'LeNet300'
#model_type = 'Conv6'

In [43]:
param = cu.parameters()

if model_type == 'LeNet300':
    model = cu.LeNet300().to(device)
elif model_type == 'Conv6':
    model = cu.Conv6().to(device)
    
param.type(model_type)    
model_init = copy.deepcopy(model)
criterion = nn.CrossEntropyLoss().to(device)

In [7]:
# parameter check
print('\n'.join("%s: %s" % item for item in param.__dict__.items()))

lr: 0.0003
epochs: 50
batch_size: 60
weight_decay: 0.003
iteration: 0
prune_per_c: 0.15
prune_per_f: 0.2
prune_per_o: 0.1
noi: 12
trainset: Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ../CIFAR10/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
           )
valset: Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ../CIFAR10/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
           )
testset: Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ../CIFAR10/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.247, 0.243, 0.261))
           )
train_loader: <torch.utils.data.dataloader.DataLoader object at 

print('\n'.join("%s: %s" % item for item in sorted(param.__dict__.items(), key=lambda i: i[0])) )

print('\n'.join("%s: %s" % item for item in sorted(param.__dict__.items(), key=lambda i: i[0])) )

print('\n'.join("%s: %s" % item for item in param.__dict__.items()))

In [8]:
len(param.train_loader.sampler)

45000

In [9]:
# visdom setting
vis = visdom.Visdom()
vis.close(env="main")

Tracker_type = "Accuracy_Tracker"
title = model_type + "_" + Tracker_type

# make plot
vis_plt = vis.line(X=torch.Tensor(1).zero_(), Y=torch.Tensor(1).zero_(), 
                    opts=dict(title = title,
                              legend=['100.0'],
                             showlegend=True,
                              xtickmin = 0,
                              xtickmax = 20000,
                              ytickmin = 0.95,
                              ytickmax = 0.99
                             )
                   )

def visdom_plot(loss_plot, loss_value, num, name):
    vis.line(X = num,
            Y = loss_value,
            win = loss_plot,
            name = name,
            update = 'append'
            )

Setting up a new session...


In [10]:
param.epochs = 1
#param.noi = 

In [11]:
param.epochs

1

In [12]:
len(param.train_loader)

750

In [13]:
len(param.val_loader)

83

# parameter
lr = 0.0012
#epochs = 50
#epochs = 20
epochs = 30
batch_size = 60
weight_decay = 1.2e-3
iteration = 0
remaining_weight = 1
prune_per = 0.2
# number of iteration
noi = 11

switch = 0
best_accu = []
# 마지막 layer의 Pruning rate는 기존의 1/2
# prune_per_ll = prune_per/2

cp_mask

transforms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

mnist_train = dsets.MNIST(root='../MNIST_data/',
                         train=True,
                         transform=transforms,
                         download=True)
mnist_test = dsets.MNIST(root='../MNIST_data/',
                        train=False,
                        transform=transforms,
                        download=True)

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                         shuffle=False,
                                         drop_last=True)

In [2]:
# train, test, prune function
def train(model, dataloader, optimizer, criterion, cp_mask):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, label) in enumerate(dataloader):
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, label)
        loss.backward()
        # 0-weight 학습 방지
        """
        if cp_mask:
            i = 0
            for name, p in model.named_parameters():
                if 'weight' in name:
                    p.grad.data *= cp_mask[i]
                    i += 1
        """            
        optimizer.step()
        running_loss += loss / len(dataloader)
    return running_loss

def test(model, dataloader, criterion):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            #test_loss += F.nll_loss(outputs, label, reduction='sum').item() # sum up batch loss
            loss = criterion(outputs, label)
            #predicted = outputs.data.max(1, keepdim=True)[1]
            #correct += predicted.eq(label.data.view_as(predicted)).sum().item()
            
            test_loss += loss / len(dataloader)
            total += label.size(0)
            correct += (predicted == label).sum().item()
        #accuracy =  correct / len(dataloader)
        # 로더 -> 배치 개수 로더.dataset -> 전체 길이, 
    return (correct/total), test_loss

# prune function
# pruning mask 생성 -> mask 복사 -> init값 복사 -> prune 진행
def weight_init(model1, model2, c_rate, f_rate, o_rate):
    # layer별로 지정된 rate만큼 prune mask 생성
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.l1_unstructured(module, name = 'weight', amount = c_rate)
        if isinstance(module, nn.Linear):
            if name != 'fc3':
                prune.l1_unstructured(module, name = 'weight', amount = f_rate)
            else:
                prune.l1_unstructured(module, name = 'weight', amount = o_rate)
                        
    # mask 복사
    cp_mask = []
    for name, mask in model1.named_buffers():
        cp_mask.append(mask)
    
    # init 값을 model에 복사
    for name, p in model1.named_parameters():
        if 'weight_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
        if 'bias_orig' in name:
            for name2, p2 in model2.named_parameters():
                if name[0:len(name) - 5] in name2:
                    p.data = copy.deepcopy(p2.data)
                    
    # prune 진행
    for name, module in model1.named_modules():
        if isinstance(module, nn.Conv2d):
            prune.remove(module, name = 'weight')
        elif isinstance(module, nn.Linear):
            prune.remove(module, name = 'weight')
            
    # copy된 mask return
    return cp_mask

# weight count function
# dict type['name' : [all, non_zero, zero, ratio]]
def weight_counter(model):
    layer_weight = {'all.weight':[0, 0, 0, 0]}
    
    for name, p in model.named_parameters():
        if 'weight' in name:
            remain, pruned = (p != 0).sum().item(), (p == 0).sum().item()
            layer_weight[name] = [remain+pruned, remain, pruned, round((remain/(remain+pruned))*100, 2)]
            
    for i in layer_weight.keys():
        for j in range(0, 3):
            layer_weight['all.weight'][j] += layer_weight[i][j]
    layer_weight['all.weight'][3] = round(layer_weight['all.weight'][1]/layer_weight['all.weight'][0]*100, 2)

    print("Layer".center(12), "Weight".center(39), "Ratio(%)".rjust(7), sep='')
    for i in layer_weight.keys():
        print("%s" % i.ljust(13), ":",
              ("%s (%s | %s)" % (layer_weight[i][0], layer_weight[i][1], layer_weight[i][2])).center(36),
              ("%.2f" % layer_weight[i][3]).rjust(7),
              sep=''
             )
        
    return layer_weight

In [15]:
aaa = weight_counter(model)

   Layer                     Weight                Ratio(%)
all.weight   :       2261184 (2261184 | 0)         100.00
conv1.weight :          1728 (1728 | 0)            100.00
conv2.weight :         36864 (36864 | 0)           100.00
conv3.weight :         73728 (73728 | 0)           100.00
conv4.weight :        147456 (147456 | 0)          100.00
conv5.weight :        294912 (294912 | 0)          100.00
conv6.weight :        589824 (589824 | 0)          100.00
fc1.weight   :       1048576 (1048576 | 0)         100.00
fc2.weight   :         65536 (65536 | 0)           100.00
fc3.weight   :          2560 (2560 | 0)            100.00


z = {}

z[0] = [0]

z[i] = 1

z[i] = aaa

z[1] =aaa

z

z.keys()

z[0]['all.weight'][0]

i = 0

z[i] += [aaa]

weight_counter2(model)

    layer_weight = {'all.weight':[0, 0, 0, 0]}
    for name, p in model.named_parameters():
        if 'weight' in name:
            remain, pruned = (p != 0).sum().item(), (p == 0).sum().item()
            layer_weight[name] = [remain+pruned, remain, pruned, round((remain/remain+pruned), 1)]
    for i in layer_weight.keys():
        for j in range(0, 3):
            layer_weight['all.weight'][j] += layer_weight[i][j]
    layer_weight['all.weight'][3] = round(layer_weight['all.weight'][1]/layer_weight['all.weight'][0], 1)

    print("Layer".center(12), "Weight".center(38), "Ratio".rjust(7), sep='')
    for i in layer_weight.keys():
        print("%s" % i.ljust(13), ":",
              ("%s (%s | %s)" % (layer_weight[i][0], layer_weight[i][1], layer_weight[i][2])).center(35),
              ("%.2f" % layer_weight[i][3]).rjust(7),
              sep=''
             )
    return layer_weight

abc = weight_counter(model)

print(len(param.train_loader), len(param.val_loader))

def test(model, dataloader, criterion):
    model.eval()
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for data, label in dataloader:
            data, label = data.to(device), label.to(device)
            outputs = model(data)
            
            predicted = torch.argmax(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
            accuracy = (correct/total)

    return accuracy

In [16]:

#optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = 1.2e-3)

#EPS = 1e-6
# number of weight
a = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0)
#b = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)
b = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0) + ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0)

now = (a + b)

def calc_now(model):
    fc1_1 = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc1_0 = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc1 = fc1_1 + fc1_0
    fc1_p = fc1_0 / fc1_1
    fc2_1 = ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc2_0 = ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc2 = fc2_1 + fc2_0
    fc3_1 = ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc3_0 = ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc3 = fc3_1 + fc3_0
    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    print("Remaining weight %.1f %%" %(((fc1_1+fc2_1+fc3_1)/(fc1+fc2+fc3))*100))
    print('total weight :',
        '%d' % (fc1+fc2+fc3),
         '(%d |' % (fc1_1+fc2_1+fc3_1),
         '%d)' % (fc1_0+fc2_0+fc3_0)
         )
    print('fc1 :',
        '%d' % fc1,
         '(%d |' % fc1_1,
         '%d)' % fc1_0
         )
    print('fc2 :',
        '%d' % fc2,
         '(%d |' % fc2_1,
         '%d)' % fc2_0
         )
    print('fc3 :',
        '%d' % fc3,
         '(%d |' % fc3_1,
         '%d)' % fc3_0
         )

a

b

weight_init(model, model_init, 1 - weight_remaining)

weight_init(model, model_init, 1 - weight_remaining)

model.state_dict().keys()

print(model.fc3.weight_orig)

for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        prune.l1_unstructured(module, name = 'weight', amount = 0.9)

    # init 값 복사
for name, p in model.named_parameters():
     if 'weight_orig' in name:
        for name2, p2 in model_init.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break
    if 'bias_orig' in name:
        for name2, p2 in modelinit.named_parameters():
            if name[0:len(name) - 5] in name2:
                p.data = copy.deepcopy(p2.data)
                break

for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

print(model.fc3.weight[0][0])

print(model_init.fc3.weight[0][0])

In [17]:
param.prune_per_c

0.15

In [19]:
model.state_dict().keys()

odict_keys(['conv1.bias', 'conv1.weight', 'conv2.bias', 'conv2.weight', 'conv3.bias', 'conv3.weight', 'conv4.bias', 'conv4.weight', 'conv5.bias', 'conv5.weight', 'conv6.bias', 'conv6.weight', 'fc1.bias', 'fc1.weight', 'fc2.bias', 'fc2.weight', 'fc3.bias', 'fc3.weight'])

model.conv1.weight[0]
for name, p in model.named_parameters():
        if 'weight' in name:
            print(model.conv1.weight)

for name, p in model.named_modules():
    print(p)

i = 0
for name, p in model.named_parameters():
    if 'weight' in name:
        print(p.grad.data * cp_mask[i])
        i += 1

model2[0].weight

for name, p in model.named_parameters():
    if 'weight' in name:
        

model.conv1.weight

cp_mask[0]

print(p[0])

j = 0

for name, p in model.named_parameters():
    if 'weight' in name:
        p.register_hook(lambda grad: grad.mul_(cp_mask[j]))
        print(name, len(cp_mask[j]))
        j += 1
        #print(p[0], cp_mask[j])

In [37]:
param = cu.parameters()

if model_type == 'LeNet300':
    model = cu.LeNet300().to(device)
elif model_type == 'Conv6':
    model = cu.Conv6().to(device)
    
param.type(model_type)    
model_init = copy.deepcopy(model)
criterion = nn.CrossEntropyLoss().to(device)

In [38]:
weight_counter(model)

   Layer                     Weight                Ratio(%)
all.weight   :        266200 (266200 | 0)          100.00
fc1.weight   :        235200 (235200 | 0)          100.00
fc2.weight   :         30000 (30000 | 0)           100.00
fc3.weight   :          1000 (1000 | 0)            100.00


{'all.weight': [266200, 266200, 0, 100.0],
 'fc1.weight': [235200, 235200, 0, 100.0],
 'fc2.weight': [30000, 30000, 0, 100.0],
 'fc3.weight': [1000, 1000, 0, 100.0]}

In [55]:
i = 0
cp_mask = torch.FloatTensor()
cp_mask = weight_init(model, model_init, 
                           (0.5),
                           (0.5),
                           (0.5)
                          )

In [35]:
model.fc1.weight

Parameter containing:
tensor([[ 0.0000, -0.0000,  0.0000,  ..., -0.0003, -0.0006, -0.0008],
        [ 0.0028, -0.0000,  0.0028,  ...,  0.0000, -0.0000,  0.0041],
        [-0.0000, -0.0020, -0.0046,  ..., -0.0029, -0.0000, -0.0000],
        ...,
        [-0.0037, -0.0024, -0.0036,  ..., -0.0000,  0.0035,  0.0033],
        [ 0.0000,  0.0000, -0.0021,  ..., -0.0040, -0.0039, -0.0000],
        [-0.0039,  0.0043,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:1', requires_grad=True)

In [56]:
cp_mask

[tensor([[1., 0., 1.,  ..., 1., 0., 0.],
         [1., 1., 1.,  ..., 1., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 1.],
         ...,
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 1., 0., 0.]], device='cuda:1'),
 tensor([[1., 0., 0.,  ..., 1., 1., 0.],
         [1., 0., 1.,  ..., 1., 0., 0.],
         [1., 0., 0.,  ..., 1., 0., 1.],
         ...,
         [0., 1., 1.,  ..., 1., 0., 1.],
         [1., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:1'),
 tensor([[0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0.,
          1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 0.,
          1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0., 0.,
          1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0.,
          0., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 0.,
          0., 1., 1., 0., 1., 1.

In [42]:
i = 0
a = cp_mask[i]
print(a[0][0])

tensor(0., device='cuda:1')


In [31]:
optimizer = optim.Adam(model.parameters(), lr = param.lr, weight_decay = param.weight_decay)

In [32]:
running_loss = train(model, param.train_loader, optimizer, criterion, cp_mask)

In [295]:
for name, i in model.named_parameters():
    print(name)
        

fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias


In [30]:
for name, i in model.named_parameters():
    #print(name, i)
    if 'fc1.weight' in name:
        print(len(cp_mask[0]))
        i.register_hook(lambda grad: grad * cp_mask[0])
    elif 'fc2.weight' in name:
        print(len(cp_mask[1]))
        i.register_hook(lambda grad: grad * cp_mask[1])
    elif 'fc3.weight' in name:
        print(len(cp_mask[2]))
        i.register_hook(lambda grad: grad * cp_mask[2])

300
100
10


In [57]:
count = 0
for name, i in model.named_parameters():
    if 'weight' in name:
        print(len(a))
        i.register_hook(lambda grad:grad.mul_(cp_mask[count]))
        count += 1

0
0
0


In [45]:
cp_mask = torch.tensor(cp_mask)

ValueError: only one element tensors can be converted to Python scalars

In [54]:
a

tensor([])

In [52]:
a = torch.FloatTensor()

In [53]:
type(a)

torch.Tensor

In [43]:
cp_mask.state_dict().keys()

AttributeError: 'list' object has no attribute 'state_dict'

In [162]:
model.fc1.weight.register_hook(lambda grad: grad * cp_mask[0])
model.fc2.weight.register_hook(lambda grad: grad * cp_mask[1])
model.fc3.weight.register_hook(lambda grad: grad * cp_mask[2])

In [164]:
a

<torch.utils.hooks.RemovableHandle at 0x7f647b409450>

In [77]:
w0 = model.fc3.weight.detach().clone()

In [78]:
w0

tensor([[ 0.0516,  0.0000,  0.0000,  0.0000,  0.0923,  0.0685,  0.0551,  0.0000,
          0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000,  0.0674, -0.0621,
         -0.0839, -0.0829,  0.0839, -0.0896,  0.0872, -0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0597, -0.0000,  0.0771,  0.0513, -0.0852,  0.0990,
         -0.0000,  0.0589, -0.0000, -0.0889, -0.0000,  0.0000, -0.0000, -0.0000,
          0.0780, -0.0000,  0.0628,  0.0927, -0.0846,  0.0000, -0.0783, -0.0729,
         -0.0000,  0.0000,  0.0513, -0.0834,  0.0904,  0.0688, -0.0000,  0.0696,
         -0.0597, -0.0995, -0.0000, -0.0000,  0.0708, -0.0896, -0.0633, -0.0000,
          0.0000,  0.0661,  0.0777,  0.0967, -0.0000, -0.0909, -0.0000, -0.0000,
          0.0846, -0.0000,  0.0791,  0.0534, -0.0000,  0.0000, -0.0000, -0.0827,
         -0.0931, -0.0810,  0.0000,  0.0000, -0.0912, -0.0633, -0.0000,  0.0806,
         -0.0000,  0.0000,  0.0000, -0.0917,  0.0511,  0.0000,  0.0795,  0.0556,
         -0.0000,  0.0000,  

In [124]:
optimizer = optim.Adam(model.parameters(), lr = param.lr, weight_decay = param.weight_decay)

In [54]:
print(cp_mask)

[tensor([[0., 1., 0.,  ..., 1., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [1., 0., 1.,  ..., 1., 0., 1.],
        ...,
        [0., 1., 1.,  ..., 1., 0., 1.],
        [1., 0., 0.,  ..., 0., 1., 0.],
        [1., 0., 0.,  ..., 1., 0., 1.]], device='cuda:1'), tensor([[0., 1., 1.,  ..., 1., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 1., 0.,  ..., 0., 0., 1.],
        [0., 1., 0.,  ..., 1., 1., 1.],
        [1., 1., 0.,  ..., 0., 0., 0.]], device='cuda:1'), tensor([[1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0.,
         1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1.,
         1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1.,
         0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1.,
         1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
         1., 0., 1., 1., 0., 0., 1., 1., 0., 1.],


In [70]:
model.fc2.weight

Parameter containing:
tensor([[ 0.0000, -0.0470, -0.0452,  ..., -0.0340, -0.0000, -0.0000],
        [-0.0000,  0.0000, -0.0345,  ..., -0.0000, -0.0000,  0.0000],
        [ 0.0000, -0.0000,  0.0454,  ..., -0.0000, -0.0000, -0.0000],
        ...,
        [-0.0000,  0.0402, -0.0000,  ...,  0.0000, -0.0000, -0.0539],
        [ 0.0000,  0.0544, -0.0000,  ...,  0.0360, -0.0329, -0.0534],
        [ 0.0302, -0.0513, -0.0000,  ...,  0.0000, -0.0000, -0.0000]],
       device='cuda:1', requires_grad=True)

In [69]:
for name, p in model.named_parameters():
    if name =='fc2.weight':
        print(p)

Parameter containing:
tensor([[ 0.0000, -0.0470, -0.0452,  ..., -0.0340, -0.0000, -0.0000],
        [-0.0000,  0.0000, -0.0345,  ..., -0.0000, -0.0000,  0.0000],
        [ 0.0000, -0.0000,  0.0454,  ..., -0.0000, -0.0000, -0.0000],
        ...,
        [-0.0000,  0.0402, -0.0000,  ...,  0.0000, -0.0000, -0.0539],
        [ 0.0000,  0.0544, -0.0000,  ...,  0.0360, -0.0329, -0.0534],
        [ 0.0302, -0.0513, -0.0000,  ...,  0.0000, -0.0000, -0.0000]],
       device='cuda:1', requires_grad=True)


In [93]:
j = 0
for name, p in model.named_parameters():
    if 'weight' in name:
        print(j)
        #p.register_hook(lambda grad: grad.mul_(cp_mask[j]))
        #print(name)
        #print(p[0], cp_mask[j][0])
        print(cp_mask[j])
        j += 1

0
tensor([[0., 1., 1.,  ..., 0., 1., 0.],
        [1., 0., 1.,  ..., 1., 0., 1.],
        [0., 1., 1.,  ..., 0., 0., 1.],
        ...,
        [0., 1., 1.,  ..., 1., 1., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 1., 0., 0.]], device='cuda:1')
1
tensor([[1., 0., 0.,  ..., 1., 1., 1.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 1., 0.],
        ...,
        [0., 0., 1.,  ..., 1., 1., 0.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 0.,  ..., 1., 1., 1.]], device='cuda:1')
2
tensor([[0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0., 1.,
         0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0., 1., 0.,
         1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1.,
         1., 1., 1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
         1., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 1.,
         1., 0., 1., 1., 1., 1., 0., 1., 0., 0.

In [18]:
for i in range(param.noi):
    best_accu.append(0)
    best_accu[i] = [0, 0, 0]
    cp_mask = []

    # pruning 및 mask 복사
    # layer별 prune rate를 입력
    cp_mask = weight_init(model, model_init, 
                           (1 - ((1-param.prune_per_c) ** i)),
                           (1 - ((1-param.prune_per_f) ** i)),
                           (1 - ((1-param.prune_per_o) ** i))
                          )
    #model2[0].weight.register_hook(lambda grad: grad * gradient_mask)
    j = 0
    for name, p in model.named_parameters():
        if 'weight' in name:
            p.register_hook(lambda grad: grad.mul_(cp_mask[j]))
            print(name)
            print(p[0], cp_mask[j][0])
            j += 1
    
    
    optimizer = optim.Adam(model.parameters(), lr = param.lr, weight_decay = param.weight_decay)
    print("Learning start!\n")
    # weight 개수 계산 및 저장
    
    #iteration 횟수 = i
    
    weight_counts = weight_counter(model)
    
    #print(model.conv1.weight[0])
    #print(model.fc3.weight[0])
    
    
    remaining_weight = weight_counts['all.weight'][3]
    
    start_time = timeit.default_timer()
    
    for epoch in tqdm(range(param.epochs)):
        # epoch가 0일때 정확도 계산
        if epoch == 0:
            accuracy, test_loss = test(model, param.test_loader, criterion)
            visdom_plot(vis_plt,torch.Tensor([accuracy]), torch.Tensor([0]),
                        str(remaining_weight)
                       )
            print('[epoch : %d]' % (epoch),
             '(r_loss: x.xxxxx)',
             '(t_loss: x.xxxxx)',
             '(accu: %.4f)' % (accuracy)
             )
        # model training    
        running_loss = train(model, param.train_loader, optimizer, criterion, cp_mask)
        
        # val_set이 있을 경우 val_set을 통해 loss, accu를 구한다.
        if param.valset == 'empty':
            accuracy, test_loss = test(model, param.test_loader, criterion)
        else:
            accuracy, test_loss = test(model, param.val_loader, criterion)
        
        # visdom plot
        visdom_plot(vis_plt, torch.Tensor([accuracy]), torch.Tensor([(epoch+1) * 1000]),
                    str(remaining_weight)
                   )
        
        # best accuracy list (weight_remain, epoch, accuracy)
        if best_accu[i][2] <= accuracy:
            best_accu[i] = [remaining_weight, epoch, accuracy]
        
        print('[epoch : %d]' % (epoch+1),
             '(r_loss: %.5f)' % (running_loss),
             '(t_loss: %.5f)' % (test_loss),
             '(accu: %.4f)' % (accuracy)
             )
    stop_time = timeit.default_timer()
    #print(model.fc3.weight[0][0])
    #print(model_init.fc3.weight[0][0])
    
    #print(model.fc3.weight[0])
    
    #print(model.conv1.weight[0])
    #print(model.fc3.weight[0])
    
    print("Finish!",
          "(Best accu: %.4f)" % best_accu[i][2],
          "(Time taken(sec) : %.2f)" % (stop_time - start_time),
          "\n\n\n\n\n\n\n")

conv1.weight
tensor([[[ 0.1292,  0.1685,  0.0939],
         [ 0.0260,  0.1417, -0.0091],
         [-0.1594,  0.1055,  0.1800]],

        [[-0.1203, -0.1542, -0.1689],
         [ 0.1283,  0.1091,  0.0083],
         [ 0.1156,  0.0912, -0.0722]],

        [[-0.0412, -0.0720, -0.1287],
         [ 0.1364,  0.0321,  0.0139],
         [-0.0617, -0.0857, -0.0816]]], device='cuda:1',
       grad_fn=<SelectBackward>) tensor([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]], device='cuda:1')
conv2.weight
tensor([[[-0.0229, -0.0392, -0.0019],
         [ 0.0119, -0.0340, -0.0226],
         [-0.0115, -0.0108,  0.0122]],

        [[ 0.0262, -0.0175,  0.0302],
         [-0.0141,  0.0091,  0.0263],
         [-0.0217,  0.0252,  0.0401]],

        [[ 0.0354, -0.0386,  0.0379],
         [-0.0086,  0.0101, -0.0118],
         [ 0.0361,  0.0321,  0.0376]],

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

[epoch : 0] (r_loss: x.xxxxx) (t_loss: x.xxxxx) (accu: 0.1000)



RuntimeError: list index out of range

In [None]:
print(model.conv1.weight[0][0])

In [None]:
model2 = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 2)
)
print(model2[0].weight)

In [None]:
model2.state_dict().keys()

In [None]:
print(model2[0].weight.grad)

In [None]:
 for name, p in model2.named_modules():
        print(p)

In [None]:
if x > 0:
	value = 10
else:
	value = 20

value = 10 if x > 0 else 20

In [None]:
# Create Gradient mask
gradient_mask = torch.zeros(2, 2)
gradient_mask[0, 0] = 1.0
#model2[0].weight.register_hook(lambda grad: grad.mul_(gradient_mask))
model2[0].weight.register_hook(lambda grad: grad * gradient_mask)
print(model2[0].weight)

In [None]:
model2[0].weight[0][0]

In [None]:
for name, p in model2.named_parameters():
    print(name)
    if 'weight' in name:
        p.register_hook(lambda grad: grad * 0 if p != 0 else grad)
    #for i in range(len(p)):
        #for j in range(len([0][p])):
            #print(p[i][j])    
    #print(p != 0)

In [None]:
for name, p in model2.named_modules():
    print(name)
    

In [None]:
optimizer = optim.SGD(model2.parameters(), lr=1.0, weight_decay = 0.003)
criterion = nn.CrossEntropyLoss()

In [None]:
batch_size = 10
x = torch.randn(batch_size, 2)
target = torch.randint(0, 2, (batch_size,))

optimizer.zero_grad()
output = model2(x)
loss = criterion(output, target)

In [None]:
# grad 생성
loss.backward()

In [None]:
print('Gradient: ', model2[0].weight.grad)

In [None]:
# Get weight before training
#w0 = model2[0].weight.detach().clone()

# Single training iteration
optimizer.step()

# Compare weight update
w1 = model2[0].weight.detach().clone()
print('Weights updated ', w0!=w1)

In [None]:
w0

In [None]:
w1

In [None]:
model2[0].weight

In [None]:
model2[0].weight

In [None]:
gradient_mask

In [None]:
model2[0].weight

In [None]:
w0

In [None]:
model2[0].weight

#name, all, non_zero, zero, per
def weight_counter(model):
    layer_weight = []
    all_weight = ['All_weight',0 ,0 ,0, 0]
    for name, p in model.named_parameters():
        if 'weight' in name:
            none_zero_w = (p != 0).sum().item()
            zero_w = (p == 0).sum().item()
            all_w = none_zero_w + zero_w

            all_weight[1] += all_w
            all_weight[2] += none_zero_w
            all_weight[3] += zero_w

            layer_weight.append([name, all_w, none_zero_w, zero_w, round(none_zero_w/all_w,4)])

    all_weight[4] = round((all_weight[2]/all_weight[1]), 4)
    layer_weight.insert(0, all_weight)
    for i in range(len(layer_weight)):
        print(layer_weight[i])
    
    return layer_weight

In [None]:
weight_counter(model)

In [None]:
(model.conv1.weight != 0).sum()

In [None]:
(model.conv1.weight != 0).sum()

In [None]:
(model.conv1.weight == 0).sum()

In [None]:
(model.fc1.weight != 0).sum(dim=1).sum(dim=0)

In [None]:
(model.fc1.weight != 0).sum()

def calc_now(model):
    fc1_1 = ((model.fc1.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc1_0 = ((model.fc1.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc1 = fc1_1 + fc1_0
    fc1_p = fc1_0 / fc1_1
    fc2_1 = ((model.fc2.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc2_0 = ((model.fc2.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc2 = fc2_1 + fc2_0
    fc3_1 = ((model.fc3.weight != 0).sum(dim=1)).sum(dim=0).item()
    fc3_0 = ((model.fc3.weight == 0).sum(dim=1)).sum(dim=0).item()
    fc3 = fc3_1 + fc3_0
    #print(fc1, fc2, fc3, fc1+fc2+fc3, fc1_1 + fc2_1 + fc3_1 ,fc1_0 + fc2_0 + fc3_0)
    print("Remaining weight %.1f %%" %(((fc1_1+fc2_1+fc3_1)/(fc1+fc2+fc3))*100))
    print('total weight :',
        '%d' % (fc1+fc2+fc3),
         '(%d |' % (fc1_1+fc2_1+fc3_1),
         '%d)' % (fc1_0+fc2_0+fc3_0)
         )
    print('fc1 :',
        '%d' % fc1,
         '(%d |' % fc1_1,
         '%d)' % fc1_0
         )
    print('fc2 :',
        '%d' % fc2,
         '(%d |' % fc2_1,
         '%d)' % fc2_0
         )
    print('fc3 :',
        '%d' % fc3,
         '(%d |' % fc3_1,
         '%d)' % fc3_0
         )

In [None]:
for name, module in model1.named_modules():
    if isinstance(module, nn.Conv2d):
        prune.remove(module, name = 'weight')
    elif isinstance(module, nn.Linear):
        prune.remove(module, name = 'weight')

In [None]:
print(model.fc3.weight[0])

In [None]:
print("Maximum accuracy per weight remaining")
for i in range(len(best_accu)):
    print("Remaining weight %.1f %% " % (best_accu[i][0] * 100),
         "Epoch %d" % best_accu[i][1],
         "Accu %.4f %%" % best_accu[i][2])

model2 = cu.LeNet300().to(device)

model2.state_dict().keys()

param.type("LeNet300")

model2.fc3.weight

cp_mask = weight_init(model, model_init,
                              0,
                              0,
                              0
                             )

model2.fc3.weight

class abc:
    def __init__(self):
        self.a = 1
        self.b = 2
        d = 3
        c = self.a

aa = abc()

aa.d

print(aa)

for name, p in model.named_parameters():
    EPS = 1e-6
    if 'weight' in name:
        tensor = p.data.cpu().numpy()
        grad_tensor = p.grad.data.cpu().numpy()
        grad_tensor = np.where(tensor < EPS, 0, grad_tensor)
        p.grad.data = torch.from_numpy(grad_tensor).to(device)
        print(p.grad.data)

데이터 숫자 60000
배치 길이 60
배치 개수 1000
epoch = 50

이터레이션 횟수 50000