In [1]:
import torch
import torch.nn as nn
import copy
from pyhocon import ConfigFactory
from models.resnet import resnet50im

In [2]:
# Parse the HOCON configuration file that sits next to this notebook.
conf = ConfigFactory.parse_file("./resnet50_imagenet.hocon")
# Look up the `data_path` entry of the parsed config.
getattr(conf, 'data_path')

In [4]:
# Instantiate the model returned by the project's `resnet50im` factory (pretrained=False).
# NOTE(review): the output recorded for this cell is
# "TypeError: object of type 'ResNet_im' has no len()"; the cause is not visible
# in this notebook — TODO investigate inside `models.resnet`.
t = resnet50im(pretrained=False)

TypeError: object of type 'ResNet_im' has no len()

In [7]:
# Probe for an optional `date` entry; getattr's default makes this yield None
# instead of raising when the attribute lookup fails.
f = getattr(conf, 'date', None)
print(f)

None


In [8]:
# Print every key of `conf` in sorted order; the values are unpacked but not shown.
for k, v in sorted(conf.items()):
    print(k)

GPU
activation_index
activation_step
batch_size
data_path
dataset
epochs
load_state_dict
log_override
lr
lr_gamma
ml_step
model_name
momentum
nGPU
nesterov
optimizer
save_path
scheduler
seed
train
visible_devices
warmup
weight_decay
worker


In [5]:
# Sequential column accumulator: reduces a (M, N) tensor to (M,) along dim 1.
def MatrixAdder(tensor, AdderType="FP16"):
    """Accumulate the columns of ``tensor`` left to right with a magnitude cut-off.

    At each step the running value (column ``i``) is combined with the next
    column (``i + 1``). The two operands are normally added, but when their
    log2 magnitudes differ by more than ``mantissa`` the addition is skipped
    and only the operand with the larger log2 magnitude is kept.

    The input tensor is not modified; all work happens on a clone.

    Args:
        tensor: 2D or 3D torch tensor. Accumulation runs along dimension 1.
        AdderType: one of ``"FP16"``, ``"BF16"`` or ``"FP32"``; selects the
            ``mantissa`` threshold used for the cut-off.

    Returns:
        A tuple ``(result, zero_mask_counter)`` where ``result`` is the last
        slice along dim 1 of the accumulated clone and ``zero_mask_counter``
        is a list with one boolean mask per accumulation step (True where the
        addition was skipped).

    Raises:
        AssertionError: if ``tensor`` is not 2D/3D or ``AdderType`` is not one
            of the supported names.
    """
    # Small offset so that log2 is finite for exact zeros.
    epsilon = 1e-10
    if not (len(tensor.shape) == 2 or len(tensor.shape) == 3):
        # The exception used to be constructed without `raise`, so invalid
        # shapes were never rejected here.
        raise AssertionError(
            f"It only supports 2D or 3D tensors, this tensor shape {tensor.shape}"
        )

    if AdderType == "FP16":
        mantissa = 10
    elif AdderType == "BF16":
        mantissa = 7
    elif AdderType == "FP32":
        # NOTE(review): FP16/BF16 use their explicit mantissa bit counts (10/7);
        # by the same convention FP32 would be 23, not 22 — confirm intent.
        mantissa = 22
    else:
        # Previously not raised, which left `mantissa` unbound further down.
        raise AssertionError("This Adder only supported FP16|BF16|FP32")

    temp_tensor = tensor.clone()
    zero_mask_counter = []
    for i in range(tensor.shape[1] - 1):
        prev = temp_tensor[:, i]       # running value accumulated so far
        prec = temp_tensor[:, i + 1]   # next operand to fold in
        log_prev = torch.log2(torch.abs(prev) + epsilon)
        log_prec = torch.log2(torch.abs(prec) + epsilon)
        # True where the operands are too far apart in magnitude to be added.
        zero_mask = torch.abs(log_prec - log_prev) > mantissa
        # Element-wise pick of the operand with the larger log2 magnitude
        # (ties keep `prec`).
        larger = torch.where(log_prec < log_prev, prev, prec)
        # Add the two operands, except where masked: there keep only the larger one.
        temp_tensor[:, i + 1] = torch.where(zero_mask, larger, prec + prev)
        zero_mask_counter.append(zero_mask)

    return temp_tensor[:, -1], zero_mask_counter


In [16]:
# Squaring standard-normal samples gives a non-negative 4x30 test matrix.
f = torch.randn(4, 30).pow(2)
print(f)
# Run the accumulator with the BF16 threshold; keep the result and the per-step masks.
ot, zero_mask_counter = MatrixAdder(f, AdderType="BF16")

tensor([[3.9370e+00, 7.8298e-01, 1.5034e-01, 2.4758e+00, 1.3864e+00, 5.6828e-01,
         4.7699e+00, 1.8230e+00, 2.9456e+00, 5.1370e-01, 5.0717e-02, 6.2752e-02,
         8.4385e-02, 5.9801e-01, 6.8525e+00, 2.9430e+00, 1.6369e+00, 2.2257e+00,
         6.8369e-01, 1.0105e+00, 5.6695e-01, 1.7879e-03, 4.0398e-01, 2.6070e-01,
         2.3527e-05, 4.6133e-01, 1.3302e+00, 6.8196e-01, 4.8022e-03, 5.0157e-01],
        [4.2412e-01, 1.5789e-01, 9.6760e-01, 2.2764e-01, 4.2269e+00, 1.1610e+00,
         1.8820e-02, 1.1281e+00, 5.8814e-02, 1.6904e+00, 7.4702e-01, 7.3028e-02,
         1.8471e+00, 4.2020e-02, 1.6039e+00, 3.8327e-03, 1.5869e+00, 3.2844e-02,
         1.1708e+00, 5.9470e-02, 2.6630e-03, 4.2497e-01, 2.6800e-01, 2.0122e-01,
         1.1860e+00, 1.6024e+00, 4.6157e-02, 9.1540e-02, 5.6143e-01, 5.1917e-02],
        [1.0231e-01, 4.4194e-01, 3.2181e-01, 1.7558e-02, 1.0916e+00, 1.3198e+00,
         3.2256e-01, 1.7297e+00, 1.2794e-01, 1.5333e+00, 3.5121e-01, 1.5289e+00,
         9.8581e-01, 2.334

In [17]:
# Display the list of per-step boolean masks returned by the MatrixAdder call.
zero_mask_counter

[tensor([False, False, False, False]),
 tensor([False, False, False, False]),
 tensor([False, False, False, False]),
 tensor([False, False, False,  True]),
 tensor([False, False, False, False]),
 tensor([False,  True, False, False]),
 tensor([False, False, False, False]),
 tensor([False,  True, False, False]),
 tensor([False, False, False,  True]),
 tensor([ True, False, False,  True]),
 tensor([ True,  True, False, False]),
 tensor([ True, False, False,  True]),
 tensor([False,  True, False, False]),
 tensor([False, False, False, False]),
 tensor([False,  True, False,  True]),
 tensor([False, False, False, False]),
 tensor([False,  True, False, False]),
 tensor([False, False,  True,  True]),
 tensor([False,  True, False,  True]),
 tensor([False,  True,  True, False]),
 tensor([ True, False,  True, False]),
 tensor([False, False, False,  True]),
 tensor([ True, False,  True,  True]),
 tensor([ True, False,  True, False]),
 tensor([False, False,  True, False]),
 tensor([False,  True, Fa

In [11]:
# Toy walk-through of a masked running sum over the columns of a small integer matrix:
# where the incoming column value is a multiple of 4 it is passed through unchanged,
# otherwise it is added to the running value from the previous column.
# (Same column-by-column update pattern as MatrixAdder, with a simpler mask.)
a = torch.arange(20).reshape(4,5)
for i in range(a.shape[1] -1):
    print(a)
    f = a[:, i+1]  # incoming column (indexes into `a`)
    g = a[:, i]  # running value stored in the previous column
    print("f", f)
    print("g", g)
    mask = (f % 4 == 0)  # True where the incoming value is a multiple of 4
    print(mask)
    out = f+g
    print("f+g", out)
    out[mask] = f[mask]  # masked positions keep the incoming value instead of the sum
    print("rull_out :", out)
    a[:, i+1] = out  # write back so the next iteration reads it as the running value
print(a)

tensor([[ 0,  1,  3,  6,  4],
        [ 5, 11, 18,  8, 17],
        [10, 21, 12, 25, 39],
        [15, 16, 33, 51, 70]])
f tensor([ 1, 11, 21, 16])
g tensor([ 0,  5, 10, 15])
tensor([False, False, False,  True])
f+g tensor([ 1, 16, 31, 31])
rull_out : tensor([ 1, 16, 31, 16])
tensor([[ 0,  1,  3,  6,  4],
        [ 5, 16, 18,  8, 17],
        [10, 31, 12, 25, 39],
        [15, 16, 33, 51, 70]])
f tensor([ 3, 18, 12, 33])
g tensor([ 1, 16, 31, 16])
tensor([False, False,  True, False])
f+g tensor([ 4, 34, 43, 49])
rull_out : tensor([ 4, 34, 12, 49])
tensor([[ 0,  1,  4,  6,  4],
        [ 5, 16, 34,  8, 17],
        [10, 31, 12, 25, 39],
        [15, 16, 49, 51, 70]])
f tensor([ 6,  8, 25, 51])
g tensor([ 4, 34, 12, 49])
tensor([False,  True, False, False])
f+g tensor([ 10,  42,  37, 100])
rull_out : tensor([ 10,   8,  37, 100])
tensor([[  0,   1,   4,  10,   4],
        [  5,  16,  34,   8,  17],
        [ 10,  31,  12,  37,  39],
        [ 15,  16,  49, 100,  70]])
f tensor([ 4, 17, 39