In [None]:
import os
import shutil
import random
import pickle
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
#from tqdm import tqdm
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import binarybrain as bb

# NOTE(review): presumably restricts BinaryBrain to host (CPU) execution so no
# GPU is required for this comparison — confirm against the BinaryBrain docs.
bb.set_host_only(True)

In [None]:
class SoftmaxWithLoss:
    """NumPy reference implementation of softmax followed by cross-entropy.

    The softmax is taken over axis 1 of the input. The target ``t`` has the
    same layout as ``x`` and may carry per-element weights (e.g. a one-hot
    encoding multiplied by class weights); the loss and the gradient are both
    normalised by ``np.sum(t)``.
    """

    def __init__(self):
        self.loss = None  # loss value returned by the most recent forward()
        self.y = None     # softmax output of the most recent forward(), in x's shape
        self.t = None     # target array passed to the most recent forward()
        self.dx = None    # gradient w.r.t. x computed by the most recent forward()

    def forward(self, x, t):
        """Compute the loss and cache the gradient with respect to ``x``.

        Args:
            x: Array of logits with at least two axes; axis 1 is the softmax
                axis and any trailing axes are flattened internally.
            t: Target array reshaped the same way as ``x``. Only entries with
                ``t > 0`` contribute to the loss. ``np.sum(t)`` must be
                non-zero, otherwise the normalisation divides by zero.

        Returns:
            The scalar loss ``-sum(t * log(softmax(x))) / sum(t)``.
        """
        shape = x.shape
        # (N, C, ...) -> (C, N, M): put the softmax axis first so that the
        # axis-0 reductions below broadcast cleanly.
        logits = x.reshape(shape[0], shape[1], -1).transpose(1, 0, 2)
        target = t.reshape(t.shape[0], t.shape[1], -1).transpose(1, 0, 2)

        # Shift by the per-position maximum so exp() cannot overflow.
        shifted = logits - np.max(logits, axis=0)
        exp_shifted = np.exp(shifted)
        sum_exp = np.sum(exp_shifted, axis=0)  # >= 1: the max element contributes exp(0)
        y = exp_shifted / sum_exp

        # Log-softmax computed directly instead of np.log(y): when a target
        # class probability underflows to 0, np.log(y) would be -inf and the
        # loss would become inf.
        log_y = shifted - np.log(sum_exp)

        total = np.sum(target)
        dx = (y * np.sum(target, axis=0) - target) / total

        # Cache results in the caller's original layout.
        self.dx = dx.transpose(1, 0, 2).reshape(shape)
        self.y = y.transpose(1, 0, 2).reshape(shape)
        self.t = t

        mask = target > 0
        self.loss = -np.sum(log_y[mask] * target[mask]) / total
        return self.loss

    def backward(self, dout=1):
        """Return the gradient cached by the last ``forward()`` call.

        ``dout`` is accepted for interface compatibility but is not applied;
        the cached gradient is returned unscaled. Returns ``None`` if
        ``forward()`` has not been called yet.
        """
        return self.dx

# ウェイト無し (without class weights)

In [None]:
# Unweighted comparison of three softmax cross-entropy implementations:
# the NumPy reference (SoftmaxWithLoss), PyTorch, and BinaryBrain.

# Problem size: n samples, c classes, h x w positions per sample.
n = 128
c = 10
h = 17
w = 28

# Random logits and integer class labels; tt is the one-hot encoding of t
# moved to the same (n, c, h, w) layout as x.
x  = np.random.normal(size=(n, c, h, w)).astype(np.float32)
t  = np.random.randint(0, c, size=(n, h, w), dtype=np.int64)
tt = np.identity(c)[t].astype(np.float32).transpose(0, 3, 1, 2)
x_torch = torch.tensor(x.copy(), requires_grad=True)
t_torch = torch.from_numpy(t)

x_bb = bb.FrameBuffer.from_numpy(x)
t_bb = bb.FrameBuffer.from_numpy(tt)

criterion       = SoftmaxWithLoss()
criterion_torch = nn.CrossEntropyLoss()
criterion_bb    = bb.LossSoftmaxCrossEntropy()

# NumPy reference: dy is the scalar loss, dx the gradient w.r.t. x.
# backward() ignores its argument, so the loss is not passed in as an
# upstream gradient.
dy = criterion.forward(x, tt)
dx = criterion.backward()

# PyTorch: the gradient ends up in x_torch.grad.
loss_torch = criterion_torch(x_torch, t_torch)
loss_torch.backward()
grad_torch = x_torch.grad.detach().numpy()

# BinaryBrain: the value returned by calculate() is compared against the
# gradients below; the loss is read with get().
dy_bb = criterion_bb.calculate(x_bb, t_bb)
grad_bb = dy_bb.numpy()

print('---loss---')
print(dy)
print(loss_torch.item())
print(criterion_bb.get())

# Difference statistics are printed as std, min, max (same order as the
# weighted comparison cell); first torch vs. NumPy, then torch vs. BinaryBrain.
print('---diff---')
for diff in (grad_torch - dx, grad_torch - grad_bb):
    print(np.std(diff))
    print(np.min(diff))
    print(np.max(diff))

# Gradients over all classes at sample 0, position (0, 0).
print('---content---')
print(dx[0,:,0,0])
print(grad_torch[0,:,0,0])
print(grad_bb[0,:,0,0])

# ウェイトあり (with class weights)

In [None]:
# Weighted comparison: the same three implementations (NumPy reference,
# PyTorch, BinaryBrain), this time with per-class weights.

# Problem size: n samples, c classes, h x w positions per sample.
n = 512
c = 10
h = 31
w = 28

# Random per-class weights, normalised so they sum to 1.
# (Alternative sketched in the original notebook: uniform weights of 1 with
#  class 5 scaled by 0.5.)
weight = np.random.rand(c).astype(np.float32)
weight = weight / weight.sum()

# Random logits and labels. one_hot is laid out (n, h, w, c); tt is the plain
# one-hot target and tw the class-weighted target, both viewed as (n, c, h, w).
x  = np.random.normal(size=(n, c, h, w)).astype(np.float32)
t  = np.random.randint(0, c, size=(n, h, w), dtype=np.int64)
one_hot = np.identity(c)[t].astype(np.float32)
tt = one_hot.transpose(0, 3, 1, 2)
tw = (one_hot * weight).transpose(0, 3, 1, 2)

# PyTorch inputs: integer labels plus the weight vector.
x_torch = torch.tensor(x.copy(), requires_grad=True)
t_torch = torch.from_numpy(t)
w_torch = torch.from_numpy(weight)

# BinaryBrain inputs: the weights are folded into the target buffer.
x_bb = bb.FrameBuffer.from_numpy(x)
t_bb = bb.FrameBuffer.from_numpy(tw)

criterion       = SoftmaxWithLoss()
criterion_torch = nn.CrossEntropyLoss(weight=w_torch)
criterion_bb    = bb.LossSoftmaxCrossEntropy()

# NumPy reference: dy is the scalar loss, dx the gradient w.r.t. x.
dy = criterion.forward(x, tw)
dx = criterion.backward(dy)

# PyTorch: the gradient ends up in x_torch.grad.
loss_torch = criterion_torch(x_torch, t_torch)
loss_torch.backward()
grad_torch = x_torch.grad.detach().numpy()

# BinaryBrain: the value returned by calculate() is compared against the
# gradients below; the loss is read with get().
dy_bb = criterion_bb.calculate(x_bb, t_bb)
grad_bb = dy_bb.numpy()

print('---loss---')
for loss_value in (dy, loss_torch.item(), criterion_bb.get()):
    print(loss_value)

# std / min / max of the gradient differences: torch vs. NumPy, then torch vs. BinaryBrain.
print('---diff---')
for delta in (grad_torch - dx, grad_torch - grad_bb):
    for stat in (np.std, np.min, np.max):
        print(stat(delta))

# Gradients over all classes at sample 0, position (0, 0).
print('---content---')
for grad in (dx, grad_torch, grad_bb):
    print(grad[0, :, 0, 0])