# Load data by tensorflow

import torch and tensorflow

set memory usage

In [1]:
import os
import random

import numpy as np
import tensorflow as tf
import tensorflow.contrib.eager as tfe

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as nfunc
from torch.nn.parameter import Parameter
from torch.utils.data import TensorDataset, DataLoader

gpu = "5"

os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = gpu

device = torch.device("cuda" if gpu else "cpu")

tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf_config.gpu_options.per_process_gpu_memory_fraction = 0.3

tf.enable_eager_execution(tf_config)

In [6]:
%load_ext autoreload
%autoreload 2

from tf_func import data_loader
from tf_func import mnist_model
from torch_func.load_dataset import load_dataset

class ConfigDict(object):
    """MNIST configration."""

    def __init__(self):
        self.num_classes = 10

        # List of tuples specify (kernel_size, number of filters) for each layer.
        self.filter_sizes_conv_layers = [(5, 32), (5, 64)]
        # Dictionary of pooling type ("max"/"average", size and stride).
        self.pool_params = {"type": "max", "size": 2, "stride": 2}
        self.num_units_fc_layers = [512]
        self.dropout_rate = 0
        self.batch_norm = True
        self.activation = None
        self.regularizer = None
        
        
config = ConfigDict()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# pytorch

In [23]:
def weights_init(m):
    """
    initialize normal distribution weight matrix
    and set bias to 0
    :param m:
    :return:
    """
    class_name = m.__class__.__name__
    fan_in = 0
    if class_name.find('Conv') != -1:
        shape = m.weight.data.shape
        fan_in = shape[1] * shape[2] * shape[3]
    if class_name.find('Linear') != -1:
        shape = m.weight.data.shape
        fan_in = shape[1]
    if fan_in:
        s = 1.0 * np.sqrt(6.0 / fan_in)
        transpose = np.random.uniform(-s, s, m.weight.data.shape).astype("float32")
        if debug:
            print(shape, transpose.sum())
        tensor = torch.from_numpy(transpose)
        m.weight = Parameter(tensor, requires_grad=True)
        if m.bias is not None:
            m.bias.data.zero_()
            
def set_framework_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [12]:
def evaluate_classifier(model, test_iter, device):
    total_acc = 0
    total_loss = 0
    size = 0
    model.eval()
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for images, labels in test_iter:
            size += images.numpy().shape[0]
            images = torch.FloatTensor(images.numpy()).permute(0, 3, 1, 2).to(device)
            labels = torch.LongTensor(labels.numpy()).to(device)
            logits = model(images)
            total_loss += criterion(logits, labels).item() * images.shape[0]
            pred_y = torch.max(logits, dim=1)[1]        
            total_acc += (pred_y == labels).sum().item()
    model.train()
    return total_acc / size, total_loss / size

In [25]:
def call_bn(bn, x, update_batch_stats=True):
    if bn.training is False:
        return bn(x)
    elif not update_batch_stats:
        return nfunc.batch_norm(x, None, None, bn.weight, bn.bias, True, bn.momentum, bn.eps)
    else:
        return bn(x)
    
    
class MLP(nn.Module):
    def __init__(self, layer_sizes, affine=False, top_bn=True):
        super(MLP, self).__init__()
        self.input_len = 1 * 28 * 28
        self.fc1 = nn.Linear(self.input_len, 1200)
        self.fc2 = nn.Linear(1200, 1200)
        self.fc3 = nn.Linear(1200, 10)

        self.bn_fc1 = nn.BatchNorm1d(1200, affine=affine)
        self.bn_fc2 = nn.BatchNorm1d(1200, affine=affine)
        self.top_bn = top_bn
        if top_bn:
            self.bn_fc3 = nn.BatchNorm1d(10, affine=affine)

    def forward(self, x, update_batch_stats=True, return_h=False):
        endpoints = {}
        h = nfunc.relu(call_bn(self.bn_fc1, self.fc1(x.view(-1, self.input_len)), update_batch_stats))
        endpoints["fc_layer0"] = h
        h = nfunc.relu(call_bn(self.bn_fc2, self.fc2(h), update_batch_stats))
        endpoints["fc_layer1"] = h
        if self.top_bn:
            h = call_bn(self.bn_fc3, self.fc3(h), update_batch_stats)
        else:
            h = self.fc3(h)
        logits = h
        if return_h:
            return logits, endpoints
        else:
            return logits


In [31]:
from torch_func.vat import VAT

In [36]:
import random
set_framework_seed(1)
device = torch.device("cpu")
train_l, train_ul, test_set = load_dataset("mnist", valid=True, dataset_seed=1, size=100)
print("N_train_labeled:{}, N_train_unlabeled:{}".format(train_l.N, train_ul.N))

test_set = TensorDataset(torch.FloatTensor(test_set.data), torch.LongTensor(test_set.label))
test_loader = DataLoader(test_set, 128, False)

# Define losses.
criterion = nn.CrossEntropyLoss()
debug = True

layer_sizes = [784, 1200, 1200, 10]

N_train_labeled:100, N_train_unlabeled:59000


In [40]:
model = MLP(layer_sizes=layer_sizes, affine=False, top_bn=True)
debug = True
set_framework_seed(1)
model.apply(weights_init)
model = model.to(device)
optimizer = optim.Adam(list(model.parameters()), lr=0.002)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1,
                                      gamma=0.95)

n_train = train_l.label.shape[0]
n_ul_train = train_ul.label.shape[0]
batch_size_l = 100
batch_size_ul = 250
eps = 0.3
xi = 0.000001
debug = False
for epoch in range(10):
    l_i, ul_i = 0, 0

    rand_ind = np.random.permutation(train_l.label.shape[0])
    train_images = train_l.data[rand_ind]
    train_labels = train_l.label[rand_ind]
    rand_ind = np.random.permutation(train_ul.data.shape[0])
    train_ul_images = train_ul.data[rand_ind]
    for i in range(1):

        images = torch.FloatTensor(train_images[batch_size_l*l_i:batch_size_l*(l_i + 1)])
        labels = torch.LongTensor(train_labels[batch_size_l*l_i:batch_size_l*(l_i + 1)])
        print("x", images.sum().item())
        images, labels = images.to(device), labels.to(device)

        batch_ind = np.random.choice(np.arange(train_ul_images.shape[0]), batch_size_ul)
        ul_images = torch.FloatTensor(train_ul_images[batch_ind])
        print("ul x", ul_images.sum().item())
        # ul_images = torch.FloatTensor(train_ul_images[batch_size_ul*l_i:batch_size_ul*(l_i + 1)])
        ul_images = ul_images.to(device)

        l_i = 0 if l_i >= n_train / batch_size_l - 1 else l_i + 1
        ul_i = 0 if ul_i >= n_ul_train / batch_size_ul - 1 else ul_i + 1

        logits = model(images)

        total_loss = 0
        sup_loss = 0

        # supervised loss
        xent_loss = criterion(logits, labels)
        print("sup", xent_loss.item())
        sup_loss += xent_loss

        vat_criterion = VAT(device, eps=eps, xi=xi, use_entmin=False, debug=debug)
        unsup_loss = vat_criterion(model, ul_images)
        print("unsup", unsup_loss)
        total_loss += sup_loss + unsup_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    if scheduler:
        scheduler.step()


torch.Size([1200, 784]) -13.070702
torch.Size([1200, 1200]) -44.62378
torch.Size([10, 1200]) -3.5299373
x 10491.24609375
ul x 25582.734375
sup 2.523423433303833
unsup tensor(0.1865, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 26232.44140625
sup 0.5586448907852173
unsup tensor(0.0317, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 25147.70703125
sup 0.45668429136276245
unsup tensor(0.0203, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 25282.7109375
sup 0.41053539514541626
unsup tensor(0.0171, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 25813.4609375
sup 0.3806508183479309
unsup tensor(0.0167, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 24906.30859375
sup 0.3595522344112396
unsup tensor(0.0178, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 25616.22265625
sup 0.3443249762058258
unsup tensor(0.0168, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 25217.87109375
sup 0.3329668343067169
unsup tensor(0.0181, grad_fn=<MeanBackward1>)
x 10491.24609375
ul x 25456.51953125
sup 0.32429847

# CrossEntropy

It works fine for Chainer and PyTorch

In [106]:
print("pytorch, cross entropy")
config.batch_norm = True
debug = False
affine = False
model = MLP(config)
np.random.seed(1)
model.apply(weights_init)
model = model.to(device)
model.train()

max_iters = 3
lr = 0.002

torch_optimizer = optim.Adam(list(model.parameters()), lr)

criterion = nn.CrossEntropyLoss(reduction='none')
iterator = dataset.dataset.make_one_shot_iterator()


for i in range(5):
    x, y = iterator.get_next()
    print("data %.5f" % x.numpy().sum())
    images = torch.FloatTensor(x.numpy()).permute(0, 3, 1, 2).to(device)
    labels = torch.LongTensor(y.numpy()).to(device)

    # Build model.
    logits = model(images)
    print("logits", (logits ** 2).sum().item())
    total_loss = 0
    loss_list = criterion(logits, labels)
    xent_loss = torch.mean(loss_list)
    total_loss = xent_loss

    # vat_criterion = VAT(device, 0.3, 1e-6, use_entmin=False)
    # vat_loss = vat_criterion(model, images)
    
    torch_optimizer.zero_grad()
#     total_loss.backward()
#     torch_optimizer.step()
#     torch_optimizer.zero_grad()
    print("iter %d,  train loss %.5f vat loss %.5f\n" % (i, total_loss.item(), vat_loss.item()))
    if (i+1) % 10 == 0:
        debug = False
        
        acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
        print("test acc %.5f,  loss %.5f" % (acc, loss))
    
debug = False
acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
print("test acc %.5f,  loss %.5f" % (acc, loss))

pytorch, cross entropy
data 12790.14453
logits 1279.9627685546875
iter 0,  train loss 2.63721 vat loss 0.00843

data 12953.98438
logits 1279.959716796875
iter 1,  train loss 2.44700 vat loss 0.00843

data 12442.99316
logits 1279.9593505859375
iter 2,  train loss 2.37316 vat loss 0.00843

data 12292.56152
logits 1279.9571533203125
iter 3,  train loss 2.68267 vat loss 0.00843

data 12402.40137
logits 1279.9608154296875
iter 4,  train loss 2.61225 vat loss 0.00843

test acc 0.11058,  loss 2.31515


In [90]:
print("pytorch, cross entropy")
config.batch_norm = True
debug = False
model = MLP(config)
np.random.seed(1)
model.apply(weights_init)
model = model.to(device)
model.train()

max_iters = 3
lr = 0.002

torch_optimizer = optim.Adam(list(model.parameters()), lr)

criterion = nn.CrossEntropyLoss(reduction='none')
iterator = dataset.dataset.make_one_shot_iterator()


for i in range(5):
    x, y = iterator.get_next()
    print("data %.5f" % x.numpy().sum())
    images = torch.FloatTensor(x.numpy()).permute(0, 3, 1, 2).to(device)
    labels = torch.LongTensor(y.numpy()).to(device)

    # Build model.
    logits = model(images)
    print("logits", logits.data.sum())
    total_loss = 0
    loss_list = criterion(logits, labels)
    xent_loss = torch.mean(loss_list)
    total_loss = xent_loss

    vat_criterion = VAT(device, 0.3, 1e-6, use_entmin=False)
    vat_loss = vat_criterion(model, images)
    
    torch_optimizer.zero_grad()
    total_loss.backward()
    torch_optimizer.step()
    torch_optimizer.zero_grad()
    print("iter %d,  train loss %.5f vat loss %.5f\n" % (i, total_loss.item(), vat_loss.item()))
    if (i+1) % 10 == 0:
        debug = False
        
        acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
        print("test acc %.5f,  loss %.5f" % (acc, loss))
    
debug = False
acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
print("test acc %.5f,  loss %.5f" % (acc, loss))

pytorch, cross entropy
data 12790.14453
logits tensor(-3.8147e-06)
iter 0,  train loss 2.63721 vat loss 0.39495

data 12953.98438
logits tensor(201.0817)
iter 1,  train loss 1.51227 vat loss 0.02097

data 12442.99316
logits tensor(324.2178)




iter 2,  train loss 0.91790 vat loss 0.02568

data 12292.56152
logits tensor(304.0911)
iter 3,  train loss 0.88271 vat loss 0.03013

data 12402.40137
logits tensor(286.4545)
iter 4,  train loss 0.95338 vat loss 0.04151

test acc 0.77704,  loss 0.68860


In [89]:
def call_bn(bn, x, update_batch_stats=True):
    if bn.training is False:
        return bn(x)
    elif not update_batch_stats:
        return nfunc.batch_norm(x, None, None, bn.weight, bn.bias, True, bn.momentum, bn.eps)
    else:
        return bn(x)
    
class MLP(nn.Module):
    def __init__(self, config):
        super(MLP, self).__init__()
        self.input_len = 1 * 28 * 28
        self.fc1 = nn.Linear(self.input_len, 1200)
        self.fc2 = nn.Linear(1200, 1200)
        self.fc3 = nn.Linear(1200, 10)

        self.bn_fc1 = nn.BatchNorm1d(1200, eps=2e-5, affine=False)
        self.bn_fc2 = nn.BatchNorm1d(1200, eps=2e-5, affine=False)
        self.bn_fc3 = nn.BatchNorm1d(10, eps=2e-5, affine=False)

    def forward(self, x, update_batch_stats=True):

        x = x.view(-1, self.input_len)
        endpoints = {}
        h = nfunc.relu(call_bn(self.bn_fc1, self.fc1(x), update_batch_stats))
        endpoints["fc_layer0"] = h
        h = nfunc.relu(call_bn(self.bn_fc2, self.fc2(h), update_batch_stats))
        endpoints["fc_layer1"] = h
        h = call_bn(self.bn_fc3, self.fc3(h), update_batch_stats)
        return h

In [88]:
model = MLP(config)
np.random.seed(1)
model.apply(weights_init)
model = model.to(device)
print(images.sum())
model.train()
p = model(images, update_batch_stats=True)
print(p.sum())
p = model(images, update_batch_stats=False)
print(p.sum())
model.eval()
p = model(images, update_batch_stats=True)
print(p.sum())
model.eval()
p = model(images, update_batch_stats=False)
print(p.sum())

tensor(12442.9932)
tensor(1.1444e-05, grad_fn=<SumBackward0>)
tensor(1.1444e-05, grad_fn=<SumBackward0>)
tensor(68.5641, grad_fn=<SumBackward0>)
tensor(68.5641, grad_fn=<SumBackward0>)


In [85]:
print("pytorch, cross entropy")
config.batch_norm = True
debug = False
model = MLP(config)
np.random.seed(1)
model.apply(weights_init)
model = model.to(device)
model.train()

max_iters = 3
lr = 0.002

torch_optimizer = optim.Adam(list(model.parameters()), lr)

criterion = nn.CrossEntropyLoss(reduction='none')
iterator = dataset.dataset.make_one_shot_iterator()
debug = True

class VAT(object):

    def __init__(self, device, eps, xi, k=1, use_entmin=False):
        self.device = device
        self.xi = xi
        self.eps = eps
        self.k = k
        self.kl_div = nn.KLDivLoss(size_average=False, reduce=False)
        self.use_entmin = use_entmin

    def __call__(self, model, X):
        logits = model(X, update_batch_stats=False)
        # logits = model(X)
        prob_logits = nfunc.softmax(logits.detach(), dim=1)
        d = VAT.approx_power_iter(model, X, prob_logits, self.xi, self.k, self.device)
        
        
        logits_hat = model(X + self.eps * d, update_batch_stats=False)
        LDS = torch.mean(self.kl_div(
            nfunc.log_softmax(logits_hat, dim=1), prob_logits).sum(dim=1))
        if debug:
            print("post cost", LDS.item())
        if self.use_entmin:
            LDS += _entropy(logits_hat)

        return LDS

    @staticmethod
    def approx_power_iter(model, X, prob_logits, xi, k, device):
        kl_div = nn.KLDivLoss(size_average=False, reduce=False)
        r = torch.FloatTensor(np.random.random(X.shape))
        d = _l2_normalize(r).to(device)
        if debug:
            print("d", d.sum(), (d ** 2).sum())
            
        for ip in range(k):
            X_hat = X + d * xi
            if debug:
                print("input", X_hat.data.sum().item())
            X_hat.requires_grad = True
            model.eval()
            logits_hat = model(X_hat, update_batch_stats=False)
            # logits_hat = model(X_hat)
            if debug:
                print("output", logits_hat.data.sum().item())
            adv_distance = torch.mean(kl_div(nfunc.log_softmax(logits_hat, dim=1), prob_logits).sum(dim=1))
            if debug:
                print("loss", adv_distance.item())
            adv_distance.backward()
            if debug:
                print("grad d", X_hat.grad.sum())
            d = _l2_normalize(X_hat.grad).to(device)
            if debug:
                print("d", d.sum())
        return d
    
for i in range(3):
    x, y = iterator.get_next()
    print("data %.5f" % x.numpy().sum())
    images = torch.FloatTensor(x.numpy()).permute(0, 3, 1, 2).to(device)
    labels = torch.LongTensor(y.numpy()).to(device)

    # Build model.
    logits = model(images)
    total_loss = 0
    loss_list = criterion(logits, labels)
    xent_loss = torch.mean(loss_list)
    total_loss = xent_loss

    vat_criterion = VAT(device, 0.3, 1e-6, use_entmin=False)
    vat_loss = vat_criterion(model, images)
    
#     torch_optimizer.zero_grad()
#     total_loss.backward()
#     torch_optimizer.step()
#     torch_optimizer.zero_grad()
    print("iter %d,  train loss %.5f vat loss %.5f\n" % (i, total_loss.item(), vat_loss.item()))
    if (i+1) % 10 == 0:
        debug = False
        
        acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
        print("test acc %.5f,  loss %.5f" % (acc, loss))
    
debug = False
acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
print("test acc %.5f,  loss %.5f" % (acc, loss))

pytorch, cross entropy
data 12790.14453
d tensor(3101.4385) tensor(128.)
input 12790.1474609375
output 60.19732666015625
loss 0.2745497226715088
grad d tensor(0.1649)
d tensor(51.4371)
post cost 0.3949525058269501
iter 0,  train loss 2.63721 vat loss 0.39495

data 12953.98438
d tensor(3108.1763) tensor(128.)
input 12953.9853515625
output 79.67488098144531
loss 3.4834282125473237e-09
grad d tensor(7.3648e-09)
d tensor(15.5153)
post cost 0.008067242801189423
iter 1,  train loss 2.27209 vat loss 0.00807

data 12442.99316
d tensor(3103.8330) tensor(128.)
input 12442.994140625
output 70.6865234375
loss 4.737283454403496e-09
grad d tensor(1.4721e-09)
d tensor(8.9609)
post cost 0.008078992366790771
iter 2,  train loss 2.28871 vat loss 0.00808





test acc 0.12029,  loss 2.31698


In [80]:
def _l2_normalize(d):
    d = d.cpu().numpy()
    axis = tuple(range(1, len(d.shape)))
    reshape = tuple([-1] + [1] * (len(d.shape) -1))
    d /= (np.sqrt(np.sum(d ** 2, axis=axis)).reshape(
        reshape) + 1e-16)
    return torch.from_numpy(d)


def _entropy(logits):
    p = nfunc.softmax(logits, dim=1)
    return -torch.mean(torch.sum(p * F.log_softmax(logits, dim=1), dim=1))




In [60]:
print("pytorch, cross entropy")
config.batch_norm = True
debug = False
model = MLP(config)
np.random.seed(1)
model.apply(weights_init)
model = model.to(device)
model.train()

max_iters = 3
lr = 0.002

torch_optimizer = optim.Adam(list(model.parameters()), lr)

criterion = nn.CrossEntropyLoss(reduction='none')
iterator = dataset.dataset.make_one_shot_iterator()


for i in range(5):
    x, y = iterator.get_next()
    print("data %.5f" % x.numpy().sum())
    images = torch.FloatTensor(x.numpy()).permute(0, 3, 1, 2).to(device)
    labels = torch.LongTensor(y.numpy()).to(device)

    # Build model.
    logits = model(images)
    total_loss = 0
    loss_list = criterion(logits, labels)
    xent_loss = torch.mean(loss_list)
    total_loss = xent_loss

    vat_criterion = VAT(device, 0.3, 1e-6, use_entmin=False)
    vat_loss = vat_criterion(model, images)
    
#     torch_optimizer.zero_grad()
#     total_loss.backward()
#     torch_optimizer.step()
#     torch_optimizer.zero_grad()
    print("iter %d,  train loss %.5f vat loss %.5f\n" % (i, total_loss.item(), vat_loss.item()))
    if (i+1) % 10 == 0:
        debug = False
        
        acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
        print("test acc %.5f,  loss %.5f" % (acc, loss))
    
debug = False
acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
print("test acc %.5f,  loss %.5f" % (acc, loss))

pytorch, cross entropy
data 12790.14453
iter 0,  train loss 2.63721 vat loss 0.17757

data 12953.98438
iter 1,  train loss 2.44700 vat loss 0.19656

data 12442.99316




iter 2,  train loss 2.37316 vat loss 0.19594

data 12292.56152
iter 3,  train loss 2.68267 vat loss 0.20597

data 12402.40137
iter 4,  train loss 2.61225 vat loss 0.18760

test acc 0.12340,  loss 2.41112


In [14]:
print("pytorch, cross entropy")
config.batch_norm = True
debug = False
model = MLP(config)
np.random.seed(1)
model.apply(weights_init)
model = model.to(device)
model.train()

max_iters = 3
lr = 0.002

torch_optimizer = optim.Adam(list(model.parameters()), lr)

criterion = nn.CrossEntropyLoss(reduction='none')
iterator = dataset.dataset.make_one_shot_iterator()

for i in range(max_iters):
    images, labels = iterator.get_next()
    print("data %.5f" % images.numpy().sum())
    images = torch.FloatTensor(images.numpy()).permute(0, 3, 1, 2).to(device)
    labels = torch.LongTensor(labels.numpy()).to(device)

    # Build model.
    logits = model(images)
    total_loss = 0
    loss_list = criterion(logits, labels)
    xent_loss = torch.mean(loss_list)
    total_loss = xent_loss
    
    print("iter %d,  train loss %.5f\n" % (i, total_loss))
    torch_optimizer.zero_grad()
    total_loss.backward()
    torch_optimizer.step()
    torch_optimizer.zero_grad()
    
debug = False
acc, loss = evaluate_classifier(model, test_dataset.dataset.make_one_shot_iterator(), device)
print("test acc %.5f,  loss %.5f" % (acc, loss))

pytorch, cross entropy
data 12790.14453
iter 0,  train loss 2.38092

data 12953.98438
iter 1,  train loss 1.61463

data 12442.99316
iter 2,  train loss 1.55225

test acc 0.45202,  loss 1.90216
