In [10]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda
from torch.utils.data import DataLoader
import numpy as np
from train_utils import *
import torch.nn.functional as F


device = torch.device('cuda')
batchsize = 100
#================================
# Quantization levels
#================================
img_half_level = 4
weight_bit = 8 
output_bit = 6
isint = 0
clamp_std = 0
noise_scale = 5e-2

def MNIST_loaders(train_batch_size=50000, test_batch_size=10000):

    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))])

    train_loader = DataLoader(
        MNIST('./data/', train=True,
              download=True,
              transform=transform),
        batch_size=train_batch_size, shuffle=True)

    test_loader = DataLoader(
        MNIST('./data/', train=False,
              download=True,
              transform=transform),
        batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader


def overlay_y_on_x(x, y):
    """
    Replace the first 10 pixels of data [x] with one-hot-encoded label [y]
    """
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_


class Net(torch.nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda()]

    def predict(self, x): 
        # 这个predict方法是理解ff方法的关键，它不是像普通的predict方法一样，输入一个样本，输出一个长度为num_cls的softmax预测向量
        # 而是一个样本反复输入这个网络num_cls次，把每种带标签的可能都计算一个goodness，也就是这个数据是好数据的可能性，找出最高goodness的就是预测类别
        goodness_per_label = []
        for label in range(10): # 对每一个标签进行预测
            h = overlay_y_on_x(x, label) # h是输入x和标签label的叠加
            goodness = [] # goodness是一个列表，里面存放了每一层的结果向量的均方
            for layer in self.layers: # 对每一层进行前传
                h = layer(h) # h是每一层的输出
                goodness += [h.pow(2).mean(1)] # goodness是每一层的结果向量的均方。h.pow(2)是h的每一个元素的平方，mean(1)是对每一行求均值
            goodness_per_label += [sum(goodness).unsqueeze(1)] # goodness_per_label是每一层的结果向量的均方的和
        goodness_per_label = torch.cat(goodness_per_label, 1) # goodness_per_label是每一层的结果向量的均方的和的列表
        return goodness_per_label.argmax(1) # 返回的是goodness_per_label中每一行最大值的索引，也就是说，返回的是每一行最大值的列索引

    def train(self): #, x_pos, x_neg): # 这个train方法是对整个网络进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        x, y = next(iter(train_loader))
        x, _ = my.data_quantization_sym(x, half_level=img_half_level)
        x, y = x.cuda(), y.cuda()
        x_pos = overlay_y_on_x(x, y)
        # rnd = torch.randperm(x.size(0)) # 生成一个从0到n-1的随机整数序列。
        # x_neg = overlay_y_on_x(x, y[rnd])
        y_rnd = y.clone()
        for i, y_i in enumerate(y):
            li = list(range(10))
            li.remove(y_i)
            j = np.random.choice(li)
            y_rnd[i] = j

        x_neg = overlay_y_on_x(x, y_rnd)

        h_pos, h_neg = x_pos, x_neg # h_pos和h_neg是正样本和负样本的输入
        for i, layer in enumerate(self.layers): # 对每一层进行训练
            print('training layer', i, '...') # 这里的i是层数
            h_pos, h_neg = layer.train(h_pos, h_neg) # 对每一层进行训练，得到了正样本和负样本的结果向量，这个结果向量是该层的输出，也是下一层的输入
            # 也就是说，这个训练的过程中，正样本在前传过程中得到的每一层输出都被认为是正的，负样本在前传过程中得到的每一层输出都被认为是负的，也就是说，出身决定一切


class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=8e-3)
        self.threshold = 2.0
        self.num_epochs = 8000 # 训练的次数是1000次
        # self.linear = my.Linear_quant_noise(in_features, out_features, weight_bit=weight_bit, output_bit=output_bit, isint=isint, clamp_std=clamp_std, noise_scale=noise_scale, bias=True)
        self.weight_bit = weight_bit
        self.output_bit = output_bit
        self.isint = 0
        self.clamp_std = 0
        self.noise_scale = 0
        self.weight_half_level = 2 ** weight_bit / 2 - 1
        self.output_half_level = 2 ** output_bit / 2 - 1
        # self.linear = nn.Linear(in_features, out_features, bias=True)

    def forward(self, x):
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)  # 这个是对输入做了归一化，使得输入的模长为1，这在论文里有解释
        # x_direction = x / x.max()
        self.weight_, self.bias_ = my.Weight_Quant_Noise.apply(self.weight, self.bias,
                                            self.weight_half_level, 
                                            self.isint, self.clamp_std,
                                            self.noise_scale
                                            )
        x_direction = self.relu(
            # torch.mm(x_direction, self.weight.T) +
            # self.bias.unsqueeze(0) # 这个是对输入做了最基本的前向传播，得到了结果向量4
            # self.linear(x_direction)
            F.linear(x_direction, self.weight_, self.bias_)
            ) # 注意，在前传之后，随即使用了relu激活函数，这意味着每一层的所有激活值都是非负的
        
        x = my.Feature_Quant.apply(x, self.output_half_level, self.isint)

        return x_direction

    def train(self, x_pos, x_neg):
        # 训练其实就是对每一层分别进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        # 每一层的forward方法定义如上一个函数，这里的train方法定义了训练的过程
        for i in tqdm(range(self.num_epochs)):
            # minibatch
            # bz = 100
            # for j in range(0, x_pos.size(0), bz):
            #     mask = torch.randperm(x_pos.size(0))[:bz]
            #     x_pos = x_pos[mask] # 随机采样1000个正样本
            #     x_neg = x_neg[mask] # 随机采样1000个负样本


                # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
                #     visualize_sample(data, name)
                
                # print(self.forward(x_pos).pow(2), self.forward(x_pos).pow(2).shape)
                g_pos = self.forward(x_pos).pow(2).mean(1) # g_pos 是正样本x_pos在该层前向传播得到的结果向量的均方
                g_neg = self.forward(x_neg).pow(2).mean(1) # g_neg 是负样本x_neg在该层前向传播得到的结果向量的均方
                # 论文关于使用L2范数来度量的理由：
                # There are two main reasons for using the squared length of the activity vector as the goodness function.
                # First, it has very simple derivatives. Second, layer normalization removes all trace of the goodness.
                
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                # 随着训练过程，loss下降，g_pos将上升，g_neg将下降
                loss = torch.log(1 + torch.exp(torch.cat([
                    -g_pos + self.threshold,
                    g_neg - self.threshold]))).mean() # loss = [log(1+exp(-(g_pos-threshold))) + log(1+exp(g_neg-threshold))] / 2
                # print(loss)
                self.opt.zero_grad()
                # this backward just compute the derivative and hence
                # is not considered backpropagation.
                loss.backward()
                self.opt.step()
                # 关于这里为什么能够work：
                # 1. loss是权重的函数，loss的核心思想是让g_pos上升，g_neg下降
                # 2. g_pos和g_neg是x_pos和x_neg的函数，x_pos和x_neg反映了客观世界，是这样要学习的对象。有了x_pos和x_neg，就能够计算出g_pos和g_neg，有了g_pos和g_neg，就能够计算出loss
                # 3. 通过loss.backward()，计算loss对权重的梯度，使得loss下降，g_pos上升，g_neg下降
                # 4. 通过self.opt.step()，更新了self.weight和self.bias
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

    
# def visualize_sample(data, name='', idx=0):
#     reshaped = data[idx].cpu().reshape(28, 28)
#     plt.figure(figsize = (4, 4))
#     plt.title(name)
#     plt.imshow(reshaped, cmap="gray")
#     plt.show()
    
    
if __name__ == "__main__":
    torch.manual_seed(1234)
    train_loader, test_loader = MNIST_loaders(train_batch_size=50000, test_batch_size=10000)

    net = Net([784, 500, 500])
    x, y = next(iter(train_loader))
    x, y = x.cuda(), y.cuda()
    # x_pos = overlay_y_on_x(x, y)
    # # rnd = torch.randperm(x.size(0)) # 生成一个从0到n-1的随机整数序列。
    # # x_neg = overlay_y_on_x(x, y[rnd])
    # y_rnd = y.clone()
    # for i, y_i in enumerate(y):
    #     li = list(range(10))
    #     li.remove(y_i)
    #     j = np.random.choice(li)
    #     y_rnd[i] = j

    # x_neg = overlay_y_on_x(x, y_rnd)
    # # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
    # #     visualize_sample(data, name)
    
    net.train()#x_pos, x_neg)

    print('train error:', 1.0 - net.predict(x).eq(y).float().mean().item())

    x_te, y_te = next(iter(test_loader))
    x_te, y_te = x_te.cuda(), y_te.cuda()

    print('test error:', 1.0 - net.predict(x_te).eq(y_te).float().mean().item())

    print(net.predict(x_te)[:30], y_te[:30])


training layer 0 ...


100%|██████████| 8000/8000 [04:03<00:00, 32.80it/s]


training layer 1 ...


100%|██████████| 8000/8000 [02:57<00:00, 45.17it/s]


train error: 0.04107999801635742
test error: 0.05169999599456787
tensor([7, 2, 1, 0, 4, 1, 4, 9, 6, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1], device='cuda:0') tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1], device='cuda:0')


In [1]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda
from torch.utils.data import DataLoader
import numpy as np
from train_utils import *
import torch.nn.functional as F


device = torch.device('cuda')
batchsize = 100
#================================
# Quantization levels
#================================
img_half_level = 4
weight_bit = 8 
output_bit = 6
isint = 0
clamp_std = 0
noise_scale = 5e-2

def MNIST_loaders(train_batch_size=50000, test_batch_size=10000):

    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))])

    train_loader = DataLoader(
        MNIST('./data/', train=True,
              download=True,
              transform=transform),
        batch_size=train_batch_size, shuffle=True)

    test_loader = DataLoader(
        MNIST('./data/', train=False,
              download=True,
              transform=transform),
        batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader


def overlay_y_on_x(x, y):
    """
    Replace the first 10 pixels of data [x] with one-hot-encoded label [y]
    """
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_


class Net(torch.nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda()]

    def predict(self, x): 
        # 这个predict方法是理解ff方法的关键，它不是像普通的predict方法一样，输入一个样本，输出一个长度为num_cls的softmax预测向量
        # 而是一个样本反复输入这个网络num_cls次，把每种带标签的可能都计算一个goodness，也就是这个数据是好数据的可能性，找出最高goodness的就是预测类别
        goodness_per_label = []
        for label in range(10): # 对每一个标签进行预测
            h = overlay_y_on_x(x, label) # h是输入x和标签label的叠加
            goodness = [] # goodness是一个列表，里面存放了每一层的结果向量的均方
            for layer in self.layers: # 对每一层进行前传
                h = layer(h) # h是每一层的输出
                goodness += [h.pow(2).mean(1)] # goodness是每一层的结果向量的均方。h.pow(2)是h的每一个元素的平方，mean(1)是对每一行求均值
            goodness_per_label += [sum(goodness).unsqueeze(1)] # goodness_per_label是每一层的结果向量的均方的和
        goodness_per_label = torch.cat(goodness_per_label, 1) # goodness_per_label是每一层的结果向量的均方的和的列表
        return goodness_per_label.argmax(1) # 返回的是goodness_per_label中每一行最大值的索引，也就是说，返回的是每一行最大值的列索引

    def train(self): #, x_pos, x_neg): # 这个train方法是对整个网络进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        x, y = next(iter(train_loader))
        x, _ = my.data_quantization_sym(x, half_level=img_half_level)
        x, y = x.cuda(), y.cuda()
        x_pos = overlay_y_on_x(x, y)
        # rnd = torch.randperm(x.size(0)) # 生成一个从0到n-1的随机整数序列。
        # x_neg = overlay_y_on_x(x, y[rnd])
        y_rnd = y.clone()
        for i, y_i in enumerate(y):
            li = list(range(10))
            li.remove(y_i)
            j = np.random.choice(li)
            y_rnd[i] = j

        x_neg = overlay_y_on_x(x, y_rnd)

        h_pos, h_neg = x_pos, x_neg # h_pos和h_neg是正样本和负样本的输入
        for i, layer in enumerate(self.layers): # 对每一层进行训练
            print('training layer', i, '...') # 这里的i是层数
            h_pos, h_neg = layer.train(h_pos, h_neg) # 对每一层进行训练，得到了正样本和负样本的结果向量，这个结果向量是该层的输出，也是下一层的输入
            # 也就是说，这个训练的过程中，正样本在前传过程中得到的每一层输出都被认为是正的，负样本在前传过程中得到的每一层输出都被认为是负的，也就是说，出身决定一切


class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=8e-3)
        self.threshold = 2.0
        self.num_epochs = 4000000 # 训练的次数是1000次
        # self.linear = my.Linear_quant_noise(in_features, out_features, weight_bit=weight_bit, output_bit=output_bit, isint=isint, clamp_std=clamp_std, noise_scale=noise_scale, bias=True)
        self.weight_bit = weight_bit
        self.output_bit = output_bit
        self.isint = 0
        self.clamp_std = 0
        self.noise_scale = 0
        self.weight_half_level = 2 ** weight_bit / 2 - 1
        self.output_half_level = 2 ** output_bit / 2 - 1
        # self.linear = nn.Linear(in_features, out_features, bias=True)

    def forward(self, x):
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)  # 这个是对输入做了归一化，使得输入的模长为1，这在论文里有解释
        # x_direction = x / x.max()
        self.weight_, self.bias_ = my.Weight_Quant_Noise.apply(self.weight, self.bias,
                                            self.weight_half_level, 
                                            self.isint, self.clamp_std,
                                            self.noise_scale
                                            )
        x_direction = self.relu(
            # torch.mm(x_direction, self.weight.T) +
            # self.bias.unsqueeze(0) # 这个是对输入做了最基本的前向传播，得到了结果向量4
            # self.linear(x_direction)
            F.linear(x_direction, self.weight_, self.bias_)
            ) # 注意，在前传之后，随即使用了relu激活函数，这意味着每一层的所有激活值都是非负的
        
        x = my.Feature_Quant.apply(x, self.output_half_level, self.isint)

        return x_direction

    def train(self, x_pos, x_neg):
        # 训练其实就是对每一层分别进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        # 每一层的forward方法定义如上一个函数，这里的train方法定义了训练的过程
        for i in tqdm(range(self.num_epochs)):
            # minibatch
            # bz = 100
            # for j in range(0, x_pos.size(0), bz):
            #     mask = torch.randperm(x_pos.size(0))[:bz]
            #     x_pos = x_pos[mask] # 随机采样1000个正样本
            #     x_neg = x_neg[mask] # 随机采样1000个负样本


                # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
                #     visualize_sample(data, name)
                
                # print(self.forward(x_pos).pow(2), self.forward(x_pos).pow(2).shape)
                g_pos = self.forward(x_pos).pow(2).mean(1) # g_pos 是正样本x_pos在该层前向传播得到的结果向量的均方
                g_neg = self.forward(x_neg).pow(2).mean(1) # g_neg 是负样本x_neg在该层前向传播得到的结果向量的均方
                # 论文关于使用L2范数来度量的理由：
                # There are two main reasons for using the squared length of the activity vector as the goodness function.
                # First, it has very simple derivatives. Second, layer normalization removes all trace of the goodness.
                
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                # 随着训练过程，loss下降，g_pos将上升，g_neg将下降
                loss = torch.log(1 + torch.exp(torch.cat([
                    -g_pos + self.threshold,
                    g_neg - self.threshold]))).mean() # loss = [log(1+exp(-(g_pos-threshold))) + log(1+exp(g_neg-threshold))] / 2
                # print(loss)
                self.opt.zero_grad()
                # this backward just compute the derivative and hence
                # is not considered backpropagation.
                loss.backward()
                self.opt.step()
                # 关于这里为什么能够work：
                # 1. loss是权重的函数，loss的核心思想是让g_pos上升，g_neg下降
                # 2. g_pos和g_neg是x_pos和x_neg的函数，x_pos和x_neg反映了客观世界，是这样要学习的对象。有了x_pos和x_neg，就能够计算出g_pos和g_neg，有了g_pos和g_neg，就能够计算出loss
                # 3. 通过loss.backward()，计算loss对权重的梯度，使得loss下降，g_pos上升，g_neg下降
                # 4. 通过self.opt.step()，更新了self.weight和self.bias
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

    
# def visualize_sample(data, name='', idx=0):
#     reshaped = data[idx].cpu().reshape(28, 28)
#     plt.figure(figsize = (4, 4))
#     plt.title(name)
#     plt.imshow(reshaped, cmap="gray")
#     plt.show()
    
    
if __name__ == "__main__":
    torch.manual_seed(1234)
    train_loader, test_loader = MNIST_loaders(train_batch_size=100, test_batch_size=10000)

    net = Net([784, 500, 500])
    x, y = next(iter(train_loader))
    x, y = x.cuda(), y.cuda()
    # x_pos = overlay_y_on_x(x, y)
    # # rnd = torch.randperm(x.size(0)) # 生成一个从0到n-1的随机整数序列。
    # # x_neg = overlay_y_on_x(x, y[rnd])
    # y_rnd = y.clone()
    # for i, y_i in enumerate(y):
    #     li = list(range(10))
    #     li.remove(y_i)
    #     j = np.random.choice(li)
    #     y_rnd[i] = j

    # x_neg = overlay_y_on_x(x, y_rnd)
    # # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
    # #     visualize_sample(data, name)
    
    net.train()#x_pos, x_neg)

    print('train accu:', net.predict(x).eq(y).float().mean().item())

    x_te, y_te = next(iter(test_loader))
    x_te, y_te = x_te.cuda(), y_te.cuda()

    print('test accu:', net.predict(x_te).eq(y_te).float().mean().item())

    print(net.predict(x_te)[:30], y_te[:30])


training layer 0 ...


100%|██████████| 4000000/4000000 [1:15:12<00:00, 886.35it/s]


training layer 1 ...


100%|██████████| 4000000/4000000 [1:10:28<00:00, 945.93it/s] 


train accu: 0.3199999928474426
test accu: 0.3824999928474426
tensor([7, 2, 1, 5, 4, 1, 9, 1, 4, 1, 0, 2, 9, 0, 1, 5, 4, 7, 1, 4, 9, 3, 4, 6,
        4, 5, 4, 4, 0, 2], device='cuda:0') tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1], device='cuda:0')


In [15]:

    # x_neg = overlay_y_on_x(x, y_rnd)
    # # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
    # #     visualize_sample(data, name)
    
    net.train()#x_pos, x_neg)

    print('train accu:', net.predict(x).eq(y).float().mean().item())

    x_te, y_te = next(iter(test_loader))
    x_te, y_te = x_te.cuda(), y_te.cuda()

    print('test accu:', net.predict(x_te).eq(y_te).float().mean().item())

    print(net.predict(x_te)[:30], y_te[:30])


training layer 0 ...


100%|██████████| 500000/500000 [09:35<00:00, 868.64it/s]


training layer 1 ...


100%|██████████| 500000/500000 [09:05<00:00, 916.67it/s]


train accu: 0.5199999809265137
test accu: 0.5268999934196472
tensor([7, 2, 1, 0, 4, 1, 9, 9, 4, 9, 2, 2, 9, 0, 2, 2, 8, 4, 4, 4, 9, 6, 4, 4,
        4, 2, 7, 4, 0, 1], device='cuda:0') tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1], device='cuda:0')


In [22]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda
from torch.utils.data import DataLoader
import numpy as np


def MNIST_loaders(train_batch_size=50000, test_batch_size=10000):

    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))])

    train_loader = DataLoader(
        MNIST('./data/', train=True,
              download=True,
              transform=transform),
        batch_size=train_batch_size, shuffle=True)

    test_loader = DataLoader(
        MNIST('./data/', train=False,
              download=True,
              transform=transform),
        batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader


def overlay_y_on_x(x, y):
    """
    Replace the first 10 pixels of data [x] with one-hot-encoded label [y]
    """
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_


class Net(torch.nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda()]

    def predict(self, x): 
        # 这个predict方法是理解ff方法的关键，它不是像普通的predict方法一样，输入一个样本，输出一个长度为num_cls的softmax预测向量
        # 而是一个样本反复输入这个网络num_cls次，把每种带标签的可能都计算一个goodness，也就是这个数据是好数据的可能性，找出最高goodness的就是预测类别
        goodness_per_label = []
        for label in range(10): # 对每一个标签进行预测
            h = overlay_y_on_x(x, label) # h是输入x和标签label的叠加
            goodness = [] # goodness是一个列表，里面存放了每一层的结果向量的均方
            for layer in self.layers: # 对每一层进行前传
                h = layer(h) # h是每一层的输出
                goodness += [h.pow(2).mean(1)] # goodness是每一层的结果向量的均方。h.pow(2)是h的每一个元素的平方，mean(1)是对每一行求均值
            goodness_per_label += [sum(goodness).unsqueeze(1)] # goodness_per_label是每一层的结果向量的均方的和
        goodness_per_label = torch.cat(goodness_per_label, 1) # goodness_per_label是每一层的结果向量的均方的和的列表
        return goodness_per_label.argmax(1) # 返回的是goodness_per_label中每一行最大值的索引，也就是说，返回的是每一行最大值的列索引

    def train(self): #, x_pos, x_neg): # 这个train方法是对整个网络进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        # minibatch
        # bz = 100
        for x, y in tqdm(train_loader):
            # print(i[1][:10])
        # for j in range(0, x_pos.size(0), bz):
            # x, y = next(iter(train_loader))
            x, y = x.cuda(), y.cuda()
            x_pos = overlay_y_on_x(x, y)
            # rnd = torch.randperm(x.size(0)) # 生成一个从0到n-1的随机整数序列。
            # x_neg = overlay_y_on_x(x, y[rnd])
            y_rnd = y.clone()
            for i, y_i in enumerate(y):
                li = list(range(10))
                li.remove(y_i)
                j = np.random.choice(li)
                y_rnd[i] = j

            x_neg = overlay_y_on_x(x, y_rnd)

            h_pos, h_neg = x_pos, x_neg # h_pos和h_neg是正样本和负样本的输入
            for i, layer in enumerate(self.layers): # 对每一层进行训练
                # print('training layer', i, '...') # 这里的i是层数
                h_pos, h_neg = layer.train(h_pos, h_neg) # 对每一层进行训练，得到了正样本和负样本的结果向量，这个结果向量是该层的输出，也是下一层的输入
                # 也就是说，这个训练的过程中，正样本在前传过程中得到的每一层输出都被认为是正的，负样本在前传过程中得到的每一层输出都被认为是负的，也就是说，出身决定一切


class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.008)
        self.threshold = 2.0
        self.num_epochs = 1000 # 训练的次数是1000次

    def forward(self, x):
        # x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)  # 这个是对输入做了归一化，使得输入的模长为1，这在论文里有解释
        x_direction = x / x.max()
        x_direction =  self.relu(
            torch.mm(x_direction, self.weight.T) +
            self.bias.unsqueeze(0) # 这个是对输入做了最基本的前向传播，得到了结果向量
            ) # 注意，在前传之后，随即使用了relu激活函数，这意味着每一层的所有激活值都是非负的
        return x_direction

    def train(self, x_pos, x_neg):
        # 训练其实就是对每一层分别进行训练，训练的目标是让正样本的结果向量的均方上升，负样本的结果向量的均方下降
        # 每一层的forward方法定义如上一个函数，这里的train方法定义了训练的过程
        for i in range(self.num_epochs):
            # minibatch
            # bz = 100
            # for j in range(0, x_pos.size(0), bz):
                # mask = torch.randperm(x_pos.size(0))[:bz]
                # x_pos = x_pos[mask] # 随机采样1000个正样本
                # x_neg = x_neg[mask] # 随机采样1000个负样本


                # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
                #     visualize_sample(data, name)
                
                # print(self.forward(x_pos).pow(2), self.forward(x_pos).pow(2).shape)
                g_pos = self.forward(x_pos).pow(2).mean(1) # g_pos 是正样本x_pos在该层前向传播得到的结果向量的均方
                g_neg = self.forward(x_neg).pow(2).mean(1) # g_neg 是负样本x_neg在该层前向传播得到的结果向量的均方
                # 论文关于使用L2范数来度量的理由：
                # There are two main reasons for using the squared length of the activity vector as the goodness function.
                # First, it has very simple derivatives. Second, layer normalization removes all trace of the goodness.
                
                # The following loss pushes pos (neg) samples to
                # values larger (smaller) than the self.threshold.
                # 随着训练过程，loss下降，g_pos将上升，g_neg将下降
                loss = torch.log(1 + torch.exp(torch.cat([
                    -g_pos + self.threshold,
                    g_neg - self.threshold]))).mean() # loss = [log(1+exp(-(g_pos-threshold))) + log(1+exp(g_neg-threshold))] / 2
                # print(loss)
                self.opt.zero_grad()
                # this backward just compute the derivative and hence
                # is not considered backpropagation.
                loss.backward()
                self.opt.step()
                # 关于这里为什么能够work：
                # 1. loss是权重的函数，loss的核心思想是让g_pos上升，g_neg下降
                # 2. g_pos和g_neg是x_pos和x_neg的函数，x_pos和x_neg反映了客观世界，是这样要学习的对象。有了x_pos和x_neg，就能够计算出g_pos和g_neg，有了g_pos和g_neg，就能够计算出loss
                # 3. 通过loss.backward()，计算loss对权重的梯度，使得loss下降，g_pos上升，g_neg下降
                # 4. 通过self.opt.step()，更新了self.weight和self.bias
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

    
# def visualize_sample(data, name='', idx=0):
#     reshaped = data[idx].cpu().reshape(28, 28)
#     plt.figure(figsize = (4, 4))
#     plt.title(name)
#     plt.imshow(reshaped, cmap="gray")
#     plt.show()
    
    
if __name__ == "__main__":
    torch.manual_seed(1234)
    train_loader, test_loader = MNIST_loaders(train_batch_size=100, test_batch_size=10000)

    net = Net([784, 500, 500])
    x, y = next(iter(train_loader))
    x, y = x.cuda(), y.cuda()
    # x_pos = overlay_y_on_x(x, y)
    # # rnd = torch.randperm(x.size(0)) # 生成一个从0到n-1的随机整数序列。
    # # x_neg = overlay_y_on_x(x, y[rnd])
    # y_rnd = y.clone()
    # for i, y_i in enumerate(y):
    #     li = list(range(10))
    #     li.remove(y_i)
    #     j = np.random.choice(li)
    #     y_rnd[i] = j

    # x_neg = overlay_y_on_x(x, y_rnd)
    # # for data, name in zip([x, x_pos, x_neg], ['orig', 'pos', 'neg']):
    # #     visualize_sample(data, name)
    
    net.train()#x_pos, x_neg)

    print('train error:', 1.0 - net.predict(x).eq(y).float().mean().item())

    x_te, y_te = next(iter(test_loader))
    x_te, y_te = x_te.cuda(), y_te.cuda()

    print('test error:', 1.0 - net.predict(x_te).eq(y_te).float().mean().item())

    print(net.predict(x_te)[:30], y_te[:30])


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1113.33it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1190.18it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1200.24it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1169.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1237.47it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1370.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1215.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1259.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1231.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1389.91it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1129.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.07it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1179.37it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1279.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1035.42it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1120.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1222.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1191.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1213.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1261.15it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1253.88it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1233.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1251.31it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1373.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1236.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1302.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1161.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1266.82it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1058.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1369.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1239.31it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1387.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1193.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.30it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1251.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1183.66it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1442.86it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1493.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1123.18it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1162.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1184.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1299.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1104.08it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1320.60it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1225.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1132.17it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1364.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1355.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1243.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1242.24it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1416.95it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1141.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1256.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1096.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1271.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1099.55it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1215.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1218.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1140.94it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1254.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1248.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1197.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1276.07it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1168.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1177.56it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1283.83it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1053.12it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.85it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1181.74it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1199.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1151.39it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1256.45it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1346.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1342.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.96it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1506.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.95it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1065.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1281.44it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1281.49it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1317.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1304.74it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1410.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1382.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1381.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1400.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1413.35it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1268.21it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1206.00it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1195.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1073.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1431.70it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.15it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1386.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.26it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1472.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1250.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1148.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1215.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1401.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.45it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1462.62it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1223.55it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1173.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1314.25it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.67it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.01it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1476.96it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1303.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1306.66it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1254.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1347.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1182.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1172.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1381.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1435.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1093.58it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1450.05it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1227.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1312.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1333.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.46it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1355.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1324.95it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1243.25it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.68it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1527.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.63it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1313.73it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1508.64it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1228.72it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.91it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1403.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.83it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1418.29it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1432.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1390.68it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1302.91it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1317.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1456.25it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1279.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1267.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1485.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1412.24it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1317.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.93it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1329.43it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1442.11it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1356.15it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1512.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1324.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1398.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1380.36it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1331.44it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1378.19it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1357.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1308.71it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1479.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1192.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1280.44it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1159.85it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1321.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1295.07it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1373.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1286.68it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1232.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1140.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1202.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1271.70it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1245.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.17it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1250.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1240.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1166.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1036.29it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1147.29it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1237.55it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1125.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1148.54it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1122.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1166.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.15it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1402.78it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1195.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1352.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1286.02it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1194.62it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1240.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1171.84it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1162.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1392.59it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1387.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.63it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1170.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1360.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.54it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1356.19it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1264.78it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1342.34it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1416.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1216.10it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.42it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1133.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1201.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.75it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1089.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1221.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1216.58it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1219.23it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1299.78it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1522.42it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1195.37it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1390.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1147.95it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1240.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1185.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1577.27it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1213.65it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1334.78it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1245.15it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1116.43it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1201.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1372.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1174.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1245.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1245.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1238.47it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1536.74it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1177.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1503.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.45it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1433.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1214.44it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1386.00it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1284.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1249.64it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1339.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1260.65it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1146.20it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1504.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1187.40it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1284.17it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1332.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1184.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:01<00:00, 981.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1202.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1250.46it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1377.10it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1506.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1314.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1105.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1472.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1416.81it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1570.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.19it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.31it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1268.17it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1267.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1458.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1286.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.74it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1250.02it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1327.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1251.93it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1228.72it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.98it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1155.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1224.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1222.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1380.59it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1403.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1384.36it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1222.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1257.81it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1156.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1082.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1445.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1441.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1317.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1167.81it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1314.21it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1209.19it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1157.23it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1264.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1623.82it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1292.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1372.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.76it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1190.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1378.87it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1119.25it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1358.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1244.84it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1122.71it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1427.99it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1305.91it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1313.10it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1384.48it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.27it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1019.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1468.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1327.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1358.51it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1268.66it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1212.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1255.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1275.88it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1204.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1402.16it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1512.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1083.03it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1285.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1251.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1350.68it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1485.62it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1424.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1486.21it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1232.82it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1285.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1415.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1185.20it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1304.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1198.99it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1406.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1196.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1343.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1257.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1477.54it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1017.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1519.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1088.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:01<00:00, 963.65it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1362.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1260.36it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1105.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1204.58it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1137.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.15it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.00it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1184.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.45it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1281.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1383.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1366.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1327.75it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.01it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1281.37it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1348.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.94it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.26it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1310.96it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1413.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1320.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1420.58it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1277.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1452.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1305.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1394.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1381.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1296.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.49it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1365.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1495.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.29it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1416.01it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1519.37it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1286.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1208.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1518.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1581.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.48it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1335.08it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1308.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1419.20it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1360.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1541.75it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1554.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1367.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1216.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1451.88it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1270.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.15it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1341.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1374.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1373.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1364.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.10it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1365.37it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1343.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1256.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1479.70it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1427.56it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1154.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1323.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1448.24it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1343.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1419.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1410.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1341.11it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1382.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1333.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1309.44it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1271.45it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1332.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1330.29it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1392.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1093.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1417.79it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.19it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.85it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1233.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1462.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1468.60it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1302.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1200.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1297.14it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1505.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1390.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1306.27it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1349.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1469.24it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1521.33it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.02it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1316.18it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1520.34it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1321.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1430.18it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1372.16it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1464.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.04it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1616.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1553.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1467.70it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1436.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1278.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1421.81it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1544.78it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1333.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1582.20it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1347.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1389.77it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1662.25it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.46it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1376.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1478.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1489.87it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1428.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1436.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1472.51it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.36it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1309.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1224.72it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1481.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1468.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1392.77it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1429.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1661.53it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1483.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1434.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.82it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1370.75it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1257.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1333.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1398.11it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.71it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1317.40it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1124.07it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1242.91it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1406.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1225.95it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1359.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1151.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1161.01it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1443.54it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1248.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1411.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1567.33it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1544.12it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1271.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.53it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1406.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1400.74it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1249.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1196.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1144.84it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.33it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1195.63it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1457.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.76it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1206.79it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1259.15it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1209.73it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1209.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1362.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1479.34it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1357.84it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1219.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1303.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1313.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1316.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1425.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1433.73it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.87it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1443.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1323.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.46it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1406.00it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1515.48it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.44it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1377.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1285.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1634.32it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1364.72it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1400.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1452.84it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1528.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1468.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1387.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1378.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1218.33it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1536.52it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1485.10it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1350.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1296.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1360.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1403.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1358.44it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1461.88it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1298.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1352.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1532.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1347.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1358.26it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1238.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1247.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.63it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1504.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1336.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1303.60it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1419.82it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1352.87it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1366.58it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1393.73it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1275.95it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1223.15it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1259.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1360.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1198.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1288.96it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1493.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1292.20it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1321.15it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1338.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.76it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1529.81it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1197.64it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1299.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1358.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1280.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1403.01it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.68it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1348.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1333.82it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1379.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1306.06it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1298.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1469.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1391.01it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1305.51it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1383.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1232.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1497.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1382.77it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1455.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.86it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1334.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1356.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1233.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1342.37it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1363.59it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1466.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1352.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1267.42it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1399.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1434.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1482.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1215.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1422.54it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1406.81it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1269.30it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1418.14it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1347.66it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1451.27it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1304.86it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1165.04it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.88it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1206.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1257.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.84it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1312.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1220.64it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1447.18it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1535.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1330.10it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1342.25it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1357.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1413.08it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1441.00it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.88it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1211.24it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1310.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1248.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1432.12it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1411.35it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1316.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1435.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1423.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1295.43it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1438.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1402.44it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1433.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1237.56it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1309.81it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1363.00it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1280.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1304.95it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1487.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1481.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1417.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1357.88it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1492.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1377.94it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1363.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1230.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1346.58it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.04it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.14it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1431.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1284.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.74it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1370.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1342.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1406.20it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1431.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.27it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1405.04it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1484.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1264.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1228.76it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1359.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.12it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1269.94it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1358.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1349.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1402.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1565.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1148.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1334.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1257.04it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1459.66it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1435.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1362.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1223.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1121.84it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.58it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1554.72it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1306.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1335.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1378.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1501.43it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1458.81it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1424.14it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1275.54it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1522.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1395.31it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1381.74it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1364.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1197.29it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1320.46it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1478.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1329.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1332.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1129.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1335.78it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1334.91it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1316.83it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1301.31it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1359.78it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1280.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1321.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1429.64it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1370.83it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1362.66it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1265.37it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.65it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1343.86it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1397.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1474.72it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1490.27it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1400.48it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1543.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1252.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1275.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1348.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1085.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1500.95it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1330.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1349.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.24it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1283.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1207.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1236.96it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1340.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1523.85it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1378.71it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1254.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1261.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1541.29it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1397.21it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1439.73it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1266.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1334.74it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1168.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1295.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1356.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1341.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1326.86it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1201.82it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1498.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1150.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1207.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1108.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1324.18it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.19it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1213.20it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1019.34it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1139.29it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1188.61it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1114.67it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1296.47it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1214.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1168.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1299.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1202.91it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1246.30it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1264.05it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1040.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1520.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1230.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1143.16it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1274.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:01<00:00, 983.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1269.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.93it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1223.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1405.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1074.10it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1448.79it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:01<00:00, 735.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1194.52it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1252.18it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1391.32it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1336.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:01<00:00, 952.21it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:01<00:00, 706.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1059.18it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1198.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.24it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1296.01it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1200.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.81it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.24it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1198.82it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1439.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1422.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1154.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1372.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1390.88it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.33it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1369.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1433.14it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1325.54it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1347.95it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.52it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.63it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1360.29it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1274.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1229.32it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1359.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1243.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1571.65it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1248.23it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.00it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1267.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1205.96it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1247.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1212.54it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1332.10it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1366.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1483.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1370.61it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1405.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1337.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1333.48it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1302.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1356.38it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1292.54it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.09it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1320.64it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1413.98it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1265.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1345.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1391.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1145.74it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1442.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1127.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1242.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1294.79it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1421.96it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1321.27it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1278.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1392.64it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1351.12it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1293.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.14it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1349.36it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1400.41it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1345.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1252.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1429.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1309.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.82it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1296.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1178.31it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.46it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1340.21it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.86it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1163.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1415.34it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.25it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1258.87it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1341.89it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1237.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1313.18it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1336.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1350.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1266.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1340.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1223.08it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1401.03it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1297.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1129.62it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1148.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1340.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1276.02it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1293.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1338.16it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1411.35it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1245.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1404.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1295.72it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.01it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1281.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1297.05it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1281.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.63it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1477.06it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1227.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1309.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1499.25it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1203.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.20it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1292.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1261.63it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1452.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1374.63it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.82it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1470.46it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1203.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1369.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1283.91it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1266.51it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1171.50it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1460.21it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1363.18it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.56it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1357.80it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1230.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1242.80it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.17it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.93it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1417.75it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1293.04it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1172.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1390.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1429.00it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1247.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1237.55it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1486.43it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1327.57it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1247.07it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1439.01it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1368.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1319.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1275.47it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1267.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1222.93it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1238.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1253.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1452.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1227.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1293.23it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1192.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1202.00it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1518.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1241.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1484.29it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1147.18it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1287.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1216.84it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1391.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1171.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1192.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1192.36it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1483.88it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1121.28it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1477.54it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1268.00it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1218.55it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1203.15it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1315.25it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1283.47it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.10it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1357.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1303.90it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.59it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1246.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.39it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1205.87it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.79it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.98it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1430.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1240.61it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1356.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1179.63it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.50it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1223.24it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1284.48it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.86it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1284.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1260.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1381.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1183.14it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1024.59it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1284.59it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1280.20it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1148.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.79it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1399.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1434.82it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1370.51it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1416.44it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1217.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1344.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1194.92it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1322.68it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1288.87it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1359.54it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1276.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1253.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1238.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1164.59it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1189.19it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1482.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1257.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1414.58it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1282.33it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1350.02it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1269.46it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.14it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1166.69it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1502.79it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1291.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1276.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1311.87it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1346.27it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1250.75it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1371.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1004.74it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1244.99it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1293.44it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1355.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1316.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1354.61it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1349.40it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1341.65it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1215.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1348.41it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1197.73it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.88it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1425.30it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1158.28it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1229.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1471.08it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1143.51it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1415.53it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1278.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1396.71it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1236.07it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1328.12it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1115.56it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1118.87it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1317.48it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1179.60it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1360.62it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1331.37it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1300.32it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1419.11it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1435.96it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1273.70it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1329.39it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1332.04it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1345.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1289.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1226.49it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1320.22it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1290.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1407.97it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1254.27it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1383.90it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1316.71it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1318.13it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1137.38it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1413.03it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1222.03it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1303.87it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.08it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1274.73it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1307.97it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1180.74it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1227.70it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1097.76it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1197.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1552.54it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1204.94it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1336.36it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1182.89it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1140.45it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1345.13it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1460.00it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1286.52it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.14it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1137.08it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1376.75it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1209.58it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1286.32it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1379.19it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1252.93it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1252.09it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1082.58it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1263.35it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1254.46it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1303.87it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1160.58it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1329.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1327.98it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1259.43it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1385.29it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1203.67it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1232.69it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1205.84it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1217.74it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1347.26it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1353.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1237.23it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1441.40it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1405.99it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1235.77it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1310.81it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1412.65it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1226.83it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1276.49it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1193.17it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1262.92it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1050.98it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1461.42it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1187.66it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1417.57it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1210.34it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1457.91it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1338.78it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1338.82it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1139.78it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1238.26it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1243.98it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1269.52it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1129.00it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1538.16it/s]


training layer 0 ...


100%|██████████| 1000/1000 [00:00<00:00, 1232.22it/s]


training layer 1 ...


100%|██████████| 1000/1000 [00:00<00:00, 1109.35it/s]


train error: 0.9000000059604645
test error: 0.9020000025629997
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0], device='cuda:0') tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5,
        4, 0, 7, 4, 0, 1], device='cuda:0')


In [None]:
from train_utils import *

In [None]:
len(train_loader)

2

In [None]:
next(iter(train_loader))[0].shape

torch.Size([50000, 784])

In [17]:
train_loader, test_loader = MNIST_loaders(train_batch_size=10000, test_batch_size=10000)

In [21]:
for i in train_loader:
    print(i[1][:10])
    # break

tensor([8, 2, 6, 1, 3, 7, 4, 3, 4, 4])
tensor([4, 1, 2, 1, 1, 7, 4, 7, 7, 4])
tensor([6, 6, 1, 7, 2, 1, 8, 9, 3, 8])
tensor([1, 1, 2, 7, 9, 4, 4, 5, 7, 0])
tensor([6, 3, 1, 6, 8, 0, 6, 4, 9, 0])
tensor([2, 9, 3, 3, 1, 0, 0, 8, 2, 3])
