# <center>PROJECT SANDBOX</center>

## Documentation
The aim of this notebook is to provide a simple sandbox to test different NN architectures for the project. , here is a doc about the functions imported from `scripts` folder : 

- **`prepare_dataset(device,ratio=0.5,shuffle_ctx=False)`** :
    - **Input**:
        - device : a torch.device object
        - ratio : a float ratio between 0 and 1 that determines the average proportion of modern english verses in the data loader
        - shuffle_ctx : if `True`, shuffle the contexts within a Batch so that half of the `x_1` elements has a wrong context `ctx_1`. Useful to train the context recognizer model.
    - **Return** :
        - a torch Dataset | class : Shakespeare inherited from torch.utils.data.Dataset
        - a python word dictionary (aka tokenizer) | class : dict
    - **Tensors returned when loaded in the dataloader**:
        - x_1 : input verse (modern / shakespearian)
        - x_2 : output verse (modern / shakespearian)

        - ctx_1 = context of the input verse
        - ctx_2 = context of the output verse

        - len_x : length of the input verse
        - len_y : length of the output verse

        - len_ctx_x : length of the input verse context
        - len_ctx_y : length of the output verse context

        - label : label of the input verse (0 : modern, 1 : shakespearian)
        - label_ctx : label of the context (0 : wrong context, 1 : right context)
- **`string2code(string,dict)`** : 
    - **Input**:
        - string : a sentence
        - dict : a tokenizer
    - **Return** :
        - a torch Longtensor (sentence tokenized)
- **`code2string(torch.Longtensor,dict)`** : 
    - **Input**:
        - torch.Longtensor : a sentence tokenized
        - dict : a tokenizer
    - **Return** :
        - a string sentence

## Importing packages

In [1]:
from scripts.data_builders.prepare_dataset import prepare_dataset_ctx,string2code,code2string,assemble

import torch
import torchvision.datasets as datasets
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.tensorboard import SummaryWriter
import math
from torch.nn import BCELoss,CrossEntropyLoss
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import pickle
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device = ",device)

device =  cuda


## Preprocessing data

In [2]:
train_data, dict_words = prepare_dataset_ctx("data/shakespeare.csv",device,ratio=0.5,shuffle_ctx=True) #check with shift+tab to look at the data structure
batch_size = 8
dict_token = {b:a for a,b in dict_words.items()} #dict for code2string

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           shuffle=True,collate_fn=train_data.collate)
dict_size = len(dict_words)
d_embedding = 300 #cf. paper Y.Kim 2014 Convolutional Neural Networks for Sentence Classification

print("- dict size : ",dict_size)

Loading ...
- Shakespeare context dataset length :  21079
- Corrupted samples (ignored) :  0
- dict size :  17513


## Designing NN model

# Model 1 : CNN

In [3]:
class CoherenceClassifier(torch.nn.Module):
    def __init__(self,dict_size=dict_size,d_embedding=300):
        super().__init__()
        self.embed_layer=torch.nn.Embedding(dict_size+1,d_embedding,padding_idx=dict_size)

        self.conv_1 = torch.nn.Conv1d(d_embedding,3,kernel_size = 3, stride = 1)
        self.max_pool = torch.nn.MaxPool1d(3,2)
        self.relu = torch.nn.ReLU()
        self.linear = torch.nn.Linear(3,1)
    
    def forward(self,x):
        x = self.embed_layer(x)
        x = self.conv_1( x.transpose(1,2))
        x = self.max_pool( x )
        x = self.relu( x )
        x = torch.max( x , 2 )[0]
        x = torch.sigmoid(self.linear(x))
        return x

In [4]:
epochs = 100
model = CoherenceClassifier().to(device)
optimizer = optim.Adam(params=model.parameters(),lr=0.01)
loss_func = BCELoss()
n = len(train_data.x) // batch_size

In [5]:
for epoch in range(epochs):
    total_loss = 0
    i = 0
    for _,_ , ctx,_ , _,_ , len_ctx,_, _,label_ctx in train_loader:
        i+=1
        optimizer.zero_grad()
        
        #CNN
        ctx = model.forward(ctx).reshape(-1) #CNN architecture
        
        loss = loss_func( ctx , label_ctx.float() )
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
        #Vizualization

    print('-' * 35)
    print('| epoch {:3d} | '
          'lr {:02.2f} | '
          'loss {:5.2f}'.format(
            epoch+1, optimizer.state_dict()["param_groups"][0]["lr"],
            round(total_loss,2)))

ValueError: not enough values to unpack (expected 10, got 4)

# Model 2 : LSTM 

In [None]:
class CoherenceClassifier(torch.nn.Module):
    def __init__(self,dict_size=dict_size,d_embedding=300,d_hidden=100):
        super().__init__()
        self.d_hidden = d_hidden
        self.embedding = nn.Embedding(dict_size+1,d_embedding,padding_idx=dict_size)
        self.lstm = nn.LSTM(d_embedding,self.d_hidden,dropout=0.,num_layers=1,bidirectional=False)
        #self.bn0 = nn.BatchNorm1d(self.d_hidden)
        self.linear1 = torch.nn.Linear(self.d_hidden,1)
    
    def forward(self,x,len_x):
        x = self.embedding(x)
        x = pack_padded_sequence(x.permute(1,0,2),len_x,enforce_sorted=False)
        _,x = self.lstm(x)
        x = x[0].reshape(-1,self.d_hidden)
        #x = self.bn0(x)
        x = torch.sigmoid( self.linear1(x) ).reshape(-1)
        return x

In [None]:
epochs = 100
model = CoherenceClassifier().to(device)
optimizer = optim.Adam(params=model.parameters(),lr=0.01)
loss_func = BCELoss()
n = len(train_data.x) // batch_size

In [None]:
for epoch in range(epochs):
    total_loss = 0
    i = 0
    for _,_ , ctx,_ , _,_ , len_ctx,_, _,label_ctx in train_loader:
        i+=1
        optimizer.zero_grad()
        
        #LSTM
        ctx = model.forward(ctx,len_ctx) #LSTM architecture
        
        loss = loss_func( ctx , label_ctx.float() )
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    print('-' * 35)
    print('| epoch {:3d} | '
          'lr {:02.2f} | '
          'loss {:5.2f}'.format(
            epoch+1, optimizer.state_dict()["param_groups"][0]["lr"],
            round(total_loss,2)))

# Model 3 : Transformers

In [None]:
class CoherenceClassifier(nn.Module):

    def __init__(self,dict_size=dict_size, d_embedding=300,  dropout=0.1):
        super(CoherenceClassifier, self).__init__()
        
        self.embedding = nn.Embedding(dict_size+1,d_embedding,padding_idx=dict_size)
        self.pos_encoder = PositionalEncoding(d_embedding, dropout)
        encoder_layers = TransformerEncoderLayer(d_model=d_embedding, nhead = 4,dropout=dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers=4)
        
        self.decoder = nn.Linear(d_embedding, 2 )

    def forward(self, x):

        x = self.embedding( x )
        #x = self.pos_encoder( x )
        x = self.transformer_encoder( x )
        x = torch.softmax(torch.tanh(self.decoder( x )),1)
        return x
    
    
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=50):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x * math.sqrt(self.d_model)
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [None]:
epochs = 100
model = CoherenceClassifier().to(device)
optimizer = optim.Adam(params=model.parameters(),lr=0.01)
loss_func = CrossEntropyLoss()
n = len(train_data.x) // batch_size

In [None]:
for epoch in range(epochs):
    total_loss = 0
    i = 0
    for _,_ , ctx,_ , _,_ , len_ctx,_, _,label_ctx in train_loader:
        i+=1
        optimizer.zero_grad()
        
        #Transformer
        ctx = model.forward(ctx) #Transformer architecture
        y = torch.cat([label_ctx.reshape(-1,1),1-label_ctx.reshape(-1,1)],dim=1) # Transformer architecture
        
        loss = loss_func( ctx , y )
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    print('-' * 35)
    print('| epoch {:3d} | '
          'lr {:02.2f} | '
          'loss {:5.2f}'.format(
            epoch+1, optimizer.state_dict()["param_groups"][0]["lr"],
            round(total_loss,2)))

# Model 4 :  Pre-trained model

In [3]:
epochs = 100

#Load model and plug our Embedding in, and freeze Embedding
model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased').to(device)
model.bert.embeddings.word_embeddings = torch.load("data/models/embeddings/v0").to(device)
for param in model.bert.embeddings.word_embeddings.parameters():
    param.requires_grad = False
    
    
model.train()

optimizer = optim.Adam(params=model.parameters(),lr=0.01)
loss_func = CrossEntropyLoss()
n = len(train_data.x) // batch_size

Using cache found in /home/jb/.cache/torch/hub/huggingface_pytorch-transformers_master


In [4]:
for epoch in range(epochs):
    total_loss,total_accuracy = 0,0
    i = 0
    for ctx,pos_token,pos_ctx,label in train_loader:
        i+=1
        optimizer.zero_grad()
        
        #pre-trained BERT
        ctx = model.forward(input_ids=ctx,
                           token_type_ids=pos_ctx,
                            position_ids=pos_token)[0]
        
        loss = loss_func( ctx , label )
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        accuracy = 1 - (ctx.argmax(dim=1) - label).abs().sum().item() / ctx.shape[0]
        total_accuracy += accuracy
        
        print('| epoch {:3d} | {:5d}/{:5d} batches | '
              'loss {:5.2f} | total loss {:5.2f} | '
              'accuracy {:5.2f} | total accuracy {:5.2f}'.format(
                epoch+1, i, n,loss.item(),total_loss/(i+n*epoch),accuracy,total_accuracy/(i+n*epoch)))
    print('-' * 35)
    print("Epoch ",epoch,"\t",round(total_loss / n,2))

| epoch   1 |     1/ 2634 batches | loss  1.10 | total loss  1.10 | accuracy  0.12 | total accuracy  0.12
| epoch   1 |     2/ 2634 batches | loss  4.02 | total loss  2.56 | accuracy  0.25 | total accuracy  0.19
| epoch   1 |     3/ 2634 batches | loss  1.40 | total loss  2.17 | accuracy  0.88 | total accuracy  0.42
| epoch   1 |     4/ 2634 batches | loss  7.42 | total loss  3.49 | accuracy  0.25 | total accuracy  0.38
| epoch   1 |     5/ 2634 batches | loss  1.75 | total loss  3.14 | accuracy  0.62 | total accuracy  0.42
| epoch   1 |     6/ 2634 batches | loss  0.82 | total loss  2.75 | accuracy  0.75 | total accuracy  0.48
| epoch   1 |     7/ 2634 batches | loss  1.81 | total loss  2.62 | accuracy  0.38 | total accuracy  0.46
| epoch   1 |     8/ 2634 batches | loss  2.12 | total loss  2.55 | accuracy  0.38 | total accuracy  0.45
| epoch   1 |     9/ 2634 batches | loss  1.04 | total loss  2.39 | accuracy  0.38 | total accuracy  0.44
| epoch   1 |    10/ 2634 batches | loss  1.07

| epoch   1 |    79/ 2634 batches | loss  0.81 | total loss  1.30 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |    80/ 2634 batches | loss  2.51 | total loss  1.31 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |    81/ 2634 batches | loss  3.50 | total loss  1.34 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |    82/ 2634 batches | loss  1.89 | total loss  1.35 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |    83/ 2634 batches | loss  0.77 | total loss  1.34 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |    84/ 2634 batches | loss  1.18 | total loss  1.34 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |    85/ 2634 batches | loss  0.00 | total loss  1.32 | accuracy  1.00 | total accuracy  0.52
| epoch   1 |    86/ 2634 batches | loss  3.58 | total loss  1.35 | accuracy  0.62 | total accuracy  0.52
| epoch   1 |    87/ 2634 batches | loss  7.55 | total loss  1.42 | accuracy  0.25 | total accuracy  0.52
| epoch   1 |    88/ 2634 batches | loss  4.11

| epoch   1 |   158/ 2634 batches | loss  0.66 | total loss  1.41 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   159/ 2634 batches | loss  0.71 | total loss  1.41 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   160/ 2634 batches | loss  0.86 | total loss  1.41 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   161/ 2634 batches | loss  0.78 | total loss  1.40 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   162/ 2634 batches | loss  0.42 | total loss  1.40 | accuracy  0.88 | total accuracy  0.51
| epoch   1 |   163/ 2634 batches | loss  0.62 | total loss  1.39 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   164/ 2634 batches | loss  1.02 | total loss  1.39 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   165/ 2634 batches | loss  0.78 | total loss  1.39 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   166/ 2634 batches | loss  0.99 | total loss  1.38 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   167/ 2634 batches | loss  0.87

| epoch   1 |   236/ 2634 batches | loss  0.74 | total loss  1.31 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   237/ 2634 batches | loss  0.71 | total loss  1.31 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   238/ 2634 batches | loss  0.87 | total loss  1.31 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   239/ 2634 batches | loss  0.86 | total loss  1.31 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   240/ 2634 batches | loss  0.93 | total loss  1.30 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   241/ 2634 batches | loss  0.50 | total loss  1.30 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   242/ 2634 batches | loss  0.62 | total loss  1.30 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   243/ 2634 batches | loss  0.77 | total loss  1.30 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   244/ 2634 batches | loss  0.60 | total loss  1.29 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   245/ 2634 batches | loss  1.54

| epoch   1 |   314/ 2634 batches | loss  1.46 | total loss  1.23 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   315/ 2634 batches | loss  0.74 | total loss  1.23 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |   316/ 2634 batches | loss  0.68 | total loss  1.23 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   317/ 2634 batches | loss  0.90 | total loss  1.23 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   318/ 2634 batches | loss  0.80 | total loss  1.23 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   319/ 2634 batches | loss  0.68 | total loss  1.23 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   320/ 2634 batches | loss  0.66 | total loss  1.22 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   321/ 2634 batches | loss  0.91 | total loss  1.22 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   322/ 2634 batches | loss  0.80 | total loss  1.22 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   323/ 2634 batches | loss  0.65

| epoch   1 |   393/ 2634 batches | loss  1.39 | total loss  1.26 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   394/ 2634 batches | loss  1.92 | total loss  1.27 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   395/ 2634 batches | loss  2.30 | total loss  1.27 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   396/ 2634 batches | loss  1.66 | total loss  1.27 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   397/ 2634 batches | loss  1.11 | total loss  1.27 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   398/ 2634 batches | loss  1.89 | total loss  1.27 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   399/ 2634 batches | loss  1.58 | total loss  1.27 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   400/ 2634 batches | loss  3.41 | total loss  1.28 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   401/ 2634 batches | loss  1.33 | total loss  1.28 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   402/ 2634 batches | loss  0.59

| epoch   1 |   471/ 2634 batches | loss  1.14 | total loss  1.29 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   472/ 2634 batches | loss  1.07 | total loss  1.29 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   473/ 2634 batches | loss  0.62 | total loss  1.29 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   474/ 2634 batches | loss  0.83 | total loss  1.29 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   475/ 2634 batches | loss  0.78 | total loss  1.29 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   476/ 2634 batches | loss  0.80 | total loss  1.28 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   477/ 2634 batches | loss  0.87 | total loss  1.28 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   478/ 2634 batches | loss  1.32 | total loss  1.28 | accuracy  0.25 | total accuracy  0.51
| epoch   1 |   479/ 2634 batches | loss  0.76 | total loss  1.28 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   480/ 2634 batches | loss  1.69

| epoch   1 |   549/ 2634 batches | loss  0.93 | total loss  1.26 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   550/ 2634 batches | loss  1.11 | total loss  1.26 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   551/ 2634 batches | loss  1.18 | total loss  1.26 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   552/ 2634 batches | loss  0.79 | total loss  1.25 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   553/ 2634 batches | loss  1.06 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |   554/ 2634 batches | loss  1.40 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |   555/ 2634 batches | loss  0.71 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   556/ 2634 batches | loss  0.70 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   557/ 2634 batches | loss  1.08 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   558/ 2634 batches | loss  1.14

| epoch   1 |   627/ 2634 batches | loss  0.61 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |   628/ 2634 batches | loss  1.95 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   629/ 2634 batches | loss  1.48 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   630/ 2634 batches | loss  0.80 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   631/ 2634 batches | loss  0.47 | total loss  1.26 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |   632/ 2634 batches | loss  2.50 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   633/ 2634 batches | loss  1.38 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   634/ 2634 batches | loss  1.79 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |   635/ 2634 batches | loss  2.10 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   636/ 2634 batches | loss  3.33

| epoch   1 |   705/ 2634 batches | loss  1.21 | total loss  1.25 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   706/ 2634 batches | loss  1.66 | total loss  1.25 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   707/ 2634 batches | loss  1.00 | total loss  1.25 | accuracy  0.25 | total accuracy  0.51
| epoch   1 |   708/ 2634 batches | loss  0.68 | total loss  1.25 | accuracy  0.62 | total accuracy  0.51
| epoch   1 |   709/ 2634 batches | loss  0.68 | total loss  1.25 | accuracy  0.75 | total accuracy  0.51
| epoch   1 |   710/ 2634 batches | loss  2.52 | total loss  1.25 | accuracy  0.38 | total accuracy  0.51
| epoch   1 |   711/ 2634 batches | loss  1.30 | total loss  1.25 | accuracy  0.50 | total accuracy  0.51
| epoch   1 |   712/ 2634 batches | loss  1.04 | total loss  1.25 | accuracy  0.25 | total accuracy  0.51
| epoch   1 |   713/ 2634 batches | loss  1.43 | total loss  1.25 | accuracy  0.12 | total accuracy  0.51
| epoch   1 |   714/ 2634 batches | loss  1.02

| epoch   1 |   783/ 2634 batches | loss  1.07 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   784/ 2634 batches | loss  0.86 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   785/ 2634 batches | loss  0.79 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   786/ 2634 batches | loss  1.28 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   787/ 2634 batches | loss  1.40 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   788/ 2634 batches | loss  0.56 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   789/ 2634 batches | loss  1.71 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   790/ 2634 batches | loss  2.10 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   791/ 2634 batches | loss  1.38 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   792/ 2634 batches | loss  0.84

| epoch   1 |   861/ 2634 batches | loss  0.48 | total loss  1.24 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |   862/ 2634 batches | loss  0.70 | total loss  1.24 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   863/ 2634 batches | loss  1.13 | total loss  1.24 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   864/ 2634 batches | loss  1.05 | total loss  1.24 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   865/ 2634 batches | loss  1.53 | total loss  1.24 | accuracy  0.12 | total accuracy  0.50
| epoch   1 |   866/ 2634 batches | loss  1.11 | total loss  1.24 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   867/ 2634 batches | loss  0.72 | total loss  1.24 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   868/ 2634 batches | loss  0.74 | total loss  1.24 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   869/ 2634 batches | loss  0.72 | total loss  1.24 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   870/ 2634 batches | loss  0.96

| epoch   1 |   939/ 2634 batches | loss  1.53 | total loss  1.24 | accuracy  0.00 | total accuracy  0.50
| epoch   1 |   940/ 2634 batches | loss  3.39 | total loss  1.24 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   941/ 2634 batches | loss  2.90 | total loss  1.24 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |   942/ 2634 batches | loss  1.20 | total loss  1.24 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |   943/ 2634 batches | loss  6.94 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |   944/ 2634 batches | loss  4.30 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |   945/ 2634 batches | loss  1.01 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   946/ 2634 batches | loss  2.07 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |   947/ 2634 batches | loss  1.84 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |   948/ 2634 batches | loss  5.60

| epoch   1 |  1017/ 2634 batches | loss  0.79 | total loss  1.30 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1018/ 2634 batches | loss  1.10 | total loss  1.30 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1019/ 2634 batches | loss  2.28 | total loss  1.30 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1020/ 2634 batches | loss  0.80 | total loss  1.30 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1021/ 2634 batches | loss  0.77 | total loss  1.29 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1022/ 2634 batches | loss  2.79 | total loss  1.30 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1023/ 2634 batches | loss  1.18 | total loss  1.30 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1024/ 2634 batches | loss  0.63 | total loss  1.30 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1025/ 2634 batches | loss  1.15 | total loss  1.30 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1026/ 2634 batches | loss  1.80

| epoch   1 |  1096/ 2634 batches | loss  0.45 | total loss  1.28 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1097/ 2634 batches | loss  1.15 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1098/ 2634 batches | loss  1.01 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1099/ 2634 batches | loss  1.30 | total loss  1.28 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1100/ 2634 batches | loss  0.85 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1101/ 2634 batches | loss  1.02 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1102/ 2634 batches | loss  0.87 | total loss  1.28 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1103/ 2634 batches | loss  0.52 | total loss  1.28 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1104/ 2634 batches | loss  1.64 | total loss  1.28 | accuracy  0.12 | total accuracy  0.50
| epoch   1 |  1105/ 2634 batches | loss  0.72

| epoch   1 |  1174/ 2634 batches | loss  0.48 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1175/ 2634 batches | loss  0.86 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1176/ 2634 batches | loss  0.89 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1177/ 2634 batches | loss  0.90 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1178/ 2634 batches | loss  0.60 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1179/ 2634 batches | loss  0.44 | total loss  1.26 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |  1180/ 2634 batches | loss  1.36 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1181/ 2634 batches | loss  0.98 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1182/ 2634 batches | loss  1.00 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1183/ 2634 batches | loss  2.02

| epoch   1 |  1252/ 2634 batches | loss  0.78 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1253/ 2634 batches | loss  1.38 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1254/ 2634 batches | loss  0.44 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1255/ 2634 batches | loss  0.84 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1256/ 2634 batches | loss  0.43 | total loss  1.25 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |  1257/ 2634 batches | loss  2.39 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1258/ 2634 batches | loss  2.46 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1259/ 2634 batches | loss  0.96 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1260/ 2634 batches | loss  1.07 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1261/ 2634 batches | loss  2.65

| epoch   1 |  1330/ 2634 batches | loss  0.68 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1331/ 2634 batches | loss  0.81 | total loss  1.28 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1332/ 2634 batches | loss  3.03 | total loss  1.28 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1333/ 2634 batches | loss  1.23 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1334/ 2634 batches | loss  1.12 | total loss  1.28 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1335/ 2634 batches | loss  1.23 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1336/ 2634 batches | loss  1.73 | total loss  1.28 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1337/ 2634 batches | loss  0.77 | total loss  1.28 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1338/ 2634 batches | loss  1.82 | total loss  1.28 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1339/ 2634 batches | loss  0.90

| epoch   1 |  1409/ 2634 batches | loss  0.63 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1410/ 2634 batches | loss  0.84 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1411/ 2634 batches | loss  0.80 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1412/ 2634 batches | loss  0.62 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1413/ 2634 batches | loss  0.60 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1414/ 2634 batches | loss  0.98 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1415/ 2634 batches | loss  0.92 | total loss  1.26 | accuracy  0.12 | total accuracy  0.50
| epoch   1 |  1416/ 2634 batches | loss  0.75 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1417/ 2634 batches | loss  1.43 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1418/ 2634 batches | loss  1.58

| epoch   1 |  1487/ 2634 batches | loss  0.80 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1488/ 2634 batches | loss  0.79 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1489/ 2634 batches | loss  0.89 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1490/ 2634 batches | loss  0.66 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1491/ 2634 batches | loss  0.67 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1492/ 2634 batches | loss  0.67 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1493/ 2634 batches | loss  0.80 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1494/ 2634 batches | loss  0.78 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1495/ 2634 batches | loss  1.57 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1496/ 2634 batches | loss  0.79

| epoch   1 |  1566/ 2634 batches | loss  0.96 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1567/ 2634 batches | loss  1.07 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1568/ 2634 batches | loss  0.94 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1569/ 2634 batches | loss  1.01 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1570/ 2634 batches | loss  1.69 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1571/ 2634 batches | loss  1.19 | total loss  1.26 | accuracy  0.00 | total accuracy  0.50
| epoch   1 |  1572/ 2634 batches | loss  0.71 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1573/ 2634 batches | loss  1.68 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1574/ 2634 batches | loss  0.43 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1575/ 2634 batches | loss  0.81

| epoch   1 |  1644/ 2634 batches | loss  0.99 | total loss  1.25 | accuracy  0.12 | total accuracy  0.50
| epoch   1 |  1645/ 2634 batches | loss  0.91 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1646/ 2634 batches | loss  1.23 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1647/ 2634 batches | loss  1.13 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1648/ 2634 batches | loss  1.62 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1649/ 2634 batches | loss  0.89 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1650/ 2634 batches | loss  0.77 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1651/ 2634 batches | loss  1.97 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1652/ 2634 batches | loss  1.80 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1653/ 2634 batches | loss  1.14

| epoch   1 |  1722/ 2634 batches | loss  1.63 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1723/ 2634 batches | loss  1.10 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1724/ 2634 batches | loss  0.92 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1725/ 2634 batches | loss  0.07 | total loss  1.26 | accuracy  1.00 | total accuracy  0.50
| epoch   1 |  1726/ 2634 batches | loss  3.40 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1727/ 2634 batches | loss  2.40 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1728/ 2634 batches | loss  0.78 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1729/ 2634 batches | loss  1.43 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1730/ 2634 batches | loss  0.99 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1731/ 2634 batches | loss  1.03

| epoch   1 |  1801/ 2634 batches | loss  3.20 | total loss  1.26 | accuracy  0.12 | total accuracy  0.50
| epoch   1 |  1802/ 2634 batches | loss  1.21 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1803/ 2634 batches | loss  0.64 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1804/ 2634 batches | loss  0.74 | total loss  1.26 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  1805/ 2634 batches | loss  1.31 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1806/ 2634 batches | loss  0.94 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1807/ 2634 batches | loss  1.13 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1808/ 2634 batches | loss  1.37 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1809/ 2634 batches | loss  1.95 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1810/ 2634 batches | loss  0.57

| epoch   1 |  1880/ 2634 batches | loss  1.42 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1881/ 2634 batches | loss  1.03 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1882/ 2634 batches | loss  0.85 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1883/ 2634 batches | loss  0.98 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1884/ 2634 batches | loss  1.69 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  1885/ 2634 batches | loss  0.68 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1886/ 2634 batches | loss  0.81 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1887/ 2634 batches | loss  1.50 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1888/ 2634 batches | loss  3.16 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1889/ 2634 batches | loss  0.69

| epoch   1 |  1958/ 2634 batches | loss  1.03 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1959/ 2634 batches | loss  0.78 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1960/ 2634 batches | loss  0.81 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  1961/ 2634 batches | loss  1.69 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1962/ 2634 batches | loss  1.41 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1963/ 2634 batches | loss  1.04 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1964/ 2634 batches | loss  1.02 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1965/ 2634 batches | loss  1.90 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  1966/ 2634 batches | loss  2.18 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  1967/ 2634 batches | loss  1.24

| epoch   1 |  2037/ 2634 batches | loss  0.68 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2038/ 2634 batches | loss  0.84 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2039/ 2634 batches | loss  2.08 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2040/ 2634 batches | loss  1.28 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2041/ 2634 batches | loss  1.94 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2042/ 2634 batches | loss  1.06 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2043/ 2634 batches | loss  1.61 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2044/ 2634 batches | loss  3.42 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2045/ 2634 batches | loss  2.50 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2046/ 2634 batches | loss  0.90

| epoch   1 |  2116/ 2634 batches | loss  1.10 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2117/ 2634 batches | loss  1.19 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2118/ 2634 batches | loss  1.03 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2119/ 2634 batches | loss  2.35 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2120/ 2634 batches | loss  0.71 | total loss  1.26 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2121/ 2634 batches | loss  1.08 | total loss  1.26 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2122/ 2634 batches | loss  1.45 | total loss  1.26 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2123/ 2634 batches | loss  1.54 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2124/ 2634 batches | loss  0.82 | total loss  1.26 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2125/ 2634 batches | loss  1.36

| epoch   1 |  2195/ 2634 batches | loss  0.65 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2196/ 2634 batches | loss  0.93 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2197/ 2634 batches | loss  0.67 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2198/ 2634 batches | loss  0.75 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2199/ 2634 batches | loss  0.57 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2200/ 2634 batches | loss  0.79 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2201/ 2634 batches | loss  0.60 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2202/ 2634 batches | loss  1.27 | total loss  1.25 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2203/ 2634 batches | loss  0.78 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2204/ 2634 batches | loss  0.86

| epoch   1 |  2274/ 2634 batches | loss  1.28 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2275/ 2634 batches | loss  2.49 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2276/ 2634 batches | loss  0.99 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2277/ 2634 batches | loss  0.69 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2278/ 2634 batches | loss  0.55 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2279/ 2634 batches | loss  1.81 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2280/ 2634 batches | loss  1.04 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2281/ 2634 batches | loss  1.25 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2282/ 2634 batches | loss  0.74 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2283/ 2634 batches | loss  1.52

| epoch   1 |  2352/ 2634 batches | loss  0.67 | total loss  1.25 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |  2353/ 2634 batches | loss  1.59 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2354/ 2634 batches | loss  0.70 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2355/ 2634 batches | loss  0.58 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2356/ 2634 batches | loss  0.94 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2357/ 2634 batches | loss  1.20 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2358/ 2634 batches | loss  0.25 | total loss  1.25 | accuracy  0.88 | total accuracy  0.50
| epoch   1 |  2359/ 2634 batches | loss  0.85 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2360/ 2634 batches | loss  0.79 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2361/ 2634 batches | loss  1.45

| epoch   1 |  2430/ 2634 batches | loss  1.08 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2431/ 2634 batches | loss  1.10 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2432/ 2634 batches | loss  0.60 | total loss  1.25 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2433/ 2634 batches | loss  0.54 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2434/ 2634 batches | loss  0.67 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2435/ 2634 batches | loss  1.13 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2436/ 2634 batches | loss  0.61 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2437/ 2634 batches | loss  1.21 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2438/ 2634 batches | loss  1.36 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2439/ 2634 batches | loss  1.15

| epoch   1 |  2508/ 2634 batches | loss  2.52 | total loss  1.24 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2509/ 2634 batches | loss  2.79 | total loss  1.24 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2510/ 2634 batches | loss  0.90 | total loss  1.24 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2511/ 2634 batches | loss  1.38 | total loss  1.24 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2512/ 2634 batches | loss  5.67 | total loss  1.24 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2513/ 2634 batches | loss  5.45 | total loss  1.24 | accuracy  0.25 | total accuracy  0.50
| epoch   1 |  2514/ 2634 batches | loss  1.34 | total loss  1.24 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2515/ 2634 batches | loss  0.57 | total loss  1.24 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2516/ 2634 batches | loss  1.39 | total loss  1.24 | accuracy  0.75 | total accuracy  0.50
| epoch   1 |  2517/ 2634 batches | loss  5.36

| epoch   1 |  2586/ 2634 batches | loss  3.29 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2587/ 2634 batches | loss  3.28 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2588/ 2634 batches | loss  1.26 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2589/ 2634 batches | loss  1.09 | total loss  1.25 | accuracy  0.50 | total accuracy  0.50
| epoch   1 |  2590/ 2634 batches | loss  1.56 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2591/ 2634 batches | loss  1.71 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2592/ 2634 batches | loss  2.59 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2593/ 2634 batches | loss  0.96 | total loss  1.25 | accuracy  0.38 | total accuracy  0.50
| epoch   1 |  2594/ 2634 batches | loss  1.49 | total loss  1.25 | accuracy  0.62 | total accuracy  0.50
| epoch   1 |  2595/ 2634 batches | loss  2.39

| epoch   2 |    28/ 2634 batches | loss  0.93 | total loss  0.04 | accuracy  0.25 | total accuracy  0.01
| epoch   2 |    29/ 2634 batches | loss  3.44 | total loss  0.04 | accuracy  0.38 | total accuracy  0.01
| epoch   2 |    30/ 2634 batches | loss  2.33 | total loss  0.04 | accuracy  0.75 | total accuracy  0.01
| epoch   2 |    31/ 2634 batches | loss  7.77 | total loss  0.04 | accuracy  0.25 | total accuracy  0.01
| epoch   2 |    32/ 2634 batches | loss  5.79 | total loss  0.04 | accuracy  0.38 | total accuracy  0.01
| epoch   2 |    33/ 2634 batches | loss  1.27 | total loss  0.04 | accuracy  0.75 | total accuracy  0.01
| epoch   2 |    34/ 2634 batches | loss  0.67 | total loss  0.04 | accuracy  0.62 | total accuracy  0.01
| epoch   2 |    35/ 2634 batches | loss  2.44 | total loss  0.04 | accuracy  0.38 | total accuracy  0.01
| epoch   2 |    36/ 2634 batches | loss  2.81 | total loss  0.05 | accuracy  0.50 | total accuracy  0.01
| epoch   2 |    37/ 2634 batches | loss  3.81

| epoch   2 |   106/ 2634 batches | loss  0.78 | total loss  0.08 | accuracy  0.38 | total accuracy  0.02
| epoch   2 |   107/ 2634 batches | loss  0.82 | total loss  0.08 | accuracy  0.62 | total accuracy  0.02
| epoch   2 |   108/ 2634 batches | loss  0.46 | total loss  0.08 | accuracy  0.75 | total accuracy  0.02
| epoch   2 |   109/ 2634 batches | loss  2.04 | total loss  0.08 | accuracy  0.38 | total accuracy  0.02
| epoch   2 |   110/ 2634 batches | loss  1.75 | total loss  0.08 | accuracy  0.50 | total accuracy  0.02
| epoch   2 |   111/ 2634 batches | loss  0.82 | total loss  0.09 | accuracy  0.50 | total accuracy  0.02
| epoch   2 |   112/ 2634 batches | loss  0.83 | total loss  0.09 | accuracy  0.62 | total accuracy  0.02
| epoch   2 |   113/ 2634 batches | loss  2.69 | total loss  0.09 | accuracy  0.25 | total accuracy  0.02
| epoch   2 |   114/ 2634 batches | loss  0.58 | total loss  0.09 | accuracy  0.62 | total accuracy  0.02
| epoch   2 |   115/ 2634 batches | loss  0.77

| epoch   2 |   184/ 2634 batches | loss  1.13 | total loss  0.11 | accuracy  0.62 | total accuracy  0.03
| epoch   2 |   185/ 2634 batches | loss  2.37 | total loss  0.11 | accuracy  0.38 | total accuracy  0.03
| epoch   2 |   186/ 2634 batches | loss  1.36 | total loss  0.11 | accuracy  0.62 | total accuracy  0.03
| epoch   2 |   187/ 2634 batches | loss  0.54 | total loss  0.11 | accuracy  0.75 | total accuracy  0.03
| epoch   2 |   188/ 2634 batches | loss  0.43 | total loss  0.11 | accuracy  0.88 | total accuracy  0.03
| epoch   2 |   189/ 2634 batches | loss  0.90 | total loss  0.11 | accuracy  0.50 | total accuracy  0.03
| epoch   2 |   190/ 2634 batches | loss  0.85 | total loss  0.11 | accuracy  0.62 | total accuracy  0.03
| epoch   2 |   191/ 2634 batches | loss  0.62 | total loss  0.11 | accuracy  0.75 | total accuracy  0.03
| epoch   2 |   192/ 2634 batches | loss  0.60 | total loss  0.11 | accuracy  0.62 | total accuracy  0.03
| epoch   2 |   193/ 2634 batches | loss  1.03

| epoch   2 |   262/ 2634 batches | loss  1.83 | total loss  0.14 | accuracy  0.50 | total accuracy  0.05
| epoch   2 |   263/ 2634 batches | loss  1.24 | total loss  0.14 | accuracy  0.50 | total accuracy  0.05
| epoch   2 |   264/ 2634 batches | loss  1.48 | total loss  0.14 | accuracy  0.38 | total accuracy  0.05
| epoch   2 |   265/ 2634 batches | loss  1.18 | total loss  0.14 | accuracy  0.62 | total accuracy  0.05
| epoch   2 |   266/ 2634 batches | loss  0.94 | total loss  0.14 | accuracy  0.62 | total accuracy  0.05
| epoch   2 |   267/ 2634 batches | loss  1.36 | total loss  0.14 | accuracy  0.25 | total accuracy  0.05
| epoch   2 |   268/ 2634 batches | loss  0.73 | total loss  0.14 | accuracy  0.62 | total accuracy  0.05
| epoch   2 |   269/ 2634 batches | loss  1.44 | total loss  0.15 | accuracy  0.50 | total accuracy  0.05
| epoch   2 |   270/ 2634 batches | loss  1.96 | total loss  0.15 | accuracy  0.50 | total accuracy  0.05
| epoch   2 |   271/ 2634 batches | loss  1.32

| epoch   2 |   340/ 2634 batches | loss  1.35 | total loss  0.17 | accuracy  0.50 | total accuracy  0.06
| epoch   2 |   341/ 2634 batches | loss  0.71 | total loss  0.17 | accuracy  0.50 | total accuracy  0.06
| epoch   2 |   342/ 2634 batches | loss  1.69 | total loss  0.17 | accuracy  0.50 | total accuracy  0.06
| epoch   2 |   343/ 2634 batches | loss  0.79 | total loss  0.17 | accuracy  0.75 | total accuracy  0.06
| epoch   2 |   344/ 2634 batches | loss  2.25 | total loss  0.17 | accuracy  0.50 | total accuracy  0.06
| epoch   2 |   345/ 2634 batches | loss  0.92 | total loss  0.17 | accuracy  0.62 | total accuracy  0.06
| epoch   2 |   346/ 2634 batches | loss  0.74 | total loss  0.18 | accuracy  0.38 | total accuracy  0.06
| epoch   2 |   347/ 2634 batches | loss  1.24 | total loss  0.18 | accuracy  0.62 | total accuracy  0.06
| epoch   2 |   348/ 2634 batches | loss  1.55 | total loss  0.18 | accuracy  0.50 | total accuracy  0.06
| epoch   2 |   349/ 2634 batches | loss  1.32

| epoch   2 |   418/ 2634 batches | loss  1.38 | total loss  0.21 | accuracy  0.38 | total accuracy  0.07
| epoch   2 |   419/ 2634 batches | loss  0.73 | total loss  0.21 | accuracy  0.62 | total accuracy  0.07
| epoch   2 |   420/ 2634 batches | loss  0.47 | total loss  0.21 | accuracy  0.75 | total accuracy  0.07
| epoch   2 |   421/ 2634 batches | loss  1.90 | total loss  0.21 | accuracy  0.50 | total accuracy  0.07
| epoch   2 |   422/ 2634 batches | loss  1.82 | total loss  0.21 | accuracy  0.50 | total accuracy  0.07
| epoch   2 |   423/ 2634 batches | loss  0.47 | total loss  0.21 | accuracy  0.75 | total accuracy  0.07
| epoch   2 |   424/ 2634 batches | loss  0.85 | total loss  0.21 | accuracy  0.38 | total accuracy  0.07
| epoch   2 |   425/ 2634 batches | loss  0.65 | total loss  0.21 | accuracy  0.38 | total accuracy  0.07
| epoch   2 |   426/ 2634 batches | loss  0.93 | total loss  0.21 | accuracy  0.38 | total accuracy  0.07
| epoch   2 |   427/ 2634 batches | loss  1.11

| epoch   2 |   496/ 2634 batches | loss  0.92 | total loss  0.23 | accuracy  0.62 | total accuracy  0.08
| epoch   2 |   497/ 2634 batches | loss  1.13 | total loss  0.23 | accuracy  0.38 | total accuracy  0.08
| epoch   2 |   498/ 2634 batches | loss  0.76 | total loss  0.23 | accuracy  0.62 | total accuracy  0.08
| epoch   2 |   499/ 2634 batches | loss  2.07 | total loss  0.23 | accuracy  0.50 | total accuracy  0.08
| epoch   2 |   500/ 2634 batches | loss  2.08 | total loss  0.23 | accuracy  0.12 | total accuracy  0.08
| epoch   2 |   501/ 2634 batches | loss  1.11 | total loss  0.23 | accuracy  0.50 | total accuracy  0.08
| epoch   2 |   502/ 2634 batches | loss  0.76 | total loss  0.23 | accuracy  0.75 | total accuracy  0.08
| epoch   2 |   503/ 2634 batches | loss  2.27 | total loss  0.23 | accuracy  0.38 | total accuracy  0.08
| epoch   2 |   504/ 2634 batches | loss  0.70 | total loss  0.23 | accuracy  0.75 | total accuracy  0.08
| epoch   2 |   505/ 2634 batches | loss  0.77

| epoch   2 |   574/ 2634 batches | loss  0.85 | total loss  0.26 | accuracy  0.50 | total accuracy  0.09
| epoch   2 |   575/ 2634 batches | loss  0.72 | total loss  0.26 | accuracy  0.50 | total accuracy  0.09
| epoch   2 |   576/ 2634 batches | loss  0.72 | total loss  0.26 | accuracy  0.62 | total accuracy  0.09
| epoch   2 |   577/ 2634 batches | loss  0.60 | total loss  0.26 | accuracy  0.75 | total accuracy  0.09
| epoch   2 |   578/ 2634 batches | loss  2.11 | total loss  0.26 | accuracy  0.12 | total accuracy  0.09
| epoch   2 |   579/ 2634 batches | loss  1.01 | total loss  0.26 | accuracy  0.38 | total accuracy  0.09
| epoch   2 |   580/ 2634 batches | loss  0.70 | total loss  0.26 | accuracy  0.75 | total accuracy  0.09
| epoch   2 |   581/ 2634 batches | loss  1.05 | total loss  0.26 | accuracy  0.38 | total accuracy  0.09
| epoch   2 |   582/ 2634 batches | loss  1.15 | total loss  0.26 | accuracy  0.38 | total accuracy  0.09
| epoch   2 |   583/ 2634 batches | loss  1.08

| epoch   2 |   653/ 2634 batches | loss  2.10 | total loss  0.28 | accuracy  0.25 | total accuracy  0.10
| epoch   2 |   654/ 2634 batches | loss  0.60 | total loss  0.28 | accuracy  0.62 | total accuracy  0.10
| epoch   2 |   655/ 2634 batches | loss  0.81 | total loss  0.28 | accuracy  0.50 | total accuracy  0.10
| epoch   2 |   656/ 2634 batches | loss  1.23 | total loss  0.28 | accuracy  0.38 | total accuracy  0.10
| epoch   2 |   657/ 2634 batches | loss  0.84 | total loss  0.28 | accuracy  0.38 | total accuracy  0.10
| epoch   2 |   658/ 2634 batches | loss  0.73 | total loss  0.28 | accuracy  0.62 | total accuracy  0.10
| epoch   2 |   659/ 2634 batches | loss  1.50 | total loss  0.28 | accuracy  0.50 | total accuracy  0.10
| epoch   2 |   660/ 2634 batches | loss  0.98 | total loss  0.28 | accuracy  0.62 | total accuracy  0.10
| epoch   2 |   661/ 2634 batches | loss  0.81 | total loss  0.28 | accuracy  0.50 | total accuracy  0.10
| epoch   2 |   662/ 2634 batches | loss  1.27

| epoch   2 |   732/ 2634 batches | loss  0.60 | total loss  0.31 | accuracy  0.75 | total accuracy  0.11
| epoch   2 |   733/ 2634 batches | loss  0.68 | total loss  0.31 | accuracy  0.62 | total accuracy  0.11
| epoch   2 |   734/ 2634 batches | loss  0.77 | total loss  0.31 | accuracy  0.88 | total accuracy  0.11
| epoch   2 |   735/ 2634 batches | loss  1.50 | total loss  0.31 | accuracy  0.50 | total accuracy  0.11
| epoch   2 |   736/ 2634 batches | loss  0.47 | total loss  0.31 | accuracy  0.75 | total accuracy  0.11
| epoch   2 |   737/ 2634 batches | loss  0.71 | total loss  0.31 | accuracy  0.62 | total accuracy  0.11
| epoch   2 |   738/ 2634 batches | loss  0.61 | total loss  0.31 | accuracy  0.62 | total accuracy  0.11
| epoch   2 |   739/ 2634 batches | loss  1.04 | total loss  0.31 | accuracy  0.38 | total accuracy  0.11
| epoch   2 |   740/ 2634 batches | loss  0.79 | total loss  0.31 | accuracy  0.50 | total accuracy  0.11
| epoch   2 |   741/ 2634 batches | loss  0.88

| epoch   2 |   810/ 2634 batches | loss  1.57 | total loss  0.33 | accuracy  0.50 | total accuracy  0.12
| epoch   2 |   811/ 2634 batches | loss  1.10 | total loss  0.33 | accuracy  0.12 | total accuracy  0.12
| epoch   2 |   812/ 2634 batches | loss  1.03 | total loss  0.33 | accuracy  0.50 | total accuracy  0.12
| epoch   2 |   813/ 2634 batches | loss  1.23 | total loss  0.33 | accuracy  0.25 | total accuracy  0.12
| epoch   2 |   814/ 2634 batches | loss  0.52 | total loss  0.33 | accuracy  0.75 | total accuracy  0.12
| epoch   2 |   815/ 2634 batches | loss  0.95 | total loss  0.33 | accuracy  0.50 | total accuracy  0.12
| epoch   2 |   816/ 2634 batches | loss  0.75 | total loss  0.33 | accuracy  0.62 | total accuracy  0.12
| epoch   2 |   817/ 2634 batches | loss  0.74 | total loss  0.33 | accuracy  0.62 | total accuracy  0.12
| epoch   2 |   818/ 2634 batches | loss  1.00 | total loss  0.33 | accuracy  0.12 | total accuracy  0.12
| epoch   2 |   819/ 2634 batches | loss  1.65

| epoch   2 |   889/ 2634 batches | loss  1.94 | total loss  0.34 | accuracy  0.25 | total accuracy  0.13
| epoch   2 |   890/ 2634 batches | loss  0.96 | total loss  0.34 | accuracy  0.25 | total accuracy  0.13
| epoch   2 |   891/ 2634 batches | loss  1.10 | total loss  0.35 | accuracy  0.50 | total accuracy  0.13
| epoch   2 |   892/ 2634 batches | loss  1.74 | total loss  0.35 | accuracy  0.25 | total accuracy  0.13
| epoch   2 |   893/ 2634 batches | loss  0.68 | total loss  0.35 | accuracy  0.50 | total accuracy  0.13
| epoch   2 |   894/ 2634 batches | loss  0.81 | total loss  0.35 | accuracy  0.50 | total accuracy  0.13
| epoch   2 |   895/ 2634 batches | loss  0.95 | total loss  0.35 | accuracy  0.62 | total accuracy  0.13
| epoch   2 |   896/ 2634 batches | loss  1.11 | total loss  0.35 | accuracy  0.50 | total accuracy  0.13
| epoch   2 |   897/ 2634 batches | loss  0.63 | total loss  0.35 | accuracy  0.62 | total accuracy  0.13
| epoch   2 |   898/ 2634 batches | loss  1.02

| epoch   2 |   968/ 2634 batches | loss  0.58 | total loss  0.36 | accuracy  0.88 | total accuracy  0.14
| epoch   2 |   969/ 2634 batches | loss  1.65 | total loss  0.36 | accuracy  0.25 | total accuracy  0.14
| epoch   2 |   970/ 2634 batches | loss  1.22 | total loss  0.36 | accuracy  0.25 | total accuracy  0.14
| epoch   2 |   971/ 2634 batches | loss  0.88 | total loss  0.36 | accuracy  0.62 | total accuracy  0.14
| epoch   2 |   972/ 2634 batches | loss  1.97 | total loss  0.36 | accuracy  0.38 | total accuracy  0.14
| epoch   2 |   973/ 2634 batches | loss  2.51 | total loss  0.37 | accuracy  0.38 | total accuracy  0.14
| epoch   2 |   974/ 2634 batches | loss  0.48 | total loss  0.37 | accuracy  0.88 | total accuracy  0.14
| epoch   2 |   975/ 2634 batches | loss  1.11 | total loss  0.37 | accuracy  0.38 | total accuracy  0.14
| epoch   2 |   976/ 2634 batches | loss  0.48 | total loss  0.37 | accuracy  0.75 | total accuracy  0.14
| epoch   2 |   977/ 2634 batches | loss  0.53

| epoch   2 |  1047/ 2634 batches | loss  0.74 | total loss  0.38 | accuracy  0.62 | total accuracy  0.14
| epoch   2 |  1048/ 2634 batches | loss  0.98 | total loss  0.38 | accuracy  0.50 | total accuracy  0.14
| epoch   2 |  1049/ 2634 batches | loss  0.91 | total loss  0.38 | accuracy  0.62 | total accuracy  0.14
| epoch   2 |  1050/ 2634 batches | loss  0.87 | total loss  0.38 | accuracy  0.50 | total accuracy  0.14
| epoch   2 |  1051/ 2634 batches | loss  0.96 | total loss  0.38 | accuracy  0.50 | total accuracy  0.14
| epoch   2 |  1052/ 2634 batches | loss  0.54 | total loss  0.38 | accuracy  0.75 | total accuracy  0.14
| epoch   2 |  1053/ 2634 batches | loss  0.71 | total loss  0.38 | accuracy  0.50 | total accuracy  0.14
| epoch   2 |  1054/ 2634 batches | loss  0.74 | total loss  0.38 | accuracy  0.75 | total accuracy  0.14
| epoch   2 |  1055/ 2634 batches | loss  0.64 | total loss  0.38 | accuracy  0.75 | total accuracy  0.14
| epoch   2 |  1056/ 2634 batches | loss  0.84

| epoch   2 |  1125/ 2634 batches | loss  0.75 | total loss  0.40 | accuracy  0.50 | total accuracy  0.15
| epoch   2 |  1126/ 2634 batches | loss  2.07 | total loss  0.40 | accuracy  0.38 | total accuracy  0.15
| epoch   2 |  1127/ 2634 batches | loss  0.89 | total loss  0.40 | accuracy  0.75 | total accuracy  0.15
| epoch   2 |  1128/ 2634 batches | loss  2.13 | total loss  0.40 | accuracy  0.38 | total accuracy  0.15
| epoch   2 |  1129/ 2634 batches | loss  0.85 | total loss  0.40 | accuracy  0.50 | total accuracy  0.15
| epoch   2 |  1130/ 2634 batches | loss  1.21 | total loss  0.40 | accuracy  0.62 | total accuracy  0.15
| epoch   2 |  1131/ 2634 batches | loss  2.28 | total loss  0.40 | accuracy  0.50 | total accuracy  0.15
| epoch   2 |  1132/ 2634 batches | loss  0.03 | total loss  0.40 | accuracy  1.00 | total accuracy  0.15
| epoch   2 |  1133/ 2634 batches | loss  2.80 | total loss  0.40 | accuracy  0.38 | total accuracy  0.15
| epoch   2 |  1134/ 2634 batches | loss  1.41

| epoch   2 |  1203/ 2634 batches | loss  2.70 | total loss  0.42 | accuracy  0.50 | total accuracy  0.16
| epoch   2 |  1204/ 2634 batches | loss  2.99 | total loss  0.42 | accuracy  0.50 | total accuracy  0.16
| epoch   2 |  1205/ 2634 batches | loss  1.37 | total loss  0.42 | accuracy  0.62 | total accuracy  0.16
| epoch   2 |  1206/ 2634 batches | loss  0.95 | total loss  0.42 | accuracy  0.25 | total accuracy  0.16
| epoch   2 |  1207/ 2634 batches | loss  3.00 | total loss  0.42 | accuracy  0.38 | total accuracy  0.16
| epoch   2 |  1208/ 2634 batches | loss  4.61 | total loss  0.42 | accuracy  0.25 | total accuracy  0.16
| epoch   2 |  1209/ 2634 batches | loss  2.70 | total loss  0.42 | accuracy  0.25 | total accuracy  0.16
| epoch   2 |  1210/ 2634 batches | loss  1.38 | total loss  0.43 | accuracy  0.38 | total accuracy  0.16
| epoch   2 |  1211/ 2634 batches | loss  2.48 | total loss  0.43 | accuracy  0.25 | total accuracy  0.16
| epoch   2 |  1212/ 2634 batches | loss  1.95

| epoch   2 |  1281/ 2634 batches | loss  0.67 | total loss  0.44 | accuracy  0.50 | total accuracy  0.17
| epoch   2 |  1282/ 2634 batches | loss  0.54 | total loss  0.44 | accuracy  0.75 | total accuracy  0.17
| epoch   2 |  1283/ 2634 batches | loss  1.35 | total loss  0.44 | accuracy  0.25 | total accuracy  0.17
| epoch   2 |  1284/ 2634 batches | loss  0.90 | total loss  0.44 | accuracy  0.50 | total accuracy  0.17
| epoch   2 |  1285/ 2634 batches | loss  0.98 | total loss  0.44 | accuracy  0.38 | total accuracy  0.17
| epoch   2 |  1286/ 2634 batches | loss  1.37 | total loss  0.44 | accuracy  0.25 | total accuracy  0.17
| epoch   2 |  1287/ 2634 batches | loss  0.69 | total loss  0.44 | accuracy  0.50 | total accuracy  0.17
| epoch   2 |  1288/ 2634 batches | loss  1.52 | total loss  0.44 | accuracy  0.50 | total accuracy  0.17
| epoch   2 |  1289/ 2634 batches | loss  0.68 | total loss  0.44 | accuracy  0.75 | total accuracy  0.17
| epoch   2 |  1290/ 2634 batches | loss  0.94

| epoch   2 |  1359/ 2634 batches | loss  0.81 | total loss  0.46 | accuracy  0.38 | total accuracy  0.17
| epoch   2 |  1360/ 2634 batches | loss  0.98 | total loss  0.46 | accuracy  0.50 | total accuracy  0.17
| epoch   2 |  1361/ 2634 batches | loss  0.70 | total loss  0.46 | accuracy  0.75 | total accuracy  0.17
| epoch   2 |  1362/ 2634 batches | loss  1.61 | total loss  0.46 | accuracy  0.50 | total accuracy  0.17
| epoch   2 |  1363/ 2634 batches | loss  1.32 | total loss  0.46 | accuracy  0.38 | total accuracy  0.17
| epoch   2 |  1364/ 2634 batches | loss  1.13 | total loss  0.46 | accuracy  0.62 | total accuracy  0.17
| epoch   2 |  1365/ 2634 batches | loss  1.60 | total loss  0.46 | accuracy  0.62 | total accuracy  0.17
| epoch   2 |  1366/ 2634 batches | loss  0.53 | total loss  0.46 | accuracy  0.88 | total accuracy  0.17
| epoch   2 |  1367/ 2634 batches | loss  0.54 | total loss  0.46 | accuracy  0.75 | total accuracy  0.17
| epoch   2 |  1368/ 2634 batches | loss  1.22

| epoch   2 |  1438/ 2634 batches | loss  1.12 | total loss  0.47 | accuracy  0.38 | total accuracy  0.18
| epoch   2 |  1439/ 2634 batches | loss  0.39 | total loss  0.47 | accuracy  0.88 | total accuracy  0.18
| epoch   2 |  1440/ 2634 batches | loss  0.43 | total loss  0.47 | accuracy  0.88 | total accuracy  0.18
| epoch   2 |  1441/ 2634 batches | loss  1.61 | total loss  0.47 | accuracy  0.62 | total accuracy  0.18
| epoch   2 |  1442/ 2634 batches | loss  0.91 | total loss  0.47 | accuracy  0.62 | total accuracy  0.18
| epoch   2 |  1443/ 2634 batches | loss  0.79 | total loss  0.47 | accuracy  0.62 | total accuracy  0.18
| epoch   2 |  1444/ 2634 batches | loss  0.71 | total loss  0.47 | accuracy  0.75 | total accuracy  0.18
| epoch   2 |  1445/ 2634 batches | loss  1.45 | total loss  0.47 | accuracy  0.62 | total accuracy  0.18
| epoch   2 |  1446/ 2634 batches | loss  2.71 | total loss  0.47 | accuracy  0.25 | total accuracy  0.18
| epoch   2 |  1447/ 2634 batches | loss  1.39

| epoch   2 |  1516/ 2634 batches | loss  1.06 | total loss  0.50 | accuracy  0.50 | total accuracy  0.18
| epoch   2 |  1517/ 2634 batches | loss  1.32 | total loss  0.50 | accuracy  0.62 | total accuracy  0.18
| epoch   2 |  1518/ 2634 batches | loss  2.09 | total loss  0.50 | accuracy  0.50 | total accuracy  0.18
| epoch   2 |  1519/ 2634 batches | loss  0.44 | total loss  0.50 | accuracy  0.88 | total accuracy  0.19
| epoch   2 |  1520/ 2634 batches | loss  2.66 | total loss  0.50 | accuracy  0.38 | total accuracy  0.19
| epoch   2 |  1521/ 2634 batches | loss  0.63 | total loss  0.50 | accuracy  0.50 | total accuracy  0.19
| epoch   2 |  1522/ 2634 batches | loss  1.32 | total loss  0.50 | accuracy  0.38 | total accuracy  0.19
| epoch   2 |  1523/ 2634 batches | loss  1.67 | total loss  0.50 | accuracy  0.25 | total accuracy  0.19
| epoch   2 |  1524/ 2634 batches | loss  0.80 | total loss  0.50 | accuracy  0.62 | total accuracy  0.19
| epoch   2 |  1525/ 2634 batches | loss  0.87

| epoch   2 |  1594/ 2634 batches | loss  1.20 | total loss  0.51 | accuracy  0.25 | total accuracy  0.19
| epoch   2 |  1595/ 2634 batches | loss  0.82 | total loss  0.51 | accuracy  0.75 | total accuracy  0.19
| epoch   2 |  1596/ 2634 batches | loss  3.19 | total loss  0.51 | accuracy  0.25 | total accuracy  0.19
| epoch   2 |  1597/ 2634 batches | loss  1.97 | total loss  0.51 | accuracy  0.50 | total accuracy  0.19
| epoch   2 |  1598/ 2634 batches | loss  0.81 | total loss  0.51 | accuracy  0.38 | total accuracy  0.19
| epoch   2 |  1599/ 2634 batches | loss  2.01 | total loss  0.51 | accuracy  0.38 | total accuracy  0.19
| epoch   2 |  1600/ 2634 batches | loss  0.93 | total loss  0.51 | accuracy  0.62 | total accuracy  0.19
| epoch   2 |  1601/ 2634 batches | loss  0.93 | total loss  0.51 | accuracy  0.38 | total accuracy  0.19
| epoch   2 |  1602/ 2634 batches | loss  0.87 | total loss  0.51 | accuracy  0.50 | total accuracy  0.19
| epoch   2 |  1603/ 2634 batches | loss  1.56

| epoch   2 |  1673/ 2634 batches | loss  0.57 | total loss  0.52 | accuracy  0.62 | total accuracy  0.20
| epoch   2 |  1674/ 2634 batches | loss  0.90 | total loss  0.52 | accuracy  0.38 | total accuracy  0.20
| epoch   2 |  1675/ 2634 batches | loss  1.42 | total loss  0.52 | accuracy  0.00 | total accuracy  0.20
| epoch   2 |  1676/ 2634 batches | loss  0.86 | total loss  0.52 | accuracy  0.62 | total accuracy  0.20
| epoch   2 |  1677/ 2634 batches | loss  1.09 | total loss  0.52 | accuracy  0.62 | total accuracy  0.20
| epoch   2 |  1678/ 2634 batches | loss  1.74 | total loss  0.52 | accuracy  0.50 | total accuracy  0.20
| epoch   2 |  1679/ 2634 batches | loss  0.91 | total loss  0.52 | accuracy  0.62 | total accuracy  0.20
| epoch   2 |  1680/ 2634 batches | loss  0.89 | total loss  0.52 | accuracy  0.25 | total accuracy  0.20
| epoch   2 |  1681/ 2634 batches | loss  1.08 | total loss  0.52 | accuracy  0.62 | total accuracy  0.20
| epoch   2 |  1682/ 2634 batches | loss  1.16

| epoch   2 |  1752/ 2634 batches | loss  0.70 | total loss  0.53 | accuracy  0.75 | total accuracy  0.20
| epoch   2 |  1753/ 2634 batches | loss  0.49 | total loss  0.53 | accuracy  0.75 | total accuracy  0.20
| epoch   2 |  1754/ 2634 batches | loss  1.88 | total loss  0.53 | accuracy  0.25 | total accuracy  0.20
| epoch   2 |  1755/ 2634 batches | loss  0.72 | total loss  0.53 | accuracy  0.50 | total accuracy  0.20
| epoch   2 |  1756/ 2634 batches | loss  0.39 | total loss  0.53 | accuracy  0.88 | total accuracy  0.20
| epoch   2 |  1757/ 2634 batches | loss  1.19 | total loss  0.53 | accuracy  0.62 | total accuracy  0.20
| epoch   2 |  1758/ 2634 batches | loss  1.00 | total loss  0.53 | accuracy  0.75 | total accuracy  0.20
| epoch   2 |  1759/ 2634 batches | loss  1.75 | total loss  0.53 | accuracy  0.25 | total accuracy  0.20
| epoch   2 |  1760/ 2634 batches | loss  1.01 | total loss  0.53 | accuracy  0.38 | total accuracy  0.20
| epoch   2 |  1761/ 2634 batches | loss  0.47

| epoch   2 |  1831/ 2634 batches | loss  0.66 | total loss  0.55 | accuracy  0.62 | total accuracy  0.21
| epoch   2 |  1832/ 2634 batches | loss  0.72 | total loss  0.55 | accuracy  0.50 | total accuracy  0.21
| epoch   2 |  1833/ 2634 batches | loss  0.83 | total loss  0.55 | accuracy  0.62 | total accuracy  0.21
| epoch   2 |  1834/ 2634 batches | loss  0.92 | total loss  0.55 | accuracy  0.38 | total accuracy  0.21
| epoch   2 |  1835/ 2634 batches | loss  0.79 | total loss  0.55 | accuracy  0.50 | total accuracy  0.21
| epoch   2 |  1836/ 2634 batches | loss  0.95 | total loss  0.55 | accuracy  0.38 | total accuracy  0.21
| epoch   2 |  1837/ 2634 batches | loss  0.74 | total loss  0.55 | accuracy  0.50 | total accuracy  0.21
| epoch   2 |  1838/ 2634 batches | loss  0.69 | total loss  0.55 | accuracy  0.62 | total accuracy  0.21
| epoch   2 |  1839/ 2634 batches | loss  1.21 | total loss  0.55 | accuracy  0.50 | total accuracy  0.21
| epoch   2 |  1840/ 2634 batches | loss  1.54

KeyboardInterrupt: 

# Draft

In [None]:
for ctx,pos_token,pos_ctx,label in train_loader:
    print(code2string(ctx[0],dict_token))
    print(pos_ctx[0])
    print(label[0].item())
    break

In [None]:
torch.LongTensor([0])

In [6]:
50 % 100

50

219

0.25

In [11]:
ctx

tensor([[-0.1767,  0.7985],
        [-0.3940,  0.8467],
        [-0.4016,  0.7627],
        [-0.4417,  0.9521],
        [-0.2279,  0.7989],
        [-0.0924,  0.3308],
        [-0.2382,  0.8198],
        [-0.6877,  1.0233]], device='cuda:0', grad_fn=<AddmmBackward>)

In [12]:
label

tensor([1, 0, 0, 0, 1, 0, 0, 0], device='cuda:0')