In [1]:
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap

from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# Splits data into batches of defined size
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
file_errors_location = 'ArithOpsTrain.xlsx'
df = pd.read_excel(file_errors_location)

In [3]:
from torchtext.data import get_tokenizer

# Downloads GloVe and FastText
#global_vectors = GloVe(name='840B', dim=300)

# ----------- Text Preprocessing -----------
#nlp = spacy.load("en_core_web_md")
tokenizer = get_tokenizer("basic_english")

In [4]:
dataset = []
vocab, vocab_target = [], []
for idx, row in enumerate(df.iterrows()):
    if idx == 0:
        print(row[1][1], row[1][2], row[1][3], row[1][4], row[1][5])
        continue 
    
    context = tokenizer(row[1][1])
    vocab.extend(context)
    
    ques = tokenizer(row[1][2])
    vocab.extend(ques)

    ans = tokenizer(row[1][3])
    vocab_target.extend(ans)

    ip = [float(x) for x in row[1][4].split()]
    out = float(row[1][5])
    dataset.append((context, ques, ans, ip, out))

Description Question Equation Input Numbers Output


In [5]:
vocab_to_id = {}
ids = 1
for word in vocab:
    if word not in vocab_to_id:
        vocab_to_id[word] = ids
        ids += 1

vocab_target_to_id = {}
idt = 1
for word in vocab_target:
    if word not in vocab_target_to_id:
        vocab_target_to_id[word] = idt
        idt += 1

print(ids, idt)

2617 10


In [6]:
dataset_tokenized = []

for context, ques, ans, ip, out in dataset:
    context_token = []
    for word in context:
        context_token.append(vocab_to_id[word])
    
    ques_token = []
    for word in ques:
        ques_token.append(vocab_to_id[word])

    ans_token = []
    for word in ans:
        ans_token.append(vocab_target_to_id[word])
    
    dataset_tokenized.append((context_token, ques_token, ans_token, ip, out))

In [7]:
dataset_tokenized[0]

([1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 5, 6],
 [10, 11, 12, 13, 14, 15, 4, 5, 16],
 [1, 2, 3],
 [63.0, 50.0],
 113.0)

In [8]:
def split_indices(n, val_pct):

    # Determine size of Validation set
    n_val = int(val_pct * n)

    # Create random permutation of 0 to n-1
    idxs = np.random.permutation(n)
    return idxs[n_val:], idxs[:n_val]

train_indices, val_indices = split_indices(len(dataset), 0.2)

In [73]:
from torch.nn.utils.rnn import pad_sequence

# ----------- Batching the data -----------
def collate_fn(instn):
    context = [torch.Tensor(x[0] + x[1]) for x in instn]
    ques = [torch.Tensor(x[1]) for x in instn]
    ans = [torch.Tensor(x[2]) for x in instn]
    ip = [x[3] for x in instn]
    out = [x[4] for x in instn]

    context_pad = pad_sequence(context, batch_first=True, padding_value=10).long()
    ques_pad = pad_sequence(ques, batch_first=True, padding_value=10).long()
    ans_pad = pad_sequence(ans, batch_first=True, padding_value=10).long()

    return (context_pad, ques_pad, ans_pad, ip, out)


batch_size = 128

train_sampler   = SubsetRandomSampler(train_indices)
train_loader    = DataLoader(dataset_tokenized, batch_size, sampler=train_sampler, collate_fn=collate_fn)

val_sampler     = SubsetRandomSampler(val_indices)
val_loader      = DataLoader(dataset_tokenized, batch_size, sampler=val_sampler, collate_fn=collate_fn)

In [74]:
for i in train_loader:
    print(i)
    break

(tensor([[ 123, 1645,  253,  ...,   10,   10,   10],
        [ 681,  682,  100,  ...,   10,   10,   10],
        [ 223,    2,    3,  ...,   10,   10,   10],
        ...,
        [ 237, 1440,   19,  ...,   10,   10,   10],
        [   3, 1840,   59,  ...,   10,   10,   10],
        [  64,   82, 1239,  ...,   10,   10,   10]]), tensor([[  11,   12, 2039,  ...,   10,   10,   10],
        [  27,   28,  673,  ...,   10,   10,   10],
        [  27,   28,   85,  ...,   10,   10,   10],
        ...,
        [  27,  317,   56,  ...,   10,   10,   10],
        [  27,   28, 1840,  ...,   10,   10,   10],
        [  27,   28,  140,  ...,   10,   10,   10]]), tensor([[ 4,  3,  2, 10, 10],
        [ 1,  1,  2,  3,  5],
        [ 4,  2,  3, 10, 10],
        [ 8,  2,  3, 10, 10],
        [ 8,  2,  3, 10, 10],
        [ 4,  2,  3, 10, 10],
        [ 9,  3,  2, 10, 10],
        [ 8,  2,  3, 10, 10],
        [ 4,  3,  2, 10, 10],
        [ 9,  2,  3, 10, 10],
        [ 8,  2,  3, 10, 10],
        [ 9,  2

In [87]:
# Temporarily leave PositionalEncoding module here. Will be moved somewhere else.

import math
class PositionalEncoding(nn.Module):
    r"""Inject some information about the relative or absolute position of the tokens in the sequence.
        The positional encodings have the same dimension as the embeddings, so that the two can be summed.
        Here, we use sine and cosine functions of different frequencies.
    .. math:
        \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
        \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
        \text{where pos is the word position and i is the embed idx)
    Args:
        d_model: the embed dim (required).
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    Examples:
        >>> pos_encoder = PositionalEncoding(d_model)
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        r"""Inputs of forward function
        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        Examples:
            >>> output = pos_encoder(x)
        """

        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TransformerModel(nn.Module):
    """Container module with an encoder, a recurrent or transformer module, and a decoder."""

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, nout, dropout=0.5):
        super(TransformerModel, self).__init__()
        try:
            from torch.nn import TransformerEncoder, TransformerEncoderLayer
        except:
            raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, nout)

        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.encoder.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, src, has_mask=True, max_len=None):
        if has_mask:
            device = src.device
            if self.src_mask is None or self.src_mask.size(0) != len(src):
                mask = self._generate_square_subsequent_mask(len(src)).to(device)
                self.src_mask = mask
        else:
            self.src_mask = None

        B, seq_len = src.shape[0], src.shape[1]

        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        #print(output.shape)
        x = self.decoder(output)[:, :max_len]

        #logits = torch.zeros(B, 1, idt+1).to(src.device)
        #for i in range(max_len):
        #    x = self.decoder(output)          # B x 11
        #    logits = torch.cat((logits, x.unsqueeze(dim=1)), dim = 1)                                
        
        return torch.softmax(x, dim=-1)

In [88]:
model = TransformerModel(ids+1, 512, 8, 256, 4, 11, 0.2)

device = torch.device("cuda:0")
model.to(device)
opt = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [89]:
def calculate_metric(ans, ip):
    out = []
    for b in range(len(ans)):
        stack = []
        expression = ans[b]
        # iterate over the string in reverse order
        #print(expression[::-1], ip[b])
        for c in expression[::-1]:
    
            # push operand to stack
            if "number" in c:
                idx = int(c[-1])
                if len(ip[b]) <= idx:
                    stack.append(int(ip[b][0]))
                    continue
                stack.append(int(ip[b][idx]))
    
            elif c in "+-/*":
                # pop values from stack can calculate the result
                # push the result onto the stack again
                try:
                    o1 = stack.pop()
                    o2 = stack.pop()
                    #print(o1, o2)
                except:
                    continue
    
                if c == '+':
                    stack.append(o1 + o2)
    
                elif c == '-':
                    stack.append(o1 - o2)
    
                elif c == '*':
                    stack.append(o1 * o2)
    
                elif c == '/':
                    try:
                        stack.append(o1 / o2)
                    except:
                        print(ans[b], ip[b])
        try:
            out.append(stack.pop())
        except:
            out.append(int(ip[b][0]) + int(ip[b][1]))
    return out


In [90]:
id_to_vocab_target = {}
for i, v in vocab_target_to_id.items():
    id_to_vocab_target[v] = i
id_to_vocab_target[0] = "BOS"
id_to_vocab_target[idt] = "EOS"
#id_to_vocab_target

In [91]:
# ----------- Main Training Loop -----------
max_epoch = 15

best_test_acc = 0
for ep in range(1, max_epoch+1):

    epoch_loss = 0

    model.train()
    train_labels = []
    train_pred = []
    correct_train, B_train = 0, 0
    for cont, ques, ans, ip, out in tqdm(train_loader):
        loss = 0
        cont = cont.to(device)
        ques = ques.to(device)
        ans = ans.to(device)

        ans_pred = model(cont, False, ans.shape[1])

        for i in range(ans.shape[1]):
            loss += loss_fn(ans_pred[:, i], ans[:, i]) 

        loss.backward()
        opt.step()
        opt.zero_grad()

        epoch_loss += float(loss)

        pred = []
        for b in range(ans.shape[0]):
            p = []
            for i in range(ans.shape[1]):
                p.append(id_to_vocab_target[torch.argmax(ans_pred[0,i,:]).item()])
            pred.append(p)
        
        if ep > 2:
            outt = calculate_metric(pred, ip)
            #print(outt)
            for i in range(len(out)):
                #print(outt[i], out[i])
                if float(outt[i]) == float(out[i]):
                    correct_train += 1
                else:
                    #print(outt[i], out[i])
                    pass
            B_train += len(out)       

    print("Epoch: ", ep+1, " Training Loss: ", epoch_loss/ len(train_loader))
    #print("Train accuracy: ", accuracy_score(train_labels, train_pred)*50)
    if ep > 2:
        print("Train EM: ", (correct_train/ B_train)*100)

    epoch_loss = 0

    model.eval()
    train_labels = []
    train_pred = []
    correct_train, B_train = 0, 0
    for cont, ques, ans, ip, out in tqdm(val_loader):
        loss = 0
        cont = cont.to(device)
        ques = ques.to(device)
        ans = ans.to(device)

        ans_pred = model(cont, False, ans.shape[1])

        for i in range(ans.shape[1]):
            loss += loss_fn(ans_pred[:, i], ans[:, i]) 

        epoch_loss += float(loss)

        pred = []
        for b in range(ans.shape[0]):
            p = []
            for i in range(ans.shape[1]):
                p.append(id_to_vocab_target[torch.argmax(ans_pred[0,i,:]).item()])
            pred.append(p)
        
        if ep > 2:
            outt = calculate_metric(pred, ip)
            #print(outt)
            for i in range(len(out)):
                #print(outt[i], out[i])
                if float(outt[i]) == float(out[i]):
                    correct_train += 1
                else:
                    #print(outt[i], out[i])
                    pass
            B_train += len(out)       

    print("Epoch: ", ep+1, " Validation Loss: ", epoch_loss/ len(val_loader))
    #print("Train accuracy: ", accuracy_score(train_labels, train_pred)*50)
    if ep > 2:
        print("Val EM: ", (correct_train/ B_train)*100)
    


  0%|          | 0/7 [00:00<?, ?it/s] 29%|██▊       | 2/7 [00:00<00:00, 16.74it/s] 57%|█████▋    | 4/7 [00:00<00:00, 17.33it/s]100%|██████████| 7/7 [00:00<00:00, 22.43it/s]100%|██████████| 7/7 [00:00<00:00, 20.88it/s]


Epoch:  2  Training Loss:  11.332656587873187


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 44.57it/s]


Epoch:  2  Validation Loss:  11.249484539031982


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 22.58it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.12it/s]100%|██████████| 7/7 [00:00<00:00, 25.38it/s]


Epoch:  3  Training Loss:  11.163864135742188


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.97it/s]


Epoch:  3  Validation Loss:  11.228146076202393


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 24.14it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.95it/s]100%|██████████| 7/7 [00:00<00:00, 26.46it/s]


Epoch:  4  Training Loss:  11.132614135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.80it/s]


Epoch:  4  Validation Loss:  11.263710498809814
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 24.24it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.73it/s]100%|██████████| 7/7 [00:00<00:00, 26.27it/s]


Epoch:  5  Training Loss:  11.163864135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.65it/s]


Epoch:  5  Validation Loss:  11.242371082305908
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.87it/s] 86%|████████▌ | 6/7 [00:00<00:00, 24.23it/s]100%|██████████| 7/7 [00:00<00:00, 26.66it/s]


Epoch:  6  Training Loss:  11.132614135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.78it/s]


Epoch:  6  Validation Loss:  11.249484539031982
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.80it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.81it/s]100%|██████████| 7/7 [00:00<00:00, 26.28it/s]


Epoch:  7  Training Loss:  11.148239135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.35it/s]


Epoch:  7  Validation Loss:  11.228146076202393
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.87it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.96it/s]100%|██████████| 7/7 [00:00<00:00, 26.42it/s]


Epoch:  8  Training Loss:  11.163864135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.51it/s]


Epoch:  8  Validation Loss:  11.242371082305908
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.44it/s] 86%|████████▌ | 6/7 [00:00<00:00, 24.16it/s]100%|██████████| 7/7 [00:00<00:00, 26.52it/s]


Epoch:  9  Training Loss:  11.148239135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 47.30it/s]


Epoch:  9  Validation Loss:  11.285048961639404
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.39it/s] 86%|████████▌ | 6/7 [00:00<00:00, 24.05it/s]100%|██████████| 7/7 [00:00<00:00, 26.42it/s]


Epoch:  10  Training Loss:  11.179489135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.46it/s]


Epoch:  10  Validation Loss:  11.320613384246826
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.60it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.67it/s]100%|██████████| 7/7 [00:00<00:00, 26.13it/s]


Epoch:  11  Training Loss:  11.148239135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.76it/s]


Epoch:  11  Validation Loss:  11.25659704208374
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.95it/s] 86%|████████▌ | 6/7 [00:00<00:00, 24.18it/s]100%|██████████| 7/7 [00:00<00:00, 26.59it/s]


Epoch:  12  Training Loss:  11.132614135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 47.00it/s]


Epoch:  12  Validation Loss:  11.19258165359497
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 24.21it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.92it/s]100%|██████████| 7/7 [00:00<00:00, 26.45it/s]


Epoch:  13  Training Loss:  11.116988999502999
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.57it/s]


Epoch:  13  Validation Loss:  11.249484539031982
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.88it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.99it/s]100%|██████████| 7/7 [00:00<00:00, 26.45it/s]


Epoch:  14  Training Loss:  11.179489135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 47.75it/s]


Epoch:  14  Validation Loss:  11.27793550491333
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.85it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.65it/s]100%|██████████| 7/7 [00:00<00:00, 26.15it/s]


Epoch:  15  Training Loss:  11.116989135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.81it/s]


Epoch:  15  Validation Loss:  11.206806659698486
Val EM:  20.0


  0%|          | 0/7 [00:00<?, ?it/s] 43%|████▎     | 3/7 [00:00<00:00, 23.70it/s] 86%|████████▌ | 6/7 [00:00<00:00, 23.88it/s]100%|██████████| 7/7 [00:00<00:00, 26.33it/s]


Epoch:  16  Training Loss:  11.195114135742188
Train EM:  19.642857142857142


  0%|          | 0/2 [00:00<?, ?it/s]100%|██████████| 2/2 [00:00<00:00, 46.52it/s]

Epoch:  16  Validation Loss:  11.285048961639404
Val EM:  20.0





In [92]:
print(B_train, correct_train)

195 39


In [68]:
# ----------- Main Training Loop -----------
max_epoch = 1

best_test_acc = 0
for ep in range(max_epoch):

    epoch_loss = 0

    model.train()
    train_labels = []
    train_pred = []
    correct_train, B_train = 0, 0
    for cont, ques, ans, ip, out in tqdm(val_loader):
        loss = 0
        cont = cont.to(device)
        ques = ques.to(device)
        ans = ans.to(device)

        ans_pred = model(cont, False, ans.shape[1])

        for i in range(ans.shape[1]):
            loss += torch.mean(loss_fn(ans_pred[:, i], ans[:, i]) * ans[:, i])


        epoch_loss += float(loss)

        for b in range(ans.shape[0]):
            p = [[]]
            for i in range(ans.shape[1]):
                print(id_to_vocab_target[torch.argmax(ans_pred[b,i,:]).item()], end=" ")
                p[0].append(id_to_vocab_target[torch.argmax(ans_pred[b,i,:]).item()])
                print(id_to_vocab_target[ans[b, i].item()])
            print()
            outt = calculate_metric(p, [ip[b]])
            print(outt, out[b])
        #y_hat = torch.softmax(y_hat, dim = 1).argmax(dim=1)

        #correct, B = exact_match(y_hat, yb)
        #correct_train += correct
        #B_train += B
        #train_labels.extend(yb.cpu().detach().numpy())
        #train_pred.extend(y_hat.cpu().detach().numpy())

    print("Epoch: ", ep+1, " Training Loss: ", epoch_loss)
    #print("Train accuracy: ", accuracy_score(train_labels, train_pred)*100)
    #print("Train EM: ", (correct_train/ B_train)*100)


  0%|          | 0/195 [00:00<?, ?it/s] 12%|█▏        | 23/195 [00:00<00:00, 229.46it/s] 24%|██▍       | 47/195 [00:00<00:00, 235.29it/s]

- -
number1 -
number1 number0
number0 number2
number1 number1

[0] 35.0
number1 -
number1 number0
number1 +
- number3
number0 number5

[2] 68.0
- +
number0 number0
number1 number1

[-34] 88.0
number0 +
+ number0
number0 number1

[55] 98.0
* +
number0 *
- number1
- number2
number1 number0

[13] 21.0
- *
number0 number0
number1 number1

[-45] 250.0
- +
number0 -
/ number0
number0 number1
number1 number2

[45.12] 62.0
- +
number0 number0
number1 number1

[4] 20.0
+ -
number1 -
- number0
* number1
number1 number2

[93] 11.0
+ +
number0 number0
- number1

[601] 601.0
number0 -
number1 number0
number1 number1

[68] 43.07
/ /
number0 number0
number1 number1

[10.0] 10.0
number1 +
/ number0
number1 number1

[345] 934.0
+ +
number0 +
- number2
number0 number1
number1 number0

[29] 41.0
- +
* +
number1 number0
+ number1
number0 number2

[17] 20.0
number1 -
number0 number0
number1 number1

[55] 18.0
* +
number1 *
number0 number0
number0 number1
- number2

[32] 34.0
- -
number0 number1
number1 num

 37%|███▋      | 72/195 [00:00<00:00, 239.19it/s]

-
number0 number0
number1 number1

[1.4444444444444444] 4.0
+ *
number0 number0
number1 number1

[9] 14.0
- /
+ number0
- number1

[20] 9.0
* -
number1 number0
- number1

[613] 345.0
number1 +
number0 number0
+ number1

[56] 91.0
/ /
number0 number0
number1 number1

[8.0] 8.0
number1 *
+ number0
number0 number1

[34] 204.0
/ *
number2 number0
number1 number1

[1.9473684210526316] 703.0
- -
number0 number0
number1 number1

[4] 4.0
number0 /
- number0
number0 number1

[42] 3.5
- /
number0 -
number1 number0
+ number1
number0 number2

[40] 5.0
+ /
number0 number0
number0 number1

[384] 48.0
- /
number0 +
+ number0
number0 number1
number1 number2

[-2] 5.0
number0 -
number1 number0
number1 number1

[555] 208.0
- +
number0 number0
number1 number1

[5] 9.0
* +
number0 number0
number0 number1

[186624] 729.0
number0 /
number2 +
number1 number0
number1 number1
number2 number2

[44] 8.0
+ +
number0 number1
number2 number0

[24] 66.0
number1 -
number0 -
number1 number0
number0 number1
+ number2



 50%|████▉     | 97/195 [00:00<00:00, 240.57it/s] 63%|██████▎   | 122/195 [00:00<00:00, 242.72it/s]

-
- number1
number0 number3

[64] 35.0
number1 *
number0 number0
number1 number1

[3] 81.0
- -
+ -
number0 number0
- number1
number0 number2

[111] 44.0
/ -
number0 number0
- number1

[74] 12.0
* *
number1 number1
number2 number2

[2916] 2916.0
number1 +
+ number0
number0 number1

[38] 217.0
- +
number1 -
number0 number0
number1 number1
number0 number2

[-35] 56.0
- -
number0 number0
number1 number1

[276] 276.0
- -
number0 number2
number1 number1

[-441] 9.0
/ -
number0 number1
number0 number0

[1.0] 36.0
* /
number0 number1
number0 number0

[6724] 22.0
+ -
number0 number0
number0 number1

[92] 32.0
- -
number0 number0
number2 +
number1 number1
number0 number2

[640] 315.0
+ +
number1 number0
- number1

[700] 700.0
number0 +
number0 +
number2 number0
number1 number1
number0 number2

[42] 99.0
- +
number0 number0
number1 number1

[-7] 63.0
- -
number1 number0
number1 number1

[0] 3.0
- /
number0 number0
number1 number2

[-1] 3.0
+ +
number0 *
number1 number0
number2 number1
number1 num

 75%|███████▌  | 147/195 [00:00<00:00, 242.74it/s] 88%|████████▊ | 172/195 [00:00<00:00, 242.22it/s]

number1

[1.8333333333333333] 15.0
number1 -
number0 number1
number1 +
number1 number2
number0 number3

[650] 225.0
+ +
number1 -
number1 number0
number1 number1
number2 number2

[50] 47.0
number1 /
/ number0
number0 number1

[8] 6.0
number0 /
number1 number0
number1 number1

[294] 42.0
- *
number1 number0
number1 number1

[0] 6835.0
+ -
number0 number0
number0 number1

[70] 5.0
- +
- number0
number1 number1

[96] 96.0
* -
number0 number0
number1 number1

[1536] 16.0
+ +
number1 -
number0 number0
+ number1
/ number2

[57] 56.0
number1 -
number0 number1
number1 number3

[64] 48.0
number0 +
number1 number0
number1 number1

[45] 73.0
- /
number1 number2
number0 number1

[14] 12.0
- -
number1 +
number1 number0
number0 number1
number1 number2

[0] 15.0
- -
* number0
number1 +
number1 number2
number1 number3

[6] 262.0
- /
number0 number0
number1 number1

[23] 8.66666666666667
- *
number0 number0
number1 number1

[261] 1876.0
+ -
number1 number0
number0 number1

[93] 57.0
number0 -
/ number1

100%|██████████| 195/195 [00:00<00:00, 241.09it/s]

*
/ -
number2 number1
number0 number2
- number0

[19] 72.0
Epoch:  1  Training Loss:  5024.410477876663



