In [13]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
import time
import pathlib
import os
import PIL

from model.decoder import Decoder
from model.encoder import CNN_Encoder
from model.attention import BahdanauAttention
from model.decoder import embedding_initializer
from components.positional import add_timing_signal_nd
from sklearn.model_selection import train_test_split

## Dataset

In [3]:
data_dir = pathlib.Path("data/small")

In [4]:
imgs = list(data_dir.glob("*.png"))

In [5]:
imgs_paths = []
for img in imgs:
    imgs_paths.append(os.fspath(img))

In [6]:
imgs_paths = sorted(set(imgs_paths))

In [7]:
lines = open("data/small.formulas.norm.txt", 'r').read().split('\n')

In [8]:
for i in range(len(lines)):
    lines[i] = "<start> " + lines[i] + " <end>" 

## Tokenize

In [10]:
top_k = 400

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=top_k, oov_token="<unk>", filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')
tokenizer.fit_on_texts(lines)
train_seqs = tokenizer.texts_to_sequences(lines)

In [11]:
tokenizer.word_index['<pad>'] = 0
tokenizer.index_word[0] = '<pad>'

In [12]:
cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')

## Split Dataset

In [14]:
embedding_dim = 80
BATCH_SIZE = 2
units = 512
vocab_size = top_k + 1

In [15]:
def load_image(image_path, formula):
    img = tf.io.read_file(image_path)
    img = tf.image.decode_png(img)
    return img, formula

In [16]:
# Create training and validation sets using an 80-20 split
img_name_train, img_name_val, cap_train, cap_val = train_test_split(imgs_paths,
                                                                    cap_vector,
                                                                    test_size=0.2,
                                                                    random_state=0)

In [17]:
dataset = tf.data.Dataset.from_tensor_slices((img_name_train, cap_train))

In [18]:
dataset = dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [19]:
dataset = dataset.shuffle(1000).batch(BATCH_SIZE)

In [20]:
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

## Init Encoder Decoder

In [22]:
encoder = CNN_Encoder(embedding_dim)

In [23]:
decoder = Decoder(embedding_dim, 512, vocab_size=top_k+1, )

TypeError: __init__() missing 4 required positional arguments: 'embedding_dim', 'units', 'vocab_size', and 'id_end'

## Checkpoints

In [21]:
checkpoint_path = "./checkpoints/train"
ckpt = tf.train.Checkpoint(encoder=encoder, decoder=decoder, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

NameError: name 'encoder' is not defined

In [16]:
start_epoch = 0

if ckpt_manager.latest_checkpoint:
    start_epoch = int(ckpt_manager.latest_checkpoint.split('-')[-1])
    # restore to latest cehckpoint
    ckpt.restore(ckpt_manager.latest_checkpoint)

## Training

In [82]:
loss_plot = []

In [None]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    
    return tf.reduce_mean(loss_)

In [83]:
@tf.function
def train_step(img_tensor, target):
    loss = 0
    
    hidden = decoder.reset_state(batch_size=target.shape[0])
    
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']] * target.shape[0], 1)
    
    with tf.GradientTape() as tape:
        features = encoder(img_tensor)
        
        for i in range(1, target.shape[1]):
            predictions, hidden, _ = decoder(dec_input, features, hidden)
            
            loss += loss_function(target[:, i], predictions)
            
            dec_input = tf.expand_dims(target[:, i], 1)
            
        total_loss = (loss / int(target.shape[1]))
        
        trainable_variables = encoder.trainable_variables + decoder.trainable_variables
        
        gradients = tape.gradient(loss, trainable_varaibles)
        
        optimizer.apply_gradients(zip(gradients, trainable_variables))
        
        return loss, total_loss
    

In [85]:
EPOCHS = 20

for epoch in range(start_epoch, EPOCHS):
    start = time.time()
    total_loss = 0
    
    for (batch, (img_tensor, target)) in enumerate(dataset):
        batch_loss, t_loss = train_step(img_tensor, target)
        total_loss += t_loss
    
        if batch % 100 == 0:
            print ('Epoch {} Batch {} Loss {:.4f}'.format(
                  epoch + 1, batch, batch_loss.numpy() / int(target.shape[1])))
        
    loss_plot.append(total_loss / num_steps)
    
    if epoch % 5 == 0:
        ckpt_manager.save()
        
    print ('Epoch {} Loss {:.6f}'.format(epoch + 1,
                                         total_loss/num_steps))
    print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


TypeError: in user code:

    <ipython-input-63-535913d6ee5b>:10 train_step  *
        features = encoder(img_tensor)
    <ipython-input-5-42a4df044102>:20 call  *
        x = self.encoder(x)
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__  **
        outputs = call_fn(inputs, *args, **kwargs)
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py:386 call
        outputs = layer(inputs, **kwargs)
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py:247 call
        outputs = self._convolution_op(inputs, self.kernel)
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1011 convolution_v2
        return convolution_internal(
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1141 convolution_internal
        return op(
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:2585 _conv2d_expanded_batch
        return gen_nn_ops.conv2d(
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/ops/gen_nn_ops.py:975 conv2d
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py:576 _apply_op_helper
        _SatisfiesTypeConstraint(base_type,
    /Users/frankpalma/Projects/img2latex/env/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py:57 _SatisfiesTypeConstraint
        raise TypeError(

    TypeError: Value passed to parameter 'input' has DataType uint8 not in list of allowed values: float16, bfloat16, float32, float64, int32
