In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

import re
from matplotlib import pyplot as plt


In [2]:
# Load the cleaned training CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='cleaned-data/train-fixed.no-ol.csv')
df.head(n=5)

Unnamed: 0,mr,ref,fixed,orig_mr
0,"name[The Eagle], eatType[coffee shop], food[Ja...",The Eagle is a low rated coffee shop near Burg...,0,"name[The Eagle], eatType[coffee shop], food[Ja..."
1,"name[The Mill], eatType[coffee shop], food[Fre...",Located near The Sorrento is a French Theme ea...,1,"name[The Mill], eatType[coffee shop], food[Fre..."
2,"name[Loch Fyne], food[French], area[riverside]...","For luxurious French food, the Loch Fyne is lo...",1,"name[Loch Fyne], food[French], customer rating..."
3,"name[The Rice Boat], eatType[restaurant], food...",The Rice Boat is an adult French restaurant wi...,1,"name[The Rice Boat], food[French], customer ra..."
4,"name[The Wrestlers], eatType[coffee shop], foo...",The Wrestlers coffee shop serves Japanese food...,0,"name[The Wrestlers], eatType[coffee shop], foo..."


In [3]:
# Stream the same CSV through tf.data: the header row is skipped and only the
# first two columns are read, each parsed as a required string field.
train_data = tf.data.experimental.CsvDataset(
    filenames='cleaned-data/train-fixed.no-ol.csv',
    record_defaults=[tf.string] * 2,
    select_cols=[0, 1],
    header=True,
)

In [4]:
# Preview the first two records of the dataset.
# `take(2)` bounds the iteration inside the pipeline, so no manual counter is
# needed and a third record is never fetched just to hit the `break`.
for d in train_data.take(2):
    print(d)

(<tf.Tensor: shape=(), dtype=string, numpy=b'name[The Eagle], eatType[coffee shop], food[Japanese], priceRange[less than \xc2\xa320], customer rating[low], area[riverside], familyFriendly[yes], near[Burger King]'>, <tf.Tensor: shape=(), dtype=string, numpy=b'The Eagle is a low rated coffee shop near Burger King and the riverside that is family friendly and is less than \xc2\xa320 for Japanese food.'>)
(<tf.Tensor: shape=(), dtype=string, numpy=b'name[The Mill], eatType[coffee shop], food[French], area[riverside], near[The Sorrento]'>, <tf.Tensor: shape=(), dtype=string, numpy=b'Located near The Sorrento is a French Theme eatery and coffee shop called The Mill, with a price range at \xc2\xa320-\xc2\xa325 it is in the riverside area.'>)


In [5]:
from transformers import AutoTokenizer, AutoModelWithLMHead

# Start from the pretrained "t5-small" tokenizer.
tokenizer = AutoTokenizer.from_pretrained("t5-small")

# Slot-name markers and other marker tokens to register as additional special tokens.
special_tokens = {
    'additional_special_tokens': [
        '<area>', '<eatType>', '<food>', '<near>',
        '<name>', '<customer rating>', '<priceRange>',
        '<familyFriendly>', '<notfamilyFriendly>',
        '<cr_slot>', '<pr_slot>', '<sos>',
    ]
}

# Kept as the last expression so the return value is displayed as the cell output.
tokenizer.add_special_tokens(special_tokens)

12

In [6]:
# Tokenizer size after the special tokens above were registered.
len(tokenizer)

32112

In [6]:
# Shape smoke test: build a small E2ETransformer and push random token ids through it.
from e2e_transformers.model import E2ETransformer

# Toy hyperparameters; the vocabulary sizes here are placeholders and are not tied
# to the tokenizer built earlier in the notebook.
sample_transformer = E2ETransformer(
    num_enc_layers=2, num_dec_layers=2, d_model=512, num_heads=8, dff=2048, 
    input_vocab_size=8500, target_vocab_size=8000, 
    pe_input=10000, pe_target=6000)

# Random integer ids in [0, 200): batch of 64, length 38 for the input and 36 for the target.
temp_input = tf.random.uniform((64, 38), dtype=tf.int64, minval=0, maxval=200)
temp_target = tf.random.uniform((64, 36), dtype=tf.int64, minval=0, maxval=200)

# NOTE(review): `temp_input` is passed twice — presumably the model takes two
# aligned encoder-side sequences; confirm against E2ETransformer's call signature.
# All masks are disabled (None) and training=False for this check.
fn_out, _ = sample_transformer(temp_input, temp_input, temp_target, training=False, 
                               enc_padding_mask=None, 
                               look_ahead_mask=None,
                               dec_padding_mask=None)

fn_out.shape  # (batch_size, tar_seq_len, target_vocab_size)

TensorShape([64, 36, 8000])

In [54]:
def p(x):
    """Return a float mask that is 1.0 where ``x`` is non-zero and 0.0 where it is 0.

    A new axis is inserted at position 2, so a ``(batch, seq)`` input yields a
    ``(batch, seq, 1)`` mask that broadcasts against ``(batch, seq, dim)`` values.
    """
    keep_mask = tf.cast(tf.math.not_equal(x, 0), tf.float32)
    return keep_mask[:, :, tf.newaxis]

# Toy check of masked embedding addition: every embedding row is all ones, so each
# output position is 2 where `a` is non-zero and 1 where `a` is 0.
a = tf.constant([
    [1, 2, 0, 0],
    [1, 2, 9, 0],
    [1, 0, 0, 0],
])
b = tf.constant([[6, 5, 3, 8]] * 3)

init = tf.keras.initializers.Constant(tf.ones(shape=(10, 4), dtype=tf.float32))
emb = tf.keras.layers.Embedding(10, 4, embeddings_initializer=init)

# The embedding of `a` is zeroed wherever `a` is 0 before it is added to the embedding of `b`.
print(emb(b) + emb(a) * p(a))

tf.Tensor(
[[[2. 2. 2. 2.]
  [2. 2. 2. 2.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[2. 2. 2. 2.]
  [2. 2. 2. 2.]
  [2. 2. 2. 2.]
  [1. 1. 1. 1.]]

 [[2. 2. 2. 2.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]], shape=(3, 4, 4), dtype=float32)


In [51]:
# Inspect the mask on its own.
# NOTE(review): the output recorded under this cell (execution count 51) shows the
# inverse mask (1.0 where `a` is 0). It does not match the `p` defined at execution
# count 54, which yields 1.0 where `a` is non-zero. Re-run this cell to refresh it.
p(a)

<tf.Tensor: shape=(3, 4, 1), dtype=float32, numpy=
array([[[0.],
        [0.],
        [1.],
        [1.]],

       [[0.],
        [0.],
        [0.],
        [1.]],

       [[0.],
        [1.],
        [1.],
        [1.]]], dtype=float32)>

In [28]:
# Check the dtype tf.constant assigned to the integer ids in `a`.
a.dtype

tf.int32

In [11]:
# Short run of the training script from the shell (flag meanings are defined in train.py).
# NOTE(review): -train_path points at cleaned-data/test-fixed.csv, not a train
# split — presumably a deliberate quick smoke test; confirm before reusing this command.
!python train.py -train_path cleaned-data/test-fixed.csv -epoch 2 -b 32 -d_inner_hid=64 -embedding dummy_embed.npy -n_heads 2 -n_dec_layers 3

2020-10-06 19:41:18.716051: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll
2020-10-06 19:41:26.750148: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library nvcuda.dll
2020-10-06 19:41:26.750885: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:02:00.0 name: GeForce MX250 computeCapability: 6.1
coreClock: 1.582GHz coreCount: 3 deviceMemorySize: 2.00GiB deviceMemoryBandwidth: 52.21GiB/s
2020-10-06 19:41:26.751543: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll
2020-10-06 19:41:26.751882: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cublas64_10.dll
2020-10-06 19:41:26.752374: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cufft64_10.dll
2020-10