In [1]:
import tensorflow as tf
from transformers import T5Tokenizer, TFT5Model, TFT5ForConditionalGeneration
import tensorflow_datasets as tfds
import pandas as pd
#from pandas_tfrecords import pd2tf, tf2pd
import time
import numpy as np

In [2]:
BATCH_SIZE = 16

SHUFFEL_SIZE = 1024

learning_rate = 3e-5

In [3]:
tokenizer = T5Tokenizer.from_pretrained('t5-small')
model = TFT5ForConditionalGeneration.from_pretrained('t5-small')

task_specific_params = model.config.task_specific_params
if task_specific_params is not None:
    model.config.update(task_specific_params.get("summarization", {}))
    
pad_token_id = tokenizer.pad_token_id

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [4]:
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, clipnorm=1.0)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

val_loss = tf.keras.metrics.Mean(name='val_loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

model.summary()

Model: "tf_t5for_conditional_generation"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
shared (TFSharedEmbeddings)  multiple                  16449536  
_________________________________________________________________
encoder (TFT5MainLayer)      multiple                  18881280  
_________________________________________________________________
decoder (TFT5MainLayer)      multiple                  25175808  
Total params: 60,506,624
Trainable params: 60,506,624
Non-trainable params: 0
_________________________________________________________________


In [5]:
reviews = pd.read_csv("Reviews.csv")
reviews = reviews[['Text','Summary']]
reviews

Unnamed: 0,Text,Summary
0,I have bought several of the Vitality canned d...,Good Quality Dog Food
1,Product arrived labeled as Jumbo Salted Peanut...,Not as Advertised
2,This is a confection that has been around a fe...,"""Delight"" says it all"
3,If you are looking for the secret ingredient i...,Cough Medicine
4,Great taffy at a great price. There was a wid...,Great taffy
...,...,...
568449,Great for sesame chicken..this is a good if no...,Will not do without
568450,I'm disappointed with the flavor. The chocolat...,disappointed
568451,"These stars are small, so you can give 10-15 o...",Perfect for our maltipoo
568452,These are the BEST treats for training and rew...,Favorite Training and reward treat


In [19]:
art=reviews.iloc[[3]]['Text'].values[0]

In [13]:
reviews.iloc[[1]]['Text'].values[0]

'Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo".'

In [6]:
reviews = reviews.drop_duplicates(subset=['Summary'], keep='last')
reviews = reviews.sample(frac = 1)
reviews.shape

(295743, 2)

In [7]:
reviews.Summary = reviews.Summary.apply(lambda x: str(x) )
reviews.Text = reviews.Text.apply(lambda x: str(x) )

In [6]:
def normalize_text(text):
    """Lowercase and remove quotes from a TensorFlow string."""
    text = tf.strings.lower(text)
    return text.numpy().decode('UTF-8')

def tokenize_articles(text):
    text = normalize_text(text)
    ids = tokenizer.encode_plus((model.config.prefix + text), return_tensors="tf", max_length=512) 

    return tf.squeeze(ids['input_ids']), tf.squeeze(ids['attention_mask'])
        
def tokenize_highlights(text):
    text = normalize_text(text)
    ids = tokenizer.encode(text, return_tensors="tf", max_length=150)
    return tf.squeeze(ids)



def map_func(features):
    text_1=tf.reshape(features['text'],[])
    summary_1=tf.reshape(features['summary'],[])
    article_ids, attention_mask = tf.py_function(tokenize_articles, inp=[text_1], Tout=(tf.int32, tf.int32))
    highlights_ids = tf.py_function(tokenize_highlights, inp=[summary_1], Tout=tf.int32)

    return article_ids, attention_mask, highlights_ids

In [9]:
train, validate, test =np.split(reviews.sample(frac=1, random_state=42),[int(.6*len(reviews)), int(.8*len(reviews))])

In [10]:
#Train
train_txt=train[['Text']]
train_lab=train[['Summary']]

test_txt=test[['Text']]
test_lab=test[['Summary']]

val_txt=validate[['Text']]
val_lab=validate[['Summary']]

train_tfds=tf.data.Dataset.from_tensor_slices({"text":train_txt.values,"summary":train_lab.values})
test_tfds=tf.data.Dataset.from_tensor_slices({"text":test_txt.values,"summary":test_lab.values})
val_tfds=tf.data.Dataset.from_tensor_slices({"text":val_txt.values,"summary":val_lab.values})

In [11]:
len_train = len(list(train_tfds))
len_test = len(list(test_tfds))
len_val = len(list(val_tfds))

In [12]:
train_ds = train_tfds.map(map_func)\
    .shuffle(SHUFFEL_SIZE)\
    .padded_batch(BATCH_SIZE, padded_shapes=([512],[512],[150]))\
    .prefetch(tf.data.experimental.AUTOTUNE)

val_ds = val_tfds.map(map_func)\
    .shuffle(SHUFFEL_SIZE)\
    .padded_batch(BATCH_SIZE, padded_shapes=([512],[512],[150]))\
    .prefetch(tf.data.experimental.AUTOTUNE)

test_ds = test_tfds.map(map_func)\
    .shuffle(SHUFFEL_SIZE)\
    .padded_batch(BATCH_SIZE, padded_shapes=([512],[512],[150]))\
    .prefetch(tf.data.experimental.AUTOTUNE)

In [13]:
@tf.function
def train_step(input_ids, input_mask, y):
    y_ids = y[:, :-1]
    lm_labels = tf.identity(y[:, 1:])
    lm_labels = tf.where(tf.equal(y[:, 1:],pad_token_id), -100, lm_labels)

    with tf.GradientTape() as tape:
        # prediction_scores: (bs, 150, 32128)
        # decoder_past_key_value_states: (bs, 512, 512), (bs, 8, 150, 64)
        # z: (bs, 512, 512)
        predictions= model(input_ids, attention_mask=input_mask, decoder_input_ids=y_ids,labels=lm_labels, training=True)
        #print(model(input_ids=input_ids, attention_mask=input_mask, decoder_input_ids=y_ids,labels=lm_labels, training=True))
        loss = loss_object(y[:, 1:],predictions.logits)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    train_loss(loss)
    train_accuracy(y[:, 1:], predictions.logits)

In [14]:
@tf.function
def val_step(input_ids, input_mask, y):
    y_ids = y[:, :-1]
    lm_labels = tf.identity(y[:, 1:])
    lm_labels = tf.where(tf.equal(y[:, 1:],pad_token_id), -100, lm_labels)
    
    predictions= model(input_ids, attention_mask=input_mask, decoder_input_ids=y_ids,labels=lm_labels, training=False)
    v_loss = loss_object(y[:, 1:], predictions.logits)

    val_loss(v_loss)
    val_accuracy(y[:, 1:], predictions.logits)

In [17]:
EPOCHS = 1
log_interval = 200
for epoch in range(EPOCHS):
    # reset metrics
    train_loss.reset_states()
    train_accuracy.reset_states()
    
    val_loss.reset_states()
    val_accuracy.reset_states()
    
    val_batches = iter(val_ds)
    
    start_time = time.time()
    for i, (input_ids, input_mask, y) in enumerate(train_ds):
        # training
        #try:
        train_step(input_ids, input_mask, y)
        #except:
        #continue
        # validation
        if i % log_interval == 0:
            x_val, x_mask_val, y_val = next(val_batches)
            val_step(x_val, x_mask_val, y_val)
            elapsed = time.time() - start_time
            print('| epoch {:3d} | [{:5d}/{:5d}] | '
                  'ms/batch {:5.2f} | '
                  'train acc {:5.2f} | val acc {:5.2f} |'
                  'loss {:5.2f} | val loss {:5.2f}'.format(
                    epoch, i, int(len_train/BATCH_SIZE),
                    elapsed * 1000 / log_interval,
                    train_accuracy.result() * 100, val_accuracy.result() * 100, 
                    train_loss.result(),  val_loss.result()))
            start_time = time.time()

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "




ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted:  OOM when allocating tensor with shape[16,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node tf_t5for_conditional_generation/encoder/block_._0/layer_._0/layer_norm/mul (defined at c:\users\yogesh\appdata\local\programs\python\python37\lib\site-packages\transformers\models\t5\modeling_tf_t5.py:89) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[gradient_tape/tf_t5for_conditional_generation/encoder/block_._0/layer_._0/SelfAttention/relative_attention_bias/embedding_lookup/Reshape/_36]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted:  OOM when allocating tensor with shape[16,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node tf_t5for_conditional_generation/encoder/block_._0/layer_._0/layer_norm/mul (defined at c:\users\yogesh\appdata\local\programs\python\python37\lib\site-packages\transformers\models\t5\modeling_tf_t5.py:89) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored. [Op:__inference_train_step_620154]

Errors may have originated from an input operation.
Input Source operations connected to node tf_t5for_conditional_generation/encoder/block_._0/layer_._0/layer_norm/mul:
 tf_t5for_conditional_generation/encoder/dropout_24/dropout/Mul_1 (defined at c:\users\yogesh\appdata\local\programs\python\python37\lib\site-packages\transformers\models\t5\modeling_tf_t5.py:724)

Input Source operations connected to node tf_t5for_conditional_generation/encoder/block_._0/layer_._0/layer_norm/mul:
 tf_t5for_conditional_generation/encoder/dropout_24/dropout/Mul_1 (defined at c:\users\yogesh\appdata\local\programs\python\python37\lib\site-packages\transformers\models\t5\modeling_tf_t5.py:724)

Function call stack:
train_step -> train_step


In [None]:
model.save_pretrained("amazon_model")
tokenizer.save_pretrained("amazon_tokeniser")
!cp -r '/content/amazon_model' /content/drive/MyDrive
!cp -r '/content/amazon_tokeniser' /content/drive/MyDrive

In [9]:
ids =tokenizer.encode_plus((model.config.prefix + art), return_tensors="tf", max_length=512) 
sum = model.generate(input_ids=ids['input_ids'], attention_mask=ids['attention_mask'])
pred = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in sum]
pred

['the food we had enjoyed at the time of dinner . it was really delicious, everything had unique taste which we had ordered, nice arrangement and services from the staff while eating .']

In [7]:
art="""The Food we had enjoyed at the time of dinner. It was really delicious taste with great quality, 
everything had unique taste which we had ordered, nice arrangement and services from the staff while eating,
we found nothing bad about this hotel."""

In [26]:
art="""This tea is fantastic! I am a lover of orange tea and have tried many different ones; this one really stands out. 
The orange blossoms are nestled in copious amounts within the oolong tea leaves. There's no bitter
taste with this tea, and the orange flavor blends perfectly with the oolong. What a find, what a great tea!"""