# AI Engineering: Text Processing

## >Imports

In [3]:
!pip install tf keras




[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow.keras as keras
from numpy.linalg import norm
import tf_keras
import tensorflow_datasets as tfds

## >Text Embedding

In [5]:
# TF Hub handle of the Universal Sentence Encoder (version 4) used for the embedding demo.
MODEL_URL = 'https://tfhub.dev/google/universal-sentence-encoder/4'

In [6]:
# Load the encoder referenced by MODEL_URL; `model` is the default encoder used by embed().
model = hub.load(MODEL_URL)













In [7]:
def embed(input_text, embed_model=model):
    """Run ``input_text`` through an embedding model and return its output.

    Args:
        input_text: Input passed straight to ``embed_model``; the notebook
            always passes a list of strings.
        embed_model: Callable that produces the embeddings. Defaults to the
            notebook-level ``model`` (bound when this cell is executed).

    Returns:
        Whatever ``embed_model(input_text)`` returns, unmodified.
    """
    vectors = embed_model(input_text)
    return vectors

In [8]:
# Embed a single sentence; the input is a one-element list, so the result holds one embedding.
embeddings = embed(['This is a sentence.'])

In [9]:
# Preview only the first 10 components of the first embedding rather than the full vector.
embeddings[0].numpy()[0:10]

array([ 0.02881764, -0.02020013,  0.01069627,  0.03850532, -0.09253702,
        0.01752776, -0.04711755,  0.04785209,  0.01430714,  0.02635949],
      dtype=float32)

## Semantic Similarity *Scoring*

In [10]:
def cos_sim(A,B):
    """Cosine similarity between the vectors in ``A`` and the vectors in ``B``.

    Each input is either a single vector (1-D) or a batch with one vector per
    row (2-D). Every vector is divided by its own L2 norm before the inner
    product is taken, so batches with more than one row give a correct
    pairwise similarity matrix. (Dividing by ``norm(A) * norm(B)`` on 2-D
    input would use the Frobenius norm of the whole batch instead.)

    Args:
        A: Array-like of shape ``(d,)`` or ``(m, d)``.
        B: Array-like of shape ``(d,)`` or ``(n, d)``.

    Returns:
        A scalar for two 1-D inputs, or an ``(m, n)`` array for 2-D inputs,
        where entry ``[i, j]`` is the cosine similarity of ``A[i]`` and
        ``B[j]``. A zero-length vector yields ``nan`` for its entries.
    """
    A = np.atleast_1d(A)
    B = np.atleast_1d(B)
    # keepdims=True keeps the norms broadcastable against their own rows.
    unit_a = A / norm(A, axis=-1, keepdims=True)
    unit_b = B / norm(B, axis=-1, keepdims=True)
    return np.inner(unit_a, unit_b)

def euclidiean(A,B):
    """Return the norm of the element-wise difference ``A - B``.

    This is a distance: smaller values mean the inputs are closer, which is
    the opposite orientation to ``cos_sim``.
    """
    difference = A - B
    return norm(difference)

In [11]:
def is_it_sim(textA, textB, thresh=.2, sim_func=cos_sim):
    """Embed two texts, score them, and report whether the score beats ``thresh``.

    Args:
        textA: First text; embedded as a one-element batch.
        textB: Second text; embedded as a one-element batch.
        thresh: Score that must be strictly exceeded to count as similar.
        sim_func: Function applied to the two embeddings. A higher score is
            treated as "more similar", so a distance function would invert
            the verdict.

    Returns:
        Tuple ``(sim_score, sim_score > thresh)``. Also prints the verdict.
    """
    vec_a, vec_b = embed([textA]), embed([textB])
    sim_score = sim_func(vec_a, vec_b)
    above_thresh = sim_score > thresh
    verdict = 'They are SIMILAR' if above_thresh else 'They are NOT similar'
    print(verdict)
    return sim_score, above_thresh

In [12]:
# Probe sentences for the similarity demo: one about a computer company, then
# "apple" as a fruit, "Apple" as a computer brand, and a sentence about PCs.
# NOTE(review): 'deslicious' is misspelled in answerA; it is left as-is because
# the string is model input and changing it would change the scores.
questionA = 'This is a technology Company that builds computers'
answerA = 'I have a deslicious fruit called an apple'
answerB = 'I am writing a program on my Apple desktop'
answerC = 'My favorite PC is not an HP'

In [13]:
# Each call prints its own verdict; only the return value of the last call
# (answerB vs answerC) is shown as the cell output.
is_it_sim(questionA, answerA)
is_it_sim(questionA, answerB)
is_it_sim(questionA, answerC)
is_it_sim(answerB, answerC)

They are NOT similar
They are SIMILAR
They are SIMILAR
They are SIMILAR


(array([[0.32243395]], dtype=float32), array([[ True]]))

## >Text Classification using Embeddings & the Google Universal Sentence Encoder (GUSE)

In [14]:
# Load the IMDB reviews dataset: the first 60% of the 'train' split for training,
# the remaining 40% for validation, and the 'test' split for final evaluation.
# as_supervised=True makes each element an (input, label) pair, as unpacked below.
train_data, validation_data, test_data = tfds.load(
    name="imdb_reviews",
    split=('train[:60%]','train[60%:]','test'),
    as_supervised =True)

In [15]:
# Pull one batch of 10 (example, label) pairs from the training data for inspection.
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))

In [16]:
# Display the first review of the inspected batch (a scalar string tensor).
train_examples_batch[0]

<tf.Tensor: shape=(), dtype=string, numpy=b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.">

## >Load Hub Data and Create a Hub Layer

In [17]:
# Handle of the encoder used as the classifier's embedding layer.
# NOTE(review): this is the same URL as MODEL_URL defined earlier in the notebook.
embedding_model_url='https://tfhub.dev/google/universal-sentence-encoder/4'

In [18]:
# Wrap the encoder as a Keras layer. input_shape=[] with dtype=tf.string means
# each example is a single string; trainable=False keeps the encoder's weights
# fixed, so only the layers stacked on top of it are trained.
hub_layer = hub.KerasLayer(embedding_model_url,
                           input_shape=[],
                           dtype=tf.string,
                           trainable=False)







## >Build a NN Text Classifier

In [19]:
# Start from an empty Sequential model; layers are appended in the next cell.
nlp_model = tf_keras.Sequential()

In [20]:
# Classifier stack: sentence embedding -> three ReLU dense layers of shrinking
# width (light dropout after the first two) -> a single sigmoid output unit.
# NOTE: re-running this cell appends the same stack to nlp_model again.
classifier_stack = (
    hub_layer,
    tf_keras.layers.Dense(256, activation='relu'),
    tf_keras.layers.Dropout(0.1),
    tf_keras.layers.Dense(128, activation='relu'),
    tf_keras.layers.Dropout(0.1),
    tf_keras.layers.Dense(64, activation='relu'),
    tf_keras.layers.Dense(1, activation='sigmoid'),
)
for stack_layer in classifier_stack:
    nlp_model.add(stack_layer)


In [21]:
# Print the layer-by-layer output shapes and parameter counts.
nlp_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 512)               256797824 
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 1)                 6

## >Compile and Train NLP Model

In [22]:
# Binary classification setup: Adam optimizer and binary cross-entropy.
# from_logits=False because the final Dense layer already applies a sigmoid.
nlp_model.compile(optimizer='Adam',
                  loss=tf_keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=['binary_accuracy'])







In [23]:
# Train for 20 epochs on batches of 512 and validate on the held-out part of
# the train split after each epoch; `history` keeps the object returned by fit().
# NOTE(review): train_data is batched without an explicit shuffle here.
history = nlp_model.fit(train_data.batch(512),
                    epochs=20,
                    validation_data=validation_data.batch(512),
                    verbose=1)

Epoch 1/20












Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## >Model Evaluation

In [24]:
# Evaluate on the test split in batches of 512.
# `results` is assigned but not displayed in this cell; presumably it holds
# the loss followed by binary_accuracy (TODO confirm by printing it).
results = nlp_model.evaluate(test_data.batch(512), verbose=1)



## >Save Model

In [25]:
# Save the model to the relative path 'my_model'; the log output shows assets
# being written under my_model/assets, i.e. a directory rather than a single file.
nlp_model.save('my_model')


INFO:tensorflow:Assets written to: my_model\assets


INFO:tensorflow:Assets written to: my_model\assets


## >Inference Function

In [26]:
def get_sentiment(text, model=nlp_model, thresh=0.5):
    """Score one review with ``model``, print a verdict, and return the score.

    Args:
        text: Review text (a string); it is wrapped in a one-element list
            before being passed to ``model.predict``.
        model: Object with a ``predict`` method. Defaults to the
            notebook-level ``nlp_model`` (bound when this cell is executed).
        thresh: Scores strictly above this value are reported as good.

    Returns:
        The predicted score truncated (not rounded) to two decimal places.
    """
    predictions = model.predict([text])
    # One input and one output unit, so the score is the single entry [0][0].
    p_hat = predictions[0][0]
    out = (p_hat > thresh).astype('int32')

    print("Viewer Comment:\n"+text+"\n\nThe Review was:")
    print("->It was Good!" if out else "->Bad Movie!")
    return int(p_hat*100)/100


In [None]:
# Try the classifier on a hand-written review that is positive in intent but
# phrased with negative-sounding imagery.
get_sentiment('Can\'t imagine a better movie. \n  All other movies wake up in a cold sweat having been haunted by the spectre of the movie they could never be.\n Will watch again and again until my flesh has melded to the cinema\'s seats and they have to peel me off like an overcooked steak from a skillet.')

Viewer Comment:
Can't imagine a better movie. 
  All other movies wake up in a cold sweat having been haunted by the spectre of the movie they could never be.
 Will watch again and again until my flesh has melded to the cinema's seats and they have to peel me off like an overcooked steak from a skillet.

The Review was:
->Bad Movie!


0.0