<a href="https://colab.research.google.com/github/mr-haseeb/Artificial-Intelligence/blob/master/Copy_of_data_piplines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [0]:
!pip install pymysql
!pip install bert-for-tf2
!pip install sentencepiece

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers
import bert
import re

import pandas as pd
from sqlalchemy import create_engine
import pymysql
import numpy as np

from sklearn import preprocessing
from sklearn.model_selection import train_test_split 


# Fetching Data from MySQL Database

In [0]:
import getpass
import os

# Security: the connection string (user, password, host) must not live in the
# notebook. Read it from the EMAILS_DB_URL environment variable, or ask for it
# interactively when the variable is not set.
# NOTE: credentials that were ever committed with this notebook should be rotated.
db_url = os.environ.get("EMAILS_DB_URL") or getpass.getpass(
    "SQLAlchemy URL (mysql+pymysql://user:password@host): ")

# The e-mail subject is exposed as `senderaddress`; the first element of the
# JSON `folderlabel` array is exposed as `target`.
EMAILS_QUERY = (
    "select subject as senderaddress,JSON_EXTRACT(folderlabel, '$[0]')  AS target "
    "from socialhuman.emails WHERE  1=1 ORDER BY datetime ASC, RAND() LIMIT 5000"
)

sqlEngine = create_engine(db_url, pool_recycle=3600)
# The context manager releases the connection even when the query fails.
with sqlEngine.connect() as dbConnection:
    df = pd.read_sql(EMAILS_QUERY, dbConnection)
df.head(10)

# Splitting and Encoding Categorical Features

In [0]:
# Map every distinct target value to an integer id, kept in a new column.
label_encoder = preprocessing.LabelEncoder()
encoded_targets = label_encoder.fit_transform(df['target'])
df['targetB'] = encoded_targets

# Store the text column with pandas' dedicated "string" dtype.
df['senderaddress'] = pd.Series(df['senderaddress'], dtype="string")

# Features stay a pandas Series; labels become a NumPy array.
X = df['senderaddress']
y = np.array(df['targetB'])

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=13)
X_train, X_test, y_train, y_test = split_parts
print("Train dataset shape: {0}, \nTest dataset shape: {1}".format(X_train.shape, X_test.shape))

# Cleaning Text Data

In [0]:
# Matches anything that looks like a markup tag: "<", one or more non-">" characters, ">".
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return ``text`` with every ``<...>`` span deleted.

    Anything enclosed in angle brackets is removed, which includes e-mail
    addresses written as ``<user@example.com>``.
    """
    return TAG_RE.sub('', text)


def preprocess_text(sen):
    """Reduce one raw text to ASCII letters separated by single spaces.

    Steps, in order: strip ``<...>`` spans, replace every non-letter with a
    space, drop single letters that stand between whitespace, collapse runs of
    whitespace into one space. Leading/trailing spaces are not stripped.

    Args:
        sen: the raw text. Any non-string value (``None``, pandas ``NA`` for a
            NULL database field, ...) is treated as an empty text.

    Returns:
        The cleaned string; ``''`` when ``sen`` is not a string.
    """
    # Missing values would make ``re`` raise TypeError and abort the whole
    # cleaning loop, so map them to an empty text instead.
    if not isinstance(sen, str):
        return ''

    # Removing html tags
    sentence = remove_tags(sen)

    # Remove punctuations and numbers
    sentence = re.sub(r'[^a-zA-Z]', ' ', sentence)

    # Single character removal
    sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

    # Removing multiple spaces
    sentence = re.sub(r'\s+', ' ', sentence)

    return sentence

In [0]:
# Clean every raw text, keeping the row order of X.
sentences = list(X)
clean_text = [preprocess_text(raw_sentence) for raw_sentence in sentences]

In [0]:
# Spot-check one cleaned text next to its encoded label.
SAMPLE_INDEX = 3
print(clean_text[SAMPLE_INDEX])
print(y[SAMPLE_INDEX])

# Creating a BERT Tokenizer

In [0]:
# Load the pre-trained BERT layer (frozen); below it is only used as the source
# of the vocabulary file and the lower-casing flag for the tokenizer.
BERT_HUB_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1"
bert_layer = hub.KerasLayer(BERT_HUB_URL, trainable=False)

bert_assets = bert_layer.resolved_object
vocabulary_file = bert_assets.vocab_file.asset_path.numpy()
to_lower_case = bert_assets.do_lower_case.numpy()

BertTokenizer = bert.bert_tokenization.FullTokenizer
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

In [0]:
def tokenizing_text(text):
    """Split ``text`` with the module-level ``tokenizer`` and return the vocabulary ids of the pieces."""
    word_pieces = tokenizer.tokenize(text)
    return tokenizer.convert_tokens_to_ids(word_pieces)

In [0]:
# One list of vocabulary ids per cleaned text, in the same order as clean_text.
tokenized_text = list(map(tokenizing_text, clean_text))

In [0]:
# Preview only the first few id sequences; printing the whole list floods the
# cell output with one token list per row of the dataset.
print(tokenized_text[:3])

# Preparing Data For Training

In [0]:
import random
# Attach to every id sequence its label and its length.
text_with_len = [[token_ids, label, len(token_ids)]
                 for token_ids, label in zip(tokenized_text, y)]

# Shuffle first, then sort by length: list.sort is stable, so sequences of equal
# length keep their shuffled relative order.
random.shuffle(text_with_len)
text_with_len.sort(key=lambda entry: entry[2])

# Drop the length again; only (ids, label) pairs are fed to the dataset.
sorted_text_labels = [(token_ids, label) for token_ids, label, _ in text_with_len]


def _labelled_sequences():
    """Return the length-sorted (ids, label) pairs for Dataset.from_generator."""
    return sorted_text_labels


processed_dataset = tf.data.Dataset.from_generator(_labelled_sequences, output_types=(tf.int32, tf.int32))

In [0]:
BATCH_SIZE = 32

# Token ids are padded to the longest sequence of each batch; labels are scalars.
token_shape, label_shape = (None, ), ()
batched_dataset = processed_dataset.padded_batch(BATCH_SIZE, padded_shapes=(token_shape, label_shape))

# Display the first batch as a sanity check.
next(iter(batched_dataset))

In [0]:
import math

In [0]:
TOTAL_BATCHES = math.ceil(len(sorted_text_labels) / BATCH_SIZE)
# Roughly one tenth of the batches is held out for testing.
TEST_BATCHES = TOTAL_BATCHES // 10

# Dataset.shuffle returns a NEW dataset, so its result has to be kept: calling
# it without using the return value leaves the batches sorted by length and the
# test split would consist of the shortest sequences only.
# reshuffle_each_iteration=False (plus a fixed seed) keeps the batch order
# identical on every pass, so take()/skip() always yield disjoint train/test sets.
shuffled_batches = batched_dataset.shuffle(TOTAL_BATCHES,
                                           seed=13,
                                           reshuffle_each_iteration=False)
test_data = shuffled_batches.take(TEST_BATCHES)
train_data = shuffled_batches.skip(TEST_BATCHES)

# Creating the Model

In [0]:
class TEXT_MODEL(tf.keras.Model):
    """Text classifier built from an embedding and three parallel 1-D convolutions.

    The embedded token sequence is fed to three Conv1D branches (kernel sizes
    2, 3 and 4). Each branch is global-max-pooled, the three results are
    concatenated and passed through a dense layer, dropout and the output layer.

    Args:
        vocabulary_size: number of rows of the embedding table.
        embedding_dimensions: size of each embedding vector.
        cnn_filters: number of filters in each convolution branch.
        dnn_units: width of the hidden dense layer.
        model_output_classes: number of classes; exactly 2 selects a single
            sigmoid unit, any other value a softmax over that many units.
        dropout_rate: rate of the dropout layer after the hidden dense layer.
        training: accepted but not used by the constructor.
        name: Keras model name.
    """

    def __init__(self,
                 vocabulary_size,
                 embedding_dimensions=128,
                 cnn_filters=50,
                 dnn_units=512,
                 model_output_classes=13,
                 dropout_rate=0.1,
                 training=False,
                 name="text_model"):
        super().__init__(name=name)

        self.embedding = layers.Embedding(vocabulary_size, embedding_dimensions)

        # The three branches differ only in their kernel size.
        conv_options = dict(filters=cnn_filters, padding="valid", activation="relu")
        self.cnn_layer1 = layers.Conv1D(kernel_size=2, **conv_options)
        self.cnn_layer2 = layers.Conv1D(kernel_size=3, **conv_options)
        self.cnn_layer3 = layers.Conv1D(kernel_size=4, **conv_options)
        # One pooling layer is shared by all branches.
        self.pool = layers.GlobalMaxPool1D()

        self.dense_1 = layers.Dense(units=dnn_units, activation="relu")
        self.dropout = layers.Dropout(rate=dropout_rate)

        # Two classes -> one sigmoid unit; otherwise one softmax unit per class.
        is_binary = model_output_classes == 2
        head_units = 1 if is_binary else model_output_classes
        head_activation = "sigmoid" if is_binary else "softmax"
        self.last_dense = layers.Dense(units=head_units, activation=head_activation)

    def call(self, inputs, training):
        """Run the forward pass; ``training`` is forwarded to the dropout layer."""
        embedded = self.embedding(inputs)

        branches = (self.cnn_layer1, self.cnn_layer2, self.cnn_layer3)
        pooled = [self.pool(conv(embedded)) for conv in branches]

        features = tf.concat(pooled, axis=-1)  # (batch_size, 3 * cnn_filters)
        hidden = self.dense_1(features)
        hidden = self.dropout(hidden, training)
        return self.last_dense(hidden)

In [0]:
# Embedding table size = size of the BERT tokenizer vocabulary.
VOCAB_LENGTH = len(tokenizer.vocab)
EMB_DIM = 200
CNN_FILTERS = 100
DNN_UNITS = 256
# Take the number of classes from the fitted label encoder instead of assuming a
# binary problem: with more than two distinct targets, a hard-coded 2 would build
# a single sigmoid output and train it with binary cross-entropy on labels > 1.
OUTPUT_CLASSES = len(label_encoder.classes_)

DROPOUT_RATE = 0.2

NB_EPOCHS = 5

In [0]:
# Collect the hyper-parameters from the configuration cell and build the model.
model_settings = {
    "vocabulary_size": VOCAB_LENGTH,
    "embedding_dimensions": EMB_DIM,
    "cnn_filters": CNN_FILTERS,
    "dnn_units": DNN_UNITS,
    "model_output_classes": OUTPUT_CLASSES,
    "dropout_rate": DROPOUT_RATE,
}
text_model = TEXT_MODEL(**model_settings)

In [0]:
# Pick the loss/metric pair that matches the output layer: two classes use the
# binary pair, anything else the sparse categorical pair.
if OUTPUT_CLASSES == 2:
    loss_name, metric_names = "binary_crossentropy", ["accuracy"]
else:
    loss_name, metric_names = "sparse_categorical_crossentropy", ["sparse_categorical_accuracy"]

text_model.compile(loss=loss_name, optimizer="adam", metrics=metric_names)

In [0]:
# Keep the History object returned by fit(): the plotting cells read it as
# `bert_history`. Passing the held-out batches as validation data makes Keras
# record the val_loss / val_<metric> series those cells expect.
bert_history = text_model.fit(train_data,
                              epochs=NB_EPOCHS,
                              validation_data=test_data)

In [0]:
# callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',mode='max')
# history = model.fit(train_ds, epochs=2, validation_data=valid_ds)

In [0]:
import matplotlib.pyplot as plt
# Keras stores the History of the most recent fit() call on the model itself,
# so the training curves can be read back from text_model.
bert_history = text_model.history
history_dict = bert_history.history

# The accuracy series is logged under the metric name chosen at compile time:
# "accuracy" for the binary setup, "sparse_categorical_accuracy" otherwise.
acc_key = 'accuracy' if 'accuracy' in history_dict else 'sparse_categorical_accuracy'
acc = history_dict[acc_key]
loss = history_dict['loss']

# val_* series exist only when fit() was given validation data. Fall back to
# NaNs (drawn as an empty curve) instead of failing with a KeyError.
no_validation = [float('nan')] * len(loss)
val_acc = history_dict.get('val_' + acc_key, no_validation)
val_loss = history_dict.get('val_loss', no_validation)



# Evaluate the model
Let's see how the model performs on the held-out test batches. The evaluation returns two values: the loss (a number that represents the error; lower values are better) and the accuracy.



In [0]:
# Evaluate the trained `text_model` (no variable called `model` exists in this
# notebook). `test_data` already consists of padded batches, so it is passed
# as-is: batching it again would try to stack batches of different widths.
results = text_model.evaluate(test_data, verbose=2)
for name, value in zip(text_model.metrics_names, results):
  print("%s: %.3f" % (name, value))

In [0]:
# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(acc) + 1)

fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')        # "bo": blue dots
ax.plot(epochs, val_loss, 'b', label='Validation loss')   # "b": solid blue line
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

In [0]:
plt.clf()   # start from an empty figure

ax = plt.gca()
ax.plot(epochs, acc, 'bo', label='Training acc')
ax.plot(epochs, val_acc, 'b', label='Validation acc')
ax.set_title('Training and validation accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()

plt.show()

# Add inputs: way 1

In [0]:
# Serving entry point. The input signature declares a 1-D batch of string tensors.
@tf.function(input_signature=[tf.TensorSpec([None,], tf.string)])

def classify(asunto):
  """Return the two highest-scoring classes and their scores for each item in ``asunto``.

  NOTE(review): ``etiquetas`` and ``model`` are not defined in any visible cell of
  this notebook; presumably they stand for ``label_encoder`` and ``text_model`` -- confirm.
  NOTE(review): the visible training pipeline feeds the network integer token ids,
  while this signature accepts raw strings and passes them straight to ``model`` --
  confirm that ``model`` performs its own text preprocessing.
  """
  classes = etiquetas.classes_
  # NOTE(review): if ``model`` already ends in a sigmoid/softmax layer (as TEXT_MODEL
  # does), this applies a second normalisation on top of probabilities -- confirm.
  softmax = tf.math.softmax(model(asunto))
  # Keep the two largest scores of every row together with their column indices.
  scores, indices = tf.math.top_k(softmax, k=2)
  # Translate the column indices into class labels.
  classes = tf.gather(classes, indices)
 
  return {'item': asunto, 'classes': classes, 'scores' : scores}

# Signature map passed to tf.saved_model.save; the function is exported under the key "classify".
signatures={'classify': classify}

In [0]:
import os

# Base directory of the exported models; each version lives in its own
# numbered sub-directory (here: /modelos/6).
MODEL_DIR = '/modelos/'
version = 6
export_path = os.path.join(MODEL_DIR, str(version))

# Export the trained `text_model` together with the serving signatures.
# (No variable called `model` is defined anywhere in this notebook.)
tf.saved_model.save(text_model, export_path, signatures)


In [0]:
 # Sample texts used to exercise the exported "classify" signature.
 my_reviews = [
     'this movie was awesome',
     'LinkedIn <messages-noreply@linkedin.com> was awesome',
     'I hated everything about this movie',
     'this is my favorite movie of the year',
 ]



In [0]:
#loaded_model=tf.keras.models.load_model('modelclasificaemails.h5')
loaded_model = tf.saved_model.load(export_path)

print(list(loaded_model.signatures.keys()))
DEFAULT_FUNCTION_KEY = "serving_default"
infer = loaded_model.signatures["classify"]

In [0]:
# Call the reloaded signature on the sample texts and display its result.
review_tensor = tf.convert_to_tensor(my_reviews)
infer(review_tensor)

In [0]:
# Inspect the SavedModel stored under export_path ({export_path} is filled in by IPython).
!saved_model_cli show --dir '{export_path}' --all

# Installing requirements for TensorFlow Serve

In [0]:
# Register the TensorFlow Serving apt source, add its release key, then refresh the package index.
!echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
!apt update


In [0]:
# Install the tensorflow-model-server package from the apt source registered above in this notebook.
!apt-get install tensorflow-model-server

In [0]:
# Expose MODEL_DIR to child processes; the %%bash serving cell reads ${MODEL_DIR}.
os.environ.update(MODEL_DIR=MODEL_DIR)
print(os.environ["MODEL_DIR"])

In [0]:
%%bash --bg 
# Start tensorflow_model_server as a background process: REST API on port 8505,
# model name "modelos", models read from ${MODEL_DIR}.
# stdout and stderr are both redirected to server.log.
nohup tensorflow_model_server \
  --rest_api_port=8505 \
  --model_name=modelos \
  --model_base_path="${MODEL_DIR}" >server.log 2>&1

In [0]:
# Show the last lines of the model server log to check that it started.
!tail server.log

# Making REST Request

In [0]:
# Query the REST endpoint of the served model "modelos" on port 8505.
!curl http://localhost:8505/v1/models/modelos

In [0]:
!pip install -q requests

import requests
import json
# Request body: select the "classify" signature and send the sample texts as instances.
payload = {"signature_name": "classify", "instances": my_reviews}
data = json.dumps(payload)
print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))

# POST the JSON body to the predict endpoint of the local model server and print the reply.
PREDICT_URL = 'http://localhost:8505/v1/models/modelos:predict'
headers = {"content-type": "application/json"}
json_response = requests.post(PREDICT_URL, data=data, headers=headers)
print(json_response.content.decode())