## Data Science UA Conference Workshop
### Name : Prithvi Shetty

### Topic : Building deep learning NLP models and deploying them

## Code Index:


### 1. Importing libraries

### 2. Reading data and cleaning it

### 3. Tokenization, preprocessing and vectorization

### 4. Model building, training and saving the model in a deployable format

### 5. Testing and using the deployed model 

## 1. Importing libraries

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"
import json, requests
import pandas as pd
import numpy as np
import sys, os, re, csv, codecs
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation
from tensorflow.keras.layers import Bidirectional, GlobalMaxPool1D
from tensorflow.keras.models import Model
from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers
from tqdm import tqdm_notebook
from tqdm.auto import tqdm

In [2]:
import tensorflow
tensorflow.__version__

'2.3.0'

In [3]:
GLOVE_FILE="glove.6B.100d.txt" #http://nlp.stanford.edu/data/glove.6B.zip

In [4]:
embed_size = 100 # Length of each word vector; must match the GloVe file in use (glove.6B.100d -> 100)
max_features = 8000 # Vocabulary cap: passed to Tokenizer(num_words=...) and used as the Embedding input size
maxlen = 100 # Fixed per-document sequence length: passed to pad_sequences and used as the model Input shape

## 2. Reading data and cleaning it

In [5]:
df1 = pd.read_csv("elmo_sentiment_train.csv",encoding="latin")
df2 = pd.read_csv("elmo_sentiment_test.csv", encoding="latin")

In [6]:
df1.head()
df2.tail()

Unnamed: 0,sentence,sentiment,polarity
0,Flat characters that you do not and never will...,3,0
1,What the heck was this. Somebody obviously rea...,4,0
2,HORRID!!<br /><br />The special effects make t...,2,0
3,"Right, then, he's absolutely brilliant. But yo...",10,1
4,A hilarious Neil Simon comedy that evokes laug...,10,1


Unnamed: 0,sentence,sentiment,polarity
24995,There's simply no redeeming quality about this...,1,0
24996,total crap.<br /><br />I was kind of excited t...,1,0
24997,I bought this film from my local blockbuster f...,8,1
24998,"I was blown away when I saw ""The Best Years of...",10,1
24999,I first saw this in the 70s on syndicated TV a...,7,1


In [7]:
# Stack the two CSVs into a single frame with a fresh 0..N-1 index.
# pd.concat replaces DataFrame.append, which is deprecated and was removed in pandas 2.0.
df = pd.concat([df1, df2], ignore_index=True)

In [8]:
df.head(2)
df.shape

Unnamed: 0,sentence,sentiment,polarity
0,Flat characters that you do not and never will...,3,0
1,What the heck was this. Somebody obviously rea...,4,0


(50000, 3)

In [9]:
df.isnull().sum()

sentence     0
sentiment    0
polarity     0
dtype: int64

In [10]:
df.head()

Unnamed: 0,sentence,sentiment,polarity
0,Flat characters that you do not and never will...,3,0
1,What the heck was this. Somebody obviously rea...,4,0
2,HORRID!!<br /><br />The special effects make t...,2,0
3,"Right, then, he's absolutely brilliant. But yo...",10,1
4,A hilarious Neil Simon comedy that evokes laug...,10,1


In [11]:
df.dropna(how='any', inplace=True)

In [12]:
df.dtypes


sentence     object
sentiment     int64
polarity      int64
dtype: object

In [13]:
def myfillna(series):
    """Replace missing values in one column with a dtype-dependent placeholder.

    Written to be used with ``DataFrame.apply``, which calls it once per column.

    Parameters
    ----------
    series : pandas.Series
        The column to fill.

    Returns
    -------
    pandas.Series
        A new series in which nulls are replaced by ``''`` when the column
        holds floats or integers, and by ``'NA'`` for every other dtype.
    """
    # `pd.np` no longer exists in pandas 2.0, and comparing dtypes with `is`
    # only ever matched the platform-default float/int width. The pandas dtype
    # predicates work on every version and cover all float/int widths.
    is_numeric = (
        pd.api.types.is_float_dtype(series.dtype)
        or pd.api.types.is_integer_dtype(series.dtype)
    )
    if is_numeric:
        return series.fillna('')
    return series.fillna('NA')

In [14]:
df=df.apply(myfillna)

In [15]:
X = np.array(df["sentence"])
y = np.array(df["polarity"])

In [16]:
list_sentences_train=X

In [17]:
df.polarity.value_counts()

1    25000
0    25000
Name: polarity, dtype: int64

## 3. Tokenization, preprocessing and vectorization

In [18]:
# Fit the vocabulary on every sentence in `list_sentences_train`; `max_features`
# is handed to the Tokenizer as its `num_words` limit.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(list_sentences_train))
# Turn each sentence into a list of integer word indices.
list_tokenized_train = tokenizer.texts_to_sequences(list_sentences_train)
# Bring every index list to the common length `maxlen` so the result is one 2-D array
# that matches the model's Input(shape=(maxlen,)).
x_train = pad_sequences(list_tokenized_train, maxlen=maxlen)

In [19]:
def get_coefs(word, *arr):
    """Pair a token with its embedding vector.

    Parameters
    ----------
    word : str
        The token (first field of a whitespace-split embedding line).
    *arr
        The remaining fields of the line; each must be convertible to float.

    Returns
    -------
    tuple
        ``(word, vector)`` where ``vector`` is a 1-D float32 numpy array
        built from ``arr``.
    """
    vector = np.asarray(arr, dtype='float32')
    return word, vector

In [20]:
# Parse the GloVe text file into {word: float32 vector}, one entry per line.
# The `with` block guarantees the file handle is closed once parsing finishes;
# the previous bare open() left it open for the lifetime of the kernel.
with open(GLOVE_FILE, encoding='utf-8') as glove_file:
    embeddings_index = dict(get_coefs(*line.strip().split()) for line in glove_file)

In [62]:
embeddings_index['the']

array([-0.038194, -0.24487 ,  0.72812 , -0.39961 ,  0.083172,  0.043953,
       -0.39141 ,  0.3344  , -0.57545 ,  0.087459,  0.28787 , -0.06731 ,
        0.30906 , -0.26384 , -0.13231 , -0.20757 ,  0.33395 , -0.33848 ,
       -0.31743 , -0.48336 ,  0.1464  , -0.37304 ,  0.34577 ,  0.052041,
        0.44946 , -0.46971 ,  0.02628 , -0.54155 , -0.15518 , -0.14107 ,
       -0.039722,  0.28277 ,  0.14393 ,  0.23464 , -0.31021 ,  0.086173,
        0.20397 ,  0.52624 ,  0.17164 , -0.082378, -0.71787 , -0.41531 ,
        0.20335 , -0.12763 ,  0.41367 ,  0.55187 ,  0.57908 , -0.33477 ,
       -0.36559 , -0.54857 , -0.062892,  0.26584 ,  0.30205 ,  0.99775 ,
       -0.80481 , -3.0243  ,  0.01254 , -0.36942 ,  2.2167  ,  0.72201 ,
       -0.24978 ,  0.92136 ,  0.034514,  0.46745 ,  1.1079  , -0.19358 ,
       -0.074575,  0.23353 , -0.052062, -0.22044 ,  0.057162, -0.15806 ,
       -0.30798 , -0.41625 ,  0.37972 ,  0.15006 , -0.53212 , -0.2055  ,
       -1.2526  ,  0.071624,  0.70565 ,  0.49744 , 

In [22]:
# np.stack needs a real sequence: passing the dict view directly is deprecated
# and rejected by newer NumPy releases, so materialise it as a list first.
all_embs = np.stack(list(embeddings_index.values()))
emb_mean,emb_std = all_embs.mean(), all_embs.std()

word_index = tokenizer.word_index
# Keras Tokenizer indices start at 1 (0 is reserved for padding), so a vocabulary
# of N words needs N + 1 rows; without the +1 the last word would index past the
# end of the matrix whenever the vocabulary is smaller than max_features.
nb_words = min(max_features, len(word_index) + 1)
# Rows without a GloVe vector keep this random draw, which matches the mean and
# standard deviation of the pretrained embeddings.
embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))


  """Entry point for launching an IPython kernel.


In [23]:
# Overwrite the random rows with the pretrained GloVe vector wherever one exists.
for word, i in word_index.items():
    # Bound the row index by the matrix actually allocated rather than by
    # max_features: the matrix has fewer rows than max_features when the
    # vocabulary is small, and indexing past it raises IndexError.
    if i >= embedding_matrix.shape[0]:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

## 4. Model building, training and saving the model in a deployable format

In [25]:
# Binary sentiment classifier: pretrained embedding -> BiLSTM -> max-pool -> dense head.
inp = Input(shape=(maxlen,))
# Read both Embedding dimensions from the weight matrix itself so the layer
# shape cannot drift from the pretrained weights it is initialised with.
x = Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1], weights=[embedding_matrix])(inp)
# return_sequences=True keeps one output per timestep for the pooling layer below.
x = Bidirectional(LSTM(256, return_sequences=True))(x)
x = GlobalMaxPool1D()(x)
x = Dense(100, activation="relu")(x)
x = Dropout(0.2)(x)
# Single sigmoid unit: the output is the predicted probability of polarity == 1.
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
history=model.fit(x_train, y, batch_size=32, epochs=3, verbose=1, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [27]:
#Saving the model
model.save('glove_sentiment_concur_1120.h5')

In [28]:
import calendar
import time

# Current UTC time as whole seconds since the Unix epoch.
ts = calendar.timegm(time.gmtime())

In [29]:
calendar.timegm(time.gmtime())

1605875375

In [30]:
round(time.time())

1605875375

In [31]:

model_name = 'model_tf_serving_data_science_ua' + str(round(time.time()))
model.save( model_name ,save_format ='tf')

## 5. Testing and using the deployed model 

In [32]:
test=pd.read_csv('data_science_ua_test.csv',encoding='latin-1')

In [33]:
test.head(5)

Unnamed: 0,text,label
0,I always wrote this series off as being a comp...,0
1,1st watched 12/7/2002 - 3 out of 10(Dir-Steve ...,0
2,This movie was so poorly written and directed ...,0
3,The most interesting thing about Miryang (Secr...,1
4,"when i first read about ""berlin am meer"" i did...",0


In [34]:
list_sentences_test = test["text"].fillna("na").values
list_tokenized_test = tokenizer.texts_to_sequences(list_sentences_test)
x_test = pad_sequences(list_tokenized_test, maxlen=maxlen)

In [35]:
y_test = model.predict([x_test], batch_size=32, verbose=1)



In [36]:
test['Sentiment']=y_test


In [37]:
# Show full cell contents instead of truncating long review text.
# None is the supported "no limit" value; the old -1 sentinel is deprecated
# since pandas 1.0 and raises on current pandas.
pd.set_option('display.max_colwidth', None)

In [67]:
test.head(3)

Unnamed: 0,text,label,Sentiment,Sentiment_score
0,"I always wrote this series off as being a complete stink-fest because Jim Belushi was involved in it, and heavily. But then one day a tragic happenstance occurred. After a White Sox game ended I realized that the remote was all the way on the other side of the room somehow. Now I could have just gotten up and walked across the room to get the remote, or even to the TV to turn the channel. But then why not just get up and walk across the country to watch TV in another state? ""Nuts to that"", I said. So I decided to just hang tight on the couch and take whatever Fate had in store for me. What Fate had in store was an episode of this show, an episode about which I remember very little except that I had once again made a very broad, general sweeping blanket judgment based on zero objective or experiential evidence with nothing whatsoever to back my opinions up with, and once again I was completely right! This show is a total crud-pie! Belushi has all the comedic delivery of a hairy lighthouse foghorn. The women are physically attractive but too Stepford-is to elicit any real feeling from the viewer. There is absolutely no reason to stop yourself from running down to the local TV station with a can of gasoline and a flamethrower and sending every copy of this mutt howling back to hell. <br /><br />Except.. <br /><br />Except for the wonderful comic sty lings of Larry Joe Campbell, America's Greatest Comic Character Actor. This guy plays Belushi's brother-in-law, Andy, and he is gold. How good is he really? Well, aside from being funny, his job is to make Belushi look good. That's like trying to make butt warts look good. But Campbell pulls it off with style. Someone should invent a Nobel Prize in Comic Buffoonery so he can win it every year. 
Without Larry Joe this show would consist of a slightly vacant looking Courtney Thorne-Smith smacking Belushi over the head with a frying pan while he alternately beats his chest and plays with the straw on the floor of his cage. 5 stars for Larry Joe Campbell designated Comedic Bacon because he improves the flavor of everything he's in!",0,0.45579,1
1,"1st watched 12/7/2002 - 3 out of 10(Dir-Steve Purcell): Typical Mary Kate & Ashley fare with a few more kisses. It looks to me like the girls are getting pretty tired of this stuff and it will be interesting what happens to them if they ever decide to split up and go there own ways. In this episode of their adventures they are interns in Rome for a `fashion' designer who puts them right into the mailroom to learn what working hard is all about(I guess..). Besides the typical flirtations with boys there is nothing much else except the Rome scenario until about Â¾ way into the movie when it's finally revealed why they are getting fired, then re-hired, then fired again, then re-hired again. This is definetly made by people who don't understand the corporate world and it shows in their interpretation of it. Maybe the real world will be their next adventure(if there is one.). Even my kids didn't seem to care for this boring `adventure' in the make-believe. Let's see they probably only have a couple of years till their legal adults. We'll see what happens then.",0,0.024477,0
2,"This movie was so poorly written and directed I fell asleep 30 minutes through the movie. The jokes in the movie are corny and even though the plot is interesting at some angles, it is too far fetched and at some points- ridiculous. If you are 11 or older you will overlook the writing in the movie and be disappointed, but if you are 10 or younger this is a film that will capture your attention and be amazed with all the stunts (which I might add are poorly done) and wish you were some warrior to. The casting in this movie wasn't very good, and the music was very disappointing because it was like they were trying to build up the tension but it didn't fit at all. On a scale of 1-10 (10 being excellent, 1 being horrible) the acting in this movie is a 4. Brenda Song is talented in comedy, but with this kind of movie, in some of the more serious scenes, her acting was laughable. When she made some of her ""fighting"" poses, I started laughing out loud. I think the worst thing about this movie is definitely the directing, for example, the part where her enemy turns out to be the person the evil villain is possesing, how her voice turns dark and evil, I think that was incredibly stupid, and how Wendy's (Brenda Song)teachers were all her teachers at school being possessed by monks, that was pretty ridiculous to. So to sumamrize it all, a disappointing movie, but okay if you're 10 or under.",0,0.000735,0


In [39]:
test['Sentiment_score']=np.where(test.Sentiment>=0.44,1,0)

In [65]:
test[test.Sentiment_score==1][2:6]

Unnamed: 0,text,label,Sentiment,Sentiment_score
5,"I saw this film on September 1st, 2005 in Indianapolis. I am one of the judges for the Heartland Film Festival that screens films for their Truly Moving Picture Award. A Truly Moving Picture ""...explores the human journey by artistically expressing hope and respect for the positive values of life."" Heartland gave that award to this film.<br /><br />This is a story of golf in the early part of the 20th century. At that time, it was the game of upper class and rich ""gentlemen"", and working people could only participate by being caddies at country clubs. With this backdrop, this based-on-a-true-story unfolds with a young, working class boy who takes on the golf establishment and the greatest golfer in the world, Harry Vardon.<br /><br />And the story is inspirational. Against all odds, Francis Ouimet (played by Shia LaBeouf of ""Holes"") gets to compete against the greatest golfers of the U.S. and Great Britain at the 1913 U.S. Open. Francis is ill-prepared, and has a child for a caddy. (The caddy is hilarious and motivational and steals every scene he appears in.) But despite these handicaps, Francis displays courage, spirit, heroism, and humility at this world class event.<br /><br />And, we learn a lot about the early years of golf; for example, the use of small wooden clubs, the layout of the short holes, the manual scoreboard, the golfers swinging with pipes in their mouths, the terrible conditions of the greens and fairways, and the play not being canceled even in torrential rain.<br /><br />This film has stunning cinematography and art direction and editing. And with no big movie stars, the story is somehow more believable.<br /><br />This adds to the inventory of great sports movies in the vein of ""Miracle"" and ""Remember the Titans.""<br /><br />FYI - There is a Truly Moving Pictures web site where there is a listing of past winners going back 70 years.",1,0.94471,1
7,"William Hurt may not be an American matinee idol anymore, but he still has pretty good taste in B-movie projects. Here, he plays a specialist in hazardous waste clean-ups with a tragic past tracking down a perennial loser on the run --played by former pretty-boy Weller-- who has been contaminated with a deadly poison. Current pretty-boy Hardy Kruger Jr --possibly more handsome than his dad-- is featured as Weller's arrogant boss in a horrifying sequence at a chemical production plant which gets the story moving. Natasha McElhone is a slightly wacky government agent looking into the incident who provides inevitable & high-cheekboned love interest for hero Hurt. Michael Brandon pops up to play a slimy take-no-prisoners type whose comeuppance you can't wait for. The Coca-Cola company wins the Product Placement award for 2000 as the soft drink is featured throughout the production, shot lovingly on location in a wintery picture-postcard Hungary.",1,0.848766,1
11,"I really enjoyed the detail that went into the script.<br /><br />Jonathan Rhys Myers (misspelled) and Jewel were outstanding in their support roles. As was Jeffery Wright. Toby McGuire gave as fine a acting job as ever depicted, when he had to amputate his best friend's arm, knowing he would die without the procedure. <br /><br />Attention to detail, with good dialect coaching to catch the Southern accent incredibly well.<br /><br />Why this movie was swept under the rug by the Hollywood promoters I can only imagine. I have strong suspicions. Which makes it all the more appealing to me. I have given a dozen DVD copies out for presents.<br /><br />Completely overlooked movie. Rent or buy it and give it your full attention for a couple of hours, then judge.",1,0.967808,1
17,"Brass pictures (movies is not a fitting word for them) really are somewhat brassy. Their alluring visual qualities are reminiscent of expensive high class TV commercials. But unfortunately Brass pictures are feature films with the pretense of wanting to entertain viewers for over two hours! In this they fail miserably, their undeniable, but rather soft and flabby than steamy, erotic qualities non withstanding.<br /><br />Senso '45 is a remake of a film by Luchino Visconti with the same title and Alida Valli and Farley Granger in the lead. The original tells a story of senseless love and lust in and around Venice during the Italian wars of independence. Brass moved the action from the 19th into the 20th century, 1945 to be exact, so there are Mussolini murals, men in black shirts, German uniforms or the tattered garb of the partisans. But it is just window dressing, the historic context is completely negligible.<br /><br />Anna Galiena plays the attractive aristocratic woman who falls for the amoral SS guy who always puts on too much lipstick. She is an attractive, versatile, well trained Italian actress and clearly above the material. Her wide range of facial expressions (signalling boredom, loathing, delight, fear, hate ... and ecstasy) are the best reason to watch this picture and worth two stars. She endures this basically trashy stuff with an astonishing amount of dignity. I wish some really good parts come along for her. She really deserves it.",0,0.51114,1


In [55]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

In [56]:
y_true = test.label
y_pred = test.Sentiment_score
precision_recall_fscore_support(y_true, y_pred, average='macro')
accuracy_score(y_true, y_pred)

(0.9591623001435945, 0.9587726350905403, 0.9587899711273735, None)

0.9588

## Using tensorflow serving 

### https://medium.com/@prithvishetty/deploying-machine-learning-models-in-aws-tensorflow-c5265aed4def

```bash
docker run -p 8501:8501 \
  --mount type=bind,source=/path/to/model/model_tf_serving_data_science_ua1605875375/,target=/models/model_tf_serving_data_science_ua1605875375 \
  -e MODEL_NAME=model_tf_serving_data_science_ua1605875375 \
  -t tensorflow/serving
```

In [41]:
def get_rest_url(model_name, host='localhost', port='8501', verb='predict'):
    """Build the REST URL for one verb of a served model.

    Parameters
    ----------
    model_name : str
        Name the model is served under.
    host : str, optional
        Host name of the serving endpoint (default ``'localhost'``).
    port : str, optional
        REST port of the serving endpoint (default ``'8501'``).
    verb : str, optional
        API verb appended after the colon (default ``'predict'``).

    Returns
    -------
    str
        ``http://<host>:<port>/v1/models/<model_name>:<verb>``
    """
    return f"http://{host}:{port}/v1/models/{model_name}:{verb}"


def rest_request(data, url, timeout=30):
    """POST a single instance to a model-serving REST endpoint.

    Parameters
    ----------
    data : JSON-serialisable object
        One model input. It is wrapped in a one-element ``"instances"`` list
        before being sent.
    url : str
        Endpoint to post to, e.g. the value returned by ``get_rest_url``.
    timeout : float or tuple, optional
        Passed to ``requests.post``. Without a timeout a stalled server would
        block the notebook indefinitely. Defaults to 30 seconds.

    Returns
    -------
    requests.Response
        The raw response. The HTTP status is not checked here; callers decide
        how to handle failures.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    """
    payload = json.dumps({"instances": [data]})
    return requests.post(url=url, data=payload, timeout=timeout)




In [42]:
url = get_rest_url(model_name=model_name)
url

'http://localhost:8501/v1/models/model_tf_serving_data_science_ua1605875375:predict'

In [47]:
# Score every padded test sequence through the serving endpoint, one request per row.
predictions = []
for row in tqdm(x_test, desc="scoring"):
    response = rest_request(data=row.tolist(), url=url)
    # Fail loudly on an HTTP error instead of indexing into an error payload.
    response.raise_for_status()
    # Each request carries exactly one instance, so the first entry of
    # "predictions" is this row's output (a one-element [score] list).
    predictions.append(response.json()["predictions"][0])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))




In [50]:
predictions[0:10]

[[0.455790073],
 [0.0244773626],
 [0.000735044479],
 [0.979729056],
 [0.0289501846],
 [0.944710135],
 [0.0219373405],
 [0.848766208],
 [0.000408679247],
 [0.000594556332]]