# PRODUCTIONIZE MODELS

In [1]:
from google.colab import drive
# Mount Google Drive so the files under /content/gdrive read later in this notebook are reachable (Colab only).
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.metrics import hamming_loss, recall_score, precision_score, f1_score
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from nltk.stem import WordNetLemmatizer
import nltk
# Fetch the NLTK corpora this notebook needs: WordNet for WordNetLemmatizer and the
# stopword lists read by preprocess(). The stopwords import must follow the download.
nltk.download('wordnet')
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


## END-TO-END PIPELINE

In [3]:
lemmatizer = WordNetLemmatizer()

# Ordered (pattern, replacement) pairs used to expand English contractions.
# Order matters: the specific forms ("won't", "can't") must run before the
# generic "n't" rule, and "n't" before the bare "'t" rule.
_CONTRACTIONS = (
    (r"won\'t", "will not"),
    (r"can\'t", "can not"),
    (r"n\'t", " not"),
    (r"\'re", " are"),
    (r"\'s", " is"),
    (r"\'d", " would"),
    (r"\'ll", " will"),
    (r"\'t", " not"),
    (r"\'ve", " have"),
    (r"\'m", " am"),
)


def preprocess(text):
    """Normalize a raw text string into lower-case, space-separated tokens.

    Steps, in order:
      1. Expand common English contractions ("won't" -> "will not", ...).
      2. Drop any "word:" prefix and bracket-delimited spans (see note below).
      3. Lemmatize each whitespace-separated token with WordNetLemmatizer.
      4. Replace every non-alphanumeric character with a space and drop
         isolated single letters.
      5. Remove English stopwords and tokens of length <= 2, then lower-case.

    Args:
        text: the input string.

    Returns:
        The cleaned string: lower-case ASCII alphanumeric tokens joined by
        single spaces (possibly empty).
    """
    for pattern, replacement in _CONTRACTIONS:
        text = re.sub(pattern, replacement, text)

    # Remove "anyword:" (a run of word characters followed by a colon).
    text = re.sub(r'\w+:\s?', '', text)

    # Bracket removal. The character classes below match what is written, which
    # differs from the original comments: each pattern starts at the listed
    # opener OR a '[' and ends at the single listed closer, so a plain "[...]"
    # span is NOT removed by any of them.
    # NOTE(review): patterns are kept byte-identical (only made raw strings to
    # avoid invalid-escape warnings) because the saved model was presumably
    # trained on text cleaned this way -- confirm before changing them.
    text = re.sub(r'[([].*?[\)]', '', text)   # '(' or '[' ... up to the next ')'
    text = re.sub(r'[<[].*?[\>]', '', text)   # '<' or '[' ... up to the next '>'
    text = re.sub(r'[{[].*?[\}]', '', text)   # '{' or '[' ... up to the next '}'

    # Lemmatize token by token.
    text = ' '.join(lemmatizer.lemmatize(word) for word in text.split())

    text = re.sub(r'\W', ' ', text)                # special characters -> space
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)    # drop isolated single letters
    text = re.sub(r"[^A-Za-z0-9]", " ", text)      # keep ASCII letters/digits only
    # After the line above only [A-Za-z0-9 ] remain, so a further
    # punctuation-stripping pass would never match and is omitted.

    # Build the stopword set once per call instead of once per token.
    stop_words = set(stopwords.words('english'))
    kept = [tok for tok in text.split() if tok.lower() not in stop_words and len(tok) > 2]

    # Joining split tokens with single spaces already yields collapsed,
    # stripped whitespace; only lower-casing is left to do.
    return ' '.join(kept).lower()


In [17]:
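# One-time export of the fitted tokenizer, kept for reference only. It stays commented out
# because `tokenizer` is not defined in this notebook until it is loaded in the next cell.
#pickle.dump((tokenizer), open('/content/gdrive/MyDrive/cs1/tokenizer.pkl','wb'))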

In [18]:
# Load the fitted Keras tokenizer saved alongside the model.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The context manager closes the file handle, which the bare open() call leaked.
with open('/content/gdrive/MyDrive/cs1/tokenizer.pkl', 'rb') as tokenizer_file:
  tokenizer = pickle.load(tokenizer_file)

# Defaults for the deployed model; exposed as keyword arguments below.
_MODEL_PATH = '/content/gdrive/MyDrive/cs1/deep model final/model2_gv_deepl.h5'
_MAX_SEQUENCE_LENGTH = 300
# Label for each output column, in column order (0, 1, 2).
_LABELS = ('commenting', 'ogling', 'groping')
# Loaded models keyed by path, so repeated calls do not re-read the .h5 file.
_MODEL_CACHE = {}


def _load_model(path):
  """Return the Keras model stored at `path`, loading it only on first use."""
  if path not in _MODEL_CACHE:
    _MODEL_CACHE[path] = tf.keras.models.load_model(path)
  return _MODEL_CACHE[path]


def _decode_predictions(pred, threshold):
  """Turn a (rows, columns) score array into a flat list of label names."""
  result = []
  for row in np.asarray(pred) > threshold:
    # assumes the model emits one column per entry of _LABELS -- TODO confirm
    active = [label for label, flagged in zip(_LABELS, row) if flagged]
    # 'None' is decided per row (the original summed the whole matrix, which
    # is only equivalent when there is a single row, as is the case here).
    result.extend(active if active else ['None'])
  return result


def end_to_end_pipeline(string, model_path=_MODEL_PATH, threshold=0.5):
  """Classify one raw text and print the predicted action labels.

  The text is cleaned with preprocess(), converted to a padded integer
  sequence with the module-level `tokenizer`, and scored by the saved model.
  Every output column whose score exceeds `threshold` contributes its label;
  if none does, the single label 'None' is reported.

  Args:
    string: raw input text.
    model_path: path of the saved Keras model to use.
    threshold: score above which a label is considered predicted.

  Returns:
    None. The result is printed as "possible action : [...]".
  """
  cleaned = preprocess(string)
  sequences = tokenizer.texts_to_sequences([cleaned])
  padded = pad_sequences(sequences, maxlen=_MAX_SEQUENCE_LENGTH, dtype='int32',
                         padding='post', truncating='post')

  model = _load_model(model_path)
  pred = model.predict(padded, batch_size=64)

  result = _decode_predictions(pred, threshold)
  print(f'possible action : {result}')


In [19]:
%time
query_1 = 'During morning, a woman was walking and thin guy came around and called her names, and commented on her body.'
end_to_end_pipeline(query_1)

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 9.06 µs
possible action : ['commenting']


In [20]:
%time
query_2 = 'During morning, a woman was walking by and thin guy came and groped her.'
end_to_end_pipeline(query_2)

CPU times: user 10 µs, sys: 1e+03 ns, total: 11 µs
Wall time: 18.6 µs
possible action : ['groping']


In [21]:
%time
query_3 = 'During morning, a woman was walking by and thin guy was staring at her.'
end_to_end_pipeline(query_3)

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 10 µs
possible action : ['ogling']


In [22]:
%time
query_4 = 'During morning, a woman was walking by and thin guy came and did nothing to her.'
end_to_end_pipeline(query_4)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.68 µs
possible action : ['None']


In [23]:
%time
query_5 = 'Catcalls and passing comments were two of the ghastly things the Delhi police at the International Airport put me and my friend through. It is appalling that the protectors and law enforcers at the airport can make someone so uncomfortable.'
end_to_end_pipeline(query_5)

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 9.54 µs
possible action : ['commenting']


In [24]:
%time
query_6 = 'This incident took place in the evening.I was in the metro when two guys started staring.'
end_to_end_pipeline(query_6)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 6.68 µs
possible action : ['ogling']


In [25]:
%time
query_7 = 'Was walking along crowded street, holding mums hand, when an elderly man groped butt, I turned to look at him and he looked away, and did it again after a while.I was 12 yrs old then.'
end_to_end_pipeline(query_7)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.44 µs
possible action : ['groping']


In [26]:
%time
query_8 = 'chain snatching evening punjabi bagh bus stop'
end_to_end_pipeline(query_8)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs
possible action : ['None']


In [27]:
%time
query_9 = 'Was walking along crowded street, holding mums hand, when an elderly man groped butt, I turned to look at h7m and he looked away, and did it again after a while.I was 12 yrs old then.'
end_to_end_pipeline(query_9)

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.34 µs
possible action : ['groping']


In [28]:
%time
query_10 = 'witnEsseD incident chaIn9 brutALLy snatched elderly lady incident tok pla$Ce even!ing'
end_to_end_pipeline(query_10)

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 8.82 µs
possible action : ['None']


In [29]:
%time
query_11 = 'incident kap0Pened inMide tRaI*n'
end_to_end_pipeline(query_11)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs
possible action : ['None']
