In [0]:
import numpy as np
import pandas as pd
import re
import os
import string
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import  classification_report
!gdown --id 1pdgCAl-Rbm0ou_wLqt4D2f_TaymWOUII

Downloading...
From: https://drive.google.com/uc?id=1pdgCAl-Rbm0ou_wLqt4D2f_TaymWOUII
To: /content/train.csv
68.8MB [00:00, 79.5MB/s]


In [0]:
# Data
data = pd.read_csv('train.csv')

In [0]:
data.shape

(159571, 8)

In [0]:
# Distribution as per all the labels
classes = ['toxic','severe_toxic','obscene','threat','insult','identity_hate']
for label in classes:
  print('Distribution:\n{}\n'.format((data[label].value_counts()/data.shape[0])*100))

Distribution:
0    90.415552
1     9.584448
Name: toxic, dtype: float64

Distribution:
0    99.000445
1     0.999555
Name: severe_toxic, dtype: float64

Distribution:
0    94.705178
1     5.294822
Name: obscene, dtype: float64

Distribution:
0    99.700447
1     0.299553
Name: threat, dtype: float64

Distribution:
0    95.063639
1     4.936361
Name: insult, dtype: float64

Distribution:
0    99.119514
1     0.880486
Name: identity_hate, dtype: float64



In [0]:
# Train Test Split
train_texts, test_texts, train_labels, test_labels = train_test_split(data.comment_text, data[classes].values, test_size=0.2, random_state=42)
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.2, random_state=42)

In [0]:
def data_input_fn(texts, labels, batch_size=32, is_training=True):
  # Convert the inputs to a Dataset.
  dataset = tf.data.Dataset.from_tensor_slices((texts,labels))
  # Shuffle, repeat, and batch the examples.
  dataset = dataset.cache()
  if is_training:
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True)
    dataset = dataset.repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)
  # Return the dataset.
  return dataset

# Creating Vectorization Layer
max_features = 5000
max_len = 50
vectorization_layer = tf.keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens=max_features, output_sequence_length=max_len)
vectorization_layer.adapt(train_texts.values)

In [0]:
# Getting embeddings
def create_model():
  words = tf.keras.Input(shape=(1,), dtype=tf.string)
  vectors = vectorization_layer(words)
  embeddings = tf.keras.layers.Embedding(input_dim=max_features+1, output_dim=128)(vectors)
  output = tf.keras.layers.LSTM(256, return_state=True, return_sequences=True, name='LSTM_1')(embeddings)
  output = tf.keras.layers.LSTM(256, name='LSTM_2')(output)
  output = tf.keras.layers.Dense(64, activation='relu', name='Dense_3')(output)
  output = tf.keras.layers.Dense(6,activation='sigmoid', name='Output')(output)

  model = tf.keras.models.Model(words,output)
  return model

In [0]:
# For TPU execution
'''
import os
try:
 device_name = os.environ['COLAB_TPU_ADDR']
 TPU_ADDRESS = 'grpc://' + device_name
 print('Found TPU at: {}'.format(TPU_ADDRESS))
except KeyError:
 print('TPU not found')

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
print("All devices: ", tf.config.list_logical_devices('TPU'))
strategy = tf.distribute.experimental.TPUStrategy(resolver)

with strategy.scope():
  model = create_model()
  model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Data
training_dataset = data_input_fn(train_texts, train_labels)
validation_dataset = data_input_fn(val_texts, val_labels, batch_size=512, is_training=False)
test_dataset = data_input_fn(test_texts, test_labels, batch_size=512, is_training=False)

batch_size = 32
epochs = 100
steps_per_epoch = train_sequences.shape[0] // batch_size

model.fit(training_dataset, epochs=epochs, batch_size=batch_size, 
          steps_per_epoch=steps_per_epoch, validation_data=validation_dataset)
'''          

'\nimport os\ntry:\n device_name = os.environ[\'COLAB_TPU_ADDR\']\n TPU_ADDRESS = \'grpc://\' + device_name\n print(\'Found TPU at: {}\'.format(TPU_ADDRESS))\nexcept KeyError:\n print(\'TPU not found\')\n\nresolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=\'grpc://\' + os.environ[\'COLAB_TPU_ADDR\'])\ntf.config.experimental_connect_to_cluster(resolver)\n# This is the TPU initialization code that has to be at the beginning.\ntf.tpu.experimental.initialize_tpu_system(resolver)\nprint("All devices: ", tf.config.list_logical_devices(\'TPU\'))\nstrategy = tf.distribute.experimental.TPUStrategy(resolver)\n\nwith strategy.scope():\n  model = create_model()\n  model.compile(loss=\'binary_crossentropy\', optimizer=\'adam\', metrics=[\'accuracy\'])\n\n# Data\ntraining_dataset = data_input_fn(train_texts, train_labels)\nvalidation_dataset = data_input_fn(val_texts, val_labels, batch_size=512, is_training=False)\ntest_dataset = data_input_fn(test_texts, test_labels, batch_size=512, 

In [0]:
# For GPU Execution
model = create_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Data
training_dataset = data_input_fn(train_texts, train_labels)
validation_dataset = data_input_fn(val_texts, val_labels, batch_size=512, is_training=False)
test_dataset = data_input_fn(test_texts, test_labels, batch_size=512, is_training=False)

batch_size = 32
epochs = 1
steps_per_epoch = train_texts.shape[0] // batch_size

model.fit(training_dataset, epochs=epochs, batch_size=batch_size, 
          steps_per_epoch=steps_per_epoch, validation_data=validation_dataset)



<tensorflow.python.keras.callbacks.History at 0x7f32bed6e8d0>

In [0]:
model.predict(['I hate black people'])

array([[0.19375521, 0.01254728, 0.10115671, 0.00688036, 0.08974175,
        0.02121467]], dtype=float32)

In [0]:
model.evaluate(test_dataset)



[0.09646929800510406, 0.994140625]

In [0]:
# Test
model.predict(['You are an asshole!'])

array([[0.6161914 , 0.13870525, 0.42414263, 0.03978828, 0.40349987,
        0.08646153]], dtype=float32)

In [0]:
# Model Evaluation and Inference functions
def evaluate(predictions, truth):
  thresholds=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
  for val in thresholds:
    pred = predictions.copy()
    pred = np.where(pred>=val,1,0)
    report = classification_report(truth, pred)
    print("Classification report for threshold {}".format(val))
    print("Classification Report:\n {}\n".format(report))

evaluate(model.predict(test_texts), test_labels)

Classification report for threshold 0.1
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.67      0.53      3056
           1       0.24      0.64      0.35       321
           2       0.46      0.61      0.53      1715
           3       0.13      0.03      0.04        74
           4       0.46      0.59      0.52      1614
           5       0.11      0.18      0.14       294

   micro avg       0.41      0.61      0.49      7074
   macro avg       0.31      0.45      0.35      7074
weighted avg       0.42      0.61      0.49      7074
 samples avg       0.05      0.06      0.05      7074


Classification report for threshold 0.2
Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.50      0.56      3056
           1       0.27      0.16      0.20       321
           2       0.60      0.48      0.53      1715
           3       0.00      0.00      0.00        74
   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [0]:
os.getcwd()

'/content'

In [0]:
# Saving Model
MODEL_DIR = os.getcwd()
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))

tf.keras.models.save_model(
    model,
    export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None
)

print('\nSaved model:')
!ls -l {export_path}

export_path = /content/1

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /content/1/assets

Saved model:
total 1752
drwxr-xr-x 2 root root    4096 Jun  9 15:13 assets
-rw-r--r-- 1 root root 1782893 Jun  9 15:13 saved_model.pb
drwxr-xr-x 2 root root    4096 Jun  9 15:13 variables


In [0]:
!saved_model_cli show --dir {export_path} --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_2'] tensor_info:
        dtype: DT_STRING
        shape: (-1, 1)
        name: serving_default_input_2:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['Output'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 6)
        name: StatefulPartitionedCall:0
  Method name is: tensorflow/serving/predict
W0609 15:13:36.065053 140702980573056 deprecation.py:506] From /usr/local/lib/python2.7/dist-packages/tensorflow_core/python/ops/resource_va

In [0]:
# Updating apt repo for tensorflow-model-server
!echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
!apt update

deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  2943  100  2943    0     0  40875      0 --:--:-- --:--:-- --:--:-- 40875
OK
Get:1 http://storage.googleapis.com/tensorflow-serving-apt stable InRelease [3,012 B]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/ InRelease [3,626 B]
Ign:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Get:4 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease [21.3 kB]
Get:5 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Hit:6 http://archive.ubuntu.com/ubuntu bionic InRelease
Ign:7 https://developer.download.nvidia.com/compute/machine-learning/repos/u

In [0]:
# Installing Tensorflow Model Server
!apt-get install tensorflow-model-server

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  tensorflow-model-server
0 upgraded, 1 newly installed, 0 to remove and 66 not upgraded.
Need to get 187 MB of archives.
After this operation, 0 B of additional disk space will be used.
Get:1 http://storage.googleapis.com/tensorflow-serving-apt stable/tensorflow-model-server amd64 tensorflow-model-server all 2.2.0 [187 MB]
Fetched 187 MB in 3s (67.3 MB/s)
Selecting previously unselected package tensorflow-model-server.
(Reading database ... 144467 files and directories currently installed.)
Preparing to unpack .../tensorflow-model-server_2.2.0_all.deb ...
Unpacking tensorflow-model-server (2.2.0) ...
Setting up tensorflow-model-server (2.2.0) ...


In [0]:
# Adding models directory to env variables
os.environ["MODEL_DIR"] = MODEL_DIR

In [0]:
%%bash --bg 
nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --model_name=sample_model \
  --model_base_path="${MODEL_DIR}" >server.log 2>&1

Starting job # 0 in a separate thread.


In [0]:
!tail server.log

2020-06-09 15:13:57.653803: I external/org_tensorflow/tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-06-09 15:13:57.755291: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:234] Restoring SavedModel bundle.
2020-06-09 15:13:57.931910: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:183] Running initialization op on SavedModel bundle at path: /content/1
2020-06-09 15:13:58.053686: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:364] SavedModel load for tags { serve }; Status: success: OK. Took 431720 microseconds.
2020-06-09 15:13:58.069180: I tensorflow_serving/servables/tensorflow/saved_model_warmup.cc:105] No warmup data file found at /content/1/assets.extra/tf_serving_warmup_requests
2020-06-09 15:13:58.069357: I tensorflow_serving/core/loader_harness.cc:87] Successfully loaded servable version {name: sample_model version: 1}
2020-06-09 15:13

In [0]:
import json
test_case = [['Fuck off! I do not love you']]
data = json.dumps({"signature_name": "serving_default", "instances": test_case})
print('Data: {}'.format(data))

Data: {"signature_name": "serving_default", "instances": [["Fuck off! I do not love you"]]}


In [0]:
import requests
headers = {"content-type": "application/json"}
json_response = requests.post('http://localhost:8501/v1/models/sample_model:predict', data=data, headers=headers)
predictions = json.loads(json_response.text)
print(predictions)

{'predictions': [[0.600387394, 0.0921687856, 0.383404642, 0.0241133831, 0.360389948, 0.0641452521]]}


In [0]:
model.predict(['Fuck off! I do not love you'])

array([[0.6003875 , 0.09216882, 0.38340476, 0.02411339, 0.36039   ,
        0.06414524]], dtype=float32)