In [2]:
from model.attention import Attention

from sklearn.preprocessing import StandardScaler

import tensorflow as tf
import keras
import warnings
import logging
import pickle
import numpy as np
import pandas as pd

In [3]:
# Keep the notebook output readable: ignore every Python warning and let
# TensorFlow's logger report errors only.
warnings.simplefilter("ignore")
tf.get_logger().setLevel(level=logging.ERROR)

In [4]:
# Location of the pickled preprocessing artefacts.
pickle_file = "saved_models/preprocessing.pkl"

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at a file you produced or otherwise trust.
with open(pickle_file, mode="rb") as f:
    loaded_objects = pickle.load(f)

# The pickle holds a dict; pull out the text pipeline and the
# token-id -> word lookup used further down.
preprocessing_pipe, reversed_word_index = (
    loaded_objects["preprocessing_pipe"],
    loaded_objects["reverse_word_index"],
)

[nltk_data] Downloading package stopwords to C:\Users\Harshana
[nltk_data]     Lakmal\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# Both saved models reference the project's custom Attention layer, so it has
# to be handed to Keras via custom_objects when deserialising.
classification_model = keras.models.load_model(
    "saved_models/classification_model.keras",
    custom_objects={"Attention": Attention},
)
attention_model = keras.models.load_model(
    "saved_models/attention_model.keras",
    custom_objects={"Attention": Attention},
)

In [6]:
# Smoke-test the preprocessing pipeline on a single example text.
sample = np.asarray(
    ["TRUCK ABLAZE : R21. VOORTREKKER AVE. OUTSIDE OR TAMBO INTL. CARGO SECTION. http://t.co/8kscqKfKkF"]
)
sample_x = preprocessing_pipe.transform(sample)

In [7]:
def predict__(text):
    """Score one raw text and pair its recognised tokens with attention weights.

    The text is wrapped in a one-element array and run through
    ``preprocessing_pipe``. ``classification_model`` scores it and the result
    for that single sample is printed. ``attention_model`` is then run on the
    same input; the first sample of its second output is used as the
    per-position weights (assumes that output holds the attention weights
    with a trailing axis of size >= 1 -- confirm against the model definition).

    Parameters
    ----------
    text : str
        A single raw document.

    Returns
    -------
    list of tuple
        ``(word, weight)`` pairs in sequence order, one for every position
        whose token id is present in ``reversed_word_index``. Positions with
        an unmapped id (e.g. padding) are skipped.
    """
    pred_text = np.array([text])
    processed_text = preprocessing_pipe.transform(pred_text)

    # Single-sample batch: index 0 is the prediction for `text`.
    pred_results = classification_model.predict(processed_text)[0]
    print(pred_results)

    # Second model output, first (and only) sample in the batch.
    attention_w = attention_model.predict(processed_text)[1][0]

    word_mat = []

    for vect, weight in zip(processed_text[0], attention_w):
        # Only the lookup is guarded, so unrelated errors are not swallowed.
        try:
            word = reversed_word_index[vect]
        except KeyError:
            # Previously this stopped at the first unmapped id, which dropped
            # every later word as well. Skipping keeps the remaining
            # word/weight pairs aligned by position and gives the same result
            # when the unmapped ids are trailing padding.
            continue

        word_mat.append((word, weight[0]))

    return word_mat

In [8]:
# Run predict__ on a multi-line e-mail sample; the (word, weight) pairs it
# returns are kept in pred_results for the cells below.
pred_results = predict__(
    """Subject: looking for medication ? we ` re the best source .
it is difficult to make our material condition better by the best law , but it is easy enough to ruin it by bad laws .
excuse me . . . : ) you just found the
best and simpliest site for
medication on the net . no perscription , easy
delivery .
private , secure , and easy .
better see rightly on a pound a week than squint on a million .
we ` ve got
anything that you will ever want .
erection treatment pills , anti - depressant pills , weight loss , and
more ! http : / / splicings . bombahakcx . com / 3 /
knowledge and human power are synonymous .
only high - quality stuff for low rates !
100 % moneyback guarantee !
there is no god , nature sufficeth unto herself in no wise hath she need of an author ."""
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 768ms/step
[0.999955]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 658ms/step


In [9]:
# Show the (word, weight) pairs returned by predict__.
pred_results

[('subject', 0.0019748139),
 ('looking', 0.008755728),
 ('medication', 0.012612656),
 ('best', 0.013775384),
 ('source', 0.014071629),
 ('difficult', 0.013848166),
 ('make', 0.014051019),
 ('material', 0.014093628),
 ('condition', 0.014337486),
 ('better', 0.0144506665),
 ('best', 0.014554008),
 ('law', 0.014641274),
 ('easy', 0.014673417),
 ('enough', 0.014727069),
 ('ruin', 0.0147827845),
 ('bad', 0.014814659),
 ('laws', 0.014841442),
 ('excuse', 0.014834671),
 ('found', 0.0148265455),
 ('best', 0.014912011),
 ('simpliest', 0.014911364),
 ('site', 0.014851495),
 ('medication', 0.014838236),
 ('net', 0.014807653),
 ('perscription', 0.014770222),
 ('easy', 0.014748397),
 ('delivery', 0.014727385),
 ('private', 0.014653099),
 ('secure', 0.014635913),
 ('easy', 0.014544574),
 ('better', 0.014489195),
 ('see', 0.014466186),
 ('rightly', 0.01451746),
 ('pound', 0.014591532),
 ('week', 0.014582907),
 ('squint', 0.014591518),
 ('million', 0.014574224),
 ('got', 0.014503421),
 ('anything', 0.

In [11]:
# Tabulate the (word, weight) pairs, one row per token position.
gf = pd.DataFrame(data=pred_results, columns=["Word", "Attention Weight"])

# Standardise the weights (zero mean, unit variance). The scaler expects a
# 2-D input, hence the list-style column selection.
scaler = StandardScaler()
attention = scaler.fit_transform(gf.loc[:, ["Attention Weight"]])
gf["Scaled Weight"] = attention

gf

Unnamed: 0,Word,Attention Weight,Scaled Weight
0,subject,0.001975,-4.650491
1,looking,0.008756,-1.914606
2,medication,0.012613,-0.358457
3,best,0.013775,0.110668
4,source,0.014072,0.230193
...,...,...,...
69,unto,0.009305,-1.692873
70,wise,0.008519,-2.010003
71,hath,0.008263,-2.113389
72,need,0.006277,-2.914592


In [16]:
# Re-wrap the scaled weights and the word column as plain NumPy arrays.
attention = np.array(attention)
words = np.array(gf["Word"])

# Map each word to the scalar in the first column of its scaled-weight row.
# NOTE(review): a word that occurs more than once keeps only the weight of
# its last occurrence, because later dict keys overwrite earlier ones.
word_dictionary = dict(zip(words, attention[:, 0]))

In [17]:
# Show the word -> scaled-weight mapping.
word_dictionary

{'subject': -4.650491,
 'looking': -1.9146059,
 'medication': 0.53949505,
 'best': 0.56926084,
 'source': 0.23019324,
 'difficult': 0.14003278,
 'make': 0.22187768,
 'material': 0.23906904,
 'condition': 0.33745787,
 'better': 0.39866796,
 'law': 0.46002683,
 'easy': 0.42101136,
 'enough': 0.49464253,
 'ruin': 0.517122,
 'bad': 0.5299823,
 'laws': 0.5407884,
 'excuse': 0.5380566,
 'found': 0.5347781,
 'simpliest': 0.56899965,
 'site': 0.5448443,
 'net': 0.52715546,
 'perscription': 0.5120533,
 'delivery': 0.4947699,
 'private': 0.46479782,
 'secure': 0.45786396,
 'see': 0.38938445,
 'rightly': 0.41007188,
 'pound': 0.43995753,
 'week': 0.4364776,
 'squint': 0.43995187,
 'million': 0.4329744,
 'got': 0.40440768,
 'anything': 0.3169557,
 'ever': 0.43789724,
 'want': 0.4922711,
 'erection': 0.524928,
 'treatment': 0.5501155,
 'pills': 0.49472746,
 'anti': 0.5920946,
 'depressant': 0.57956904,
 'weight': 0.53840494,
 'loss': 0.51663274,
 'http': 0.4248208,
 'splicings': 0.40625793,
 'bomba