In [24]:
import keras
import pandas as pd
import numpy as np
import joblib

import numpy as np

from keras.layers import LSTM, Bidirectional, Embedding, Input, Flatten, Dense, BatchNormalization, Dropout, Conv1D, Concatenate, MaxPool1D, AveragePooling1D, GlobalAveragePooling1D, GlobalMaxPool1D, TimeDistributed, Lambda, Add

from keras.models import Model
from keras.preprocessing.text import Tokenizer

from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences


In [2]:
import transformer as tr

In [3]:
import toxicity_metrics as tm

In [4]:
import misclass_report as mr

In [5]:
from collections import defaultdict

In [6]:
import seaborn as sns; sns.set()

In [7]:
tokenizer = joblib.load('./tokenizer')
embedding = joblib.load('./embedding')

In [8]:
def convert_text_for_prediction(text, tokenizer=tokenizer, maxlen=100):
    return pad_sequences(tokenizer.texts_to_sequences([text]),maxlen)

In [9]:
def get_predictions(testcases, model, tokenizer,maxlen=200):
    result = []
    for testcase in testcases:
        result.append(model.predict(convert_text_for_prediction(testcase,tokenizer,maxlen)))
    return result

In [10]:
testcases = ["women are good people but men are trash","you are a mother fucking asshole",
             "You are gay","I am gay","You are dead if i see you ever again",'It\'s ridiculous that these guys are being called "protesters". Being armed is a threat of violence, which makes them terrorists.']


# Transformer Model Analysis

In [11]:
transformer_model = tr.get_transformer_model(3,embedding,200)

Instructions for updating:
Colocations handled automatically by placer.
PE shape is (200, 300)
The PE output shape is Tensor("positional_encoder_1/add:0", shape=(?, 200, 300), dtype=float32)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [12]:
# transformer_model.load_weights('models/best_model_transformer.h5')
transformer_model.load_weights('models/best_model_transformer_new.h5')

In [14]:
def pepare_train_test(file, tokenizer,sample=None):
    import pandas as pd
    from sklearn.model_selection import train_test_split
    train_df = pd.read_csv(file)
    train_binary = np.zeros((len(train_df),))
    train_binary[train_df['target']>0.5] = 1
    train_df['target_binary'] = train_binary
    train, test  = train_test_split(train_df, test_size=0.002, random_state=42,stratify=train_df['target_binary'])
    if sample:
        train = train.sample(sample)
        test = test.sample(int(sample/10))
#     return tokenizer.texts_to_sequences(train['comment_text'].values.astype(str).tolist()), train['target_binary'].values, tokenizer.texts_to_sequences(test['comment_text'].values.astype(str).tolist()), test['target_binary'].values
    return train, test


In [15]:
train, test = pepare_train_test('train_clean.csv', tokenizer)

In [None]:
# Convert taget and identity columns to booleans
def convert_to_bool(df, col_name):
    df[col_name] = np.where(df[col_name] >= 0.5, True, False)
    
def convert_dataframe_to_bool(df):
    bool_df = df.copy()
    for col in ['binary_target']+identity_columns:
        convert_to_bool(bool_df, col)
    return bool_df

validate_df = convert_dataframe_to_bool(train_df)

In [13]:
result = get_predictions(testcases,transformer_model,tokenizer,200)

In [14]:
result

[array([[0.65063244]], dtype=float32),
 array([[0.9917435]], dtype=float32),
 array([[0.5660384]], dtype=float32),
 array([[0.47629952]], dtype=float32),
 array([[0.05181587]], dtype=float32),
 array([[0.32145128]], dtype=float32)]

In [15]:
transformer_model.layers

[<keras.engine.input_layer.InputLayer at 0x12f3570b8>,
 <keras.layers.embeddings.Embedding at 0x1320f5f28>,
 <transformer.PositionalEncoder at 0x1320f5d30>,
 <transformer.SDPA at 0x102913908>,
 <transformer.SDPA at 0x1320f5b00>,
 <transformer.SDPA at 0x1320f5d68>,
 <keras.layers.merge.Concatenate at 0x131438fd0>,
 <keras.layers.core.Dense at 0x15f907c18>,
 <keras.layers.merge.Add at 0x15fb01a90>,
 <layer_normalization.LayerNormalization at 0x15fb01da0>,
 <keras.layers.wrappers.TimeDistributed at 0x15fb74978>,
 <keras.layers.merge.Add at 0x15fb74a20>,
 <layer_normalization.LayerNormalization at 0x15fb74a58>,
 <keras.layers.pooling.GlobalMaxPooling1D at 0x15fb86518>,
 <keras.layers.pooling.GlobalAveragePooling1D at 0x15fb9fac8>,
 <keras.layers.merge.Concatenate at 0x15fb9fa58>,
 <keras.layers.core.Dense at 0x15fbb2e80>,
 <keras.layers.core.Dropout at 0x15fbde550>,
 <keras.layers.core.Dense at 0x15fbde1d0>,
 <keras.layers.core.Dropout at 0x15fbf0438>,
 <keras.layers.core.Dense at 0x15fc20

In [14]:
train_df = mr.prepare_for_report(transformer_model, tokenizer, 200, './train_clean.csv', 'comment_text', 'transformer',sample_size=0.2)

In [15]:
report = mr.get_groupwise_auc(train_df,'binary_target', 'transformer' )

In [16]:
report

Unnamed: 0,bnsp_auc,bpsn_auc,subgroup,subgroup_auc,subgroup_size
5,0.540618,0.638733,muslim,0.540418,4232
2,0.545603,0.635706,homosexual_gay_or_lesbian,0.543685,2237
3,0.543601,0.638202,christian,0.543721,8061
4,0.553867,0.636358,jewish,0.553801,1562
6,0.559881,0.636641,black,0.558341,2919
7,0.563988,0.63824,white,0.562808,5019
8,0.568935,0.637628,psychiatric_or_mental_illness,0.570423,984
1,0.578221,0.638265,female,0.577698,10770
0,0.581935,0.637268,male,0.580942,8863


In [16]:
sdpa1 = transformer_model.layers[3]
sdpa2 = transformer_model.layers[4]
sdpa3 = transformer_model.layers[5]

In [17]:
model = Model(transformer_model.input,sdpa1.output)

In [18]:
values = model.predict(convert_text_for_prediction("you are a mother fucking asshole",tokenizer,200))

In [19]:
values.shape

(1, 200, 100)

In [20]:
sdpa_weights = sdpa1.get_weights()
sdpa_weights_2 = sdpa2.get_weights()
sdpa_weights_3 = sdpa3.get_weights()

In [46]:
def compute_sdpa(sent, kernel_Q, kernel_K, kernel_V, verbose=False):
    Q = sent.dot(kernel_Q)
    if verbose:
        print ("Q is {}".format(Q))

    K = sent.dot(kernel_K)
    if verbose:
        print ("K is {}".format(K))

    V = sent.dot(kernel_V)
    if verbose:
        print ("V is {}".format(V))
    numerator = Q.dot(K.T)
    if verbose:
        print ("Numerator is {}".format(numerator))
    
    numerator_div_sqrt = numerator/np.sqrt(Q.shape[1])
    if verbose:
        print ("Numerator after div by sqrt is {}".format(numerator_div_sqrt))
    
    softmax = np.exp(numerator_div_sqrt)/np.sum(np.exp(numerator_div_sqrt), axis=1)
    if verbose:
        print ("Softmax numerator is {}".format(softmax))
        print ("Softmax shape is {}".format(softmax.shape))
    
    final = softmax.dot(V)
    if verbose:
        print ("Final dot product is {}".format(final))
    return final, softmax

In [22]:
embedding_output = transformer_model.layers[1].output

In [23]:
emb_model = Model(transformer_model.input, embedding_output)

In [47]:
def check_sent_association(sent, emb_model, sdpa_weights, tokenizer, maxlen):
    emb_val = emb_model.predict(convert_text_for_prediction(sent,tokenizer,maxlen))
    final, softmax = compute_sdpa(emb_val[0], sdpa_weights[0],sdpa_weights[1],sdpa_weights[2])
    words = sent.split()
    indexes = [i for i in range(maxlen-len(words),200)]
    values = {indexes[ind]:w for ind,w in enumerate(words)}
    assoc_words = []
    attn_report = softmax.argsort(axis=1)[-len(words):][:,-len(words):]
    for ind, prob in enumerate(attn_report):
        assoc_words.append ({words[ind]:[values[k] if k in values else 'NA' for k in prob][::-1]})
    return assoc_words
    

In [48]:
def print_association(association):
    for item in association:
        print(item)

In [30]:
association = check_sent_association("women are good men are trash", emb_model, sdpa_weights_3, tokenizer, 200)

In [31]:
print_association(association)

{'women': ['good', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'good': ['good', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'men': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'trash': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA']}


In [36]:
association = check_sent_association("whenever i see that mother fucker i get angry. I think is a gone mad", emb_model, sdpa_weights_3, tokenizer, 200)
print_association(association)

{'whenever': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'i': ['I', 'get', 'see', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'see': ['I', 'get', 'see', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'that': ['I', 'get', 'see', 'think', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'mother': ['see', 'I', 'get', 'think', 'angry.', 'that', 'i', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'fucker': ['I', 'get', 'see', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'i': ['fucker', 'is', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'get': ['I', 'get', 'see', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'angry.': ['I', 'get', 'see', 'think', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'I': ['I', 'get', 'see', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'N

In [34]:
association = check_sent_association("whenever i see that mother fucker i get angry. I think is a gone mad", emb_model, sdpa_weights_2, tokenizer, 200)
print_association(association)

{'whenever': ['fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'i': ['that', 'mother', 'a', 'is', 'fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'see': ['angry.', 'i', 'that', 'a', 'is', 'mother', 'fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'that': ['angry.', 'i', 'that', 'a', 'mother', 'is', 'fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'mother': ['a', 'is', 'fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'fucker': ['fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'i': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'get': ['angry.', 'i', 'that', 'a', 'is', 'mother', 'fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'angry.': ['angry.', 'i', 'that', 'a', 'mother', 'is', 'fucker', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'I': ['angry.', 'i',



In [129]:
association = check_sent_association("women are good men are trash", emb_model, sdpa_weights_2, tokenizer, 200)
print_association(association)

{'women': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['trash', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'good': ['trash', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'men': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['trash', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'trash': ['trash', 'NA', 'NA', 'NA', 'NA', 'NA']}




In [38]:
association = check_sent_association("It is ridiculous that these guys are being called protesters. Being armed is a threat of violence , which makes them terrorists", emb_model, sdpa_weights_3, tokenizer, 200)
print_association(association)

{'It': ['a', 'is', 'ridiculous', 'that', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'is': ['are', 'is', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'ridiculous': ['are', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'that': ['are', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'these': ['are', 'is', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'guys': ['are', 'is', 'that', 'makes', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['are', 'is', 'ridiculous', 'a', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', '

In [42]:
association = check_sent_association("haha you guys are a bunch of losers", emb_model, sdpa_weights_3, tokenizer, 200)
print_association(association)

{'haha': ['a', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'you': ['you', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'guys': ['you', 'guys', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['guys', 'you', 'haha', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'a': ['you', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'bunch': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'of': ['haha', 'you', 'guys', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'losers': ['a', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}


## Transfomer trained along with YK

In [30]:
def get_multi_head_attn(input, head_size, embedding_size,name="multi_head"):
    transformers = []
    for i in range(head_size):
        x1 = tr.SDPA(150)(input)
#         xh = Dense(100, activation='relu')(x1)
        transformers.append(x1)
    x = Concatenate()(transformers)
    x = Dense(embedding_size, name=name)(x)
    return x
    

In [31]:
def get_transformer(input, head_size=5, embedding_size=100,index=0):
    multi_head = get_multi_head_attn(input, head_size, embedding_size, "multi_head_{}".format(index))
    add_out = Add()([multi_head, input])
    norm_out = tr.LayerNormalization(name="multi_head_norm_{}".format(index))(add_out)

    ffout = TimeDistributed(Dense(embedding_size,activation='relu'))(norm_out)
    add_out = Add()([norm_out, ffout])
    norm_out = tr.LayerNormalization(name="transformer_{}".format(index))(add_out)
    return norm_out

In [38]:
def get_transformer_yk_model(multi_head_size,embedding,maxlen,depth=1):
    input = Input(shape=(maxlen,), name="input_sentence")
    embx = Embedding(input_dim=embedding.shape[0], output_dim=embedding.shape[1], input_length=maxlen, weights=[embedding], trainable=True, mask_zero=False)(input)
    x = tr.PositionalEncoder()(embx)
    for i in range(depth):
        x = get_transformer(x,multi_head_size,embedding.shape[1],index=i)
#     x = Flatten()(x)
#     print(x.shape)
    maxoutput = GlobalMaxPool1D()(x)
#     maxoutput = Flatten()(maxoutput)
    avgoutput = GlobalAveragePooling1D()(x)
    transformer_output_concat = Concatenate()([maxoutput,avgoutput])
  
    cnn_x1 = Conv1D(kernel_size=2, filters=128)(embx)
    cnn_x2 = Conv1D(kernel_size=3,filters=128)(embx)
    cnn_x3 = Conv1D(kernel_size=4,filters=128)(embx)
    cnn_x4 = Conv1D(kernel_size=5,filters=128)(embx)
    cnn_x1_mp = GlobalMaxPool1D()(cnn_x1)
    cnn_x2_mp = GlobalMaxPool1D()(cnn_x2)
    cnn_x3_mp = GlobalMaxPool1D()(cnn_x3)
    cnn_x4_mp = GlobalMaxPool1D()(cnn_x4)
    cnn_x1_avg = GlobalAveragePooling1D()(cnn_x1)
    cnn_x2_avg = GlobalAveragePooling1D()(cnn_x2)
    cnn_x3_avg = GlobalAveragePooling1D()(cnn_x3)
    cnn_x4_avg = GlobalAveragePooling1D()(cnn_x4)
    concat_layer = Concatenate()([cnn_x1_mp,cnn_x2_mp,cnn_x3_mp,cnn_x4_mp,cnn_x1_avg,cnn_x2_avg,cnn_x3_avg,cnn_x4_avg])
#     flatten_layer = Flatten()(concat_layer)
#     concat_layer_drop = Dropout(0.2)(concat_layer)
    yk_output_dense = Dense(128, activation='relu')(concat_layer)
    transformer_output_dense = Dense(128, activation='relu')(transformer_output_concat)

    add_out = Add()([transformer_output_dense,yk_output_dense])
    norm_out = tr.LayerNormalization(name="yk_transformer_normalizer")(add_out)

#     concat_output = Concatenate()([])
    output_dense = Dense(128, activation='relu')(norm_out)

    output_dense = Dropout(0.2)(output_dense)
#     output_dense = Dense(32, activation='relu')(output_dense)
#     output_dense = Dropout(0.2)(output_dense)
    output = Dense(1, activation='sigmoid')(output_dense)
    return Model(input,output)

In [39]:
model = get_transformer_yk_model(2,embedding,200,depth=1)

PE shape is (200, 300)
The PE output shape is Tensor("positional_encoder_6/add:0", shape=(?, 200, 300), dtype=float32)


In [40]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_sentence (InputLayer)     (None, 200)          0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 200, 300)     30450300    input_sentence[0][0]             
__________________________________________________________________________________________________
positional_encoder_6 (Positiona (None, 200, 300)     60000       embedding_7[0][0]                
__________________________________________________________________________________________________
sdpa_12 (SDPA)                  (None, 200, 150)     135000      positional_encoder_6[0][0]       
__________________________________________________________________________________________________
sdpa_13 (S

In [41]:
model.load_weights('models/best_model_transformer_yk_embtrue_0001__3.h5')

In [49]:
emb_model = Model(model.input, model.layers[1].output)

In [43]:
sdpa1 = transformer_model.layers[3]
sdpa2 = transformer_model.layers[4]

In [44]:
sdpa_weights = sdpa1.get_weights()
sdpa_weights_2 = sdpa2.get_weights()

In [50]:
association = check_sent_association("haha you guys are a bunch of losers", emb_model, sdpa_weights, tokenizer, 200)
print_association(association)

{'haha': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'you': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'guys': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'a': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'bunch': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'of': ['of', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'losers': ['haha', 'guys', 'bunch', 'NA', 'NA', 'NA', 'NA', 'NA']}


In [52]:
association = check_sent_association("It is ridiculous that these guys are being called protesters. Being armed is a threat of violence , which makes them terrorists", emb_model, sdpa_weights, tokenizer, 200)
print_association(association)

{'It': ['NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'is': ['threat', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'ridiculous': ['threat', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'that': ['threat', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'these': ['threat', 'violence', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'guys': ['threat', 'which', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA']}
{'are': ['threat', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', '