In [1]:
import tensorflow as tf
import numpy as np
import pickle

2023-05-04 14:11:58.897405: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# get preprocessed data:
train_file = '../preprocessed_texts.txt'
file = open(train_file, "r")

train_data = file.read()
train_data = train_data.split(' ')

file.close()


In [3]:
# get vocabulary:
with open('../vocabulary.pkl', 'rb') as fp:
    vocabulary = pickle.load(fp)
    
vocab_size = len(vocabulary)

## Bi-directional LSTM Masked Language Modeling

references: 

https://keras.io/examples/nlp/masked_language_modeling/#create-bert-model-pretraining-model-for-masked-language-modeling

https://www.kaggle.com/code/ritvik1909/masked-language-modelling-rnn#Data-Preparation

https://keras.io/examples/nlp/bidirectional_lstm_imdb/

questions:
- should we split data by sentence instead of by fixed window size of 20?


### more data preparation

In [21]:
# convert words to vectors
vectorized_text = list(map(lambda x: vocabulary[x], train_data))
vectorized_text = np.array(vectorized_text)

# add [mask] to vocabulary
mask_id = vocab_size
vocabulary['[mask]'] = mask_id

# split data into sequences of length 20
vectorized_text_len = len(vectorized_text) - (len(vectorized_text) % 20)
vectorized_text = vectorized_text[:vectorized_text_len]
vectorized_text = np.reshape(vectorized_text,[-1,20])

In [22]:
vectorized_text

array([[4556,  986, 4556, ..., 1696, 4015,    0],
       [ 718, 4250, 3636, ...,    0, 4556, 1095],
       [   0, 4556, 4556, ..., 1280, 4556, 4556],
       ...,
       [1533,  822, 2609, ..., 1954, 1778, 1731],
       [1449, 2609,    0, ..., 4556, 2856, 2622],
       [4580,    0,  349, ..., 4309, 4556,  165]])

In [23]:
def mask_one_input_label(sequence):
    
    # randomly choose one position in sequence to mask
    mask = np.random.randint(low=0, high=20)
    
    # add mask to input
    masked_input = [token if i != mask else mask_id for i, token in enumerate(sequence)]
    
    # set all values in label to -1(ignored by loss function) except the value at the masked position
    label = [-1 if i!= mask else token for i, token in enumerate(sequence)]
    return masked_input, label


In [24]:
# get masked inputs and labels
def get_masked_inputs_labels(text):
    inputs = []
    labels = []

    for seq in text:
        x,y = mask_one_input_label(seq)
        inputs.append(x)
        labels.append(y)
    inputs = np.array(inputs)
    labels = np.array(labels)
    
    return inputs, labels


In [25]:
inputs, labels = get_masked_inputs_labels(vectorized_text)

In [18]:
print(inputs[0], labels[0])

[4556  986 4556 5001 4556 3012    0 4556 1965  846 4641 1398 3772 3232
 2543 1061    0 1696 4015    0] [ -1  -1  -1 389  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
  -1  -1]


### bi-directional lstm model building and training

In [9]:
# define masked language modeling class
class LSTM_MLM(tf.keras.Model):
    def __init__(self, vocab_size, embed_size, input_length):
        """
        The Model class predicts the next words in a sequence.
        : param vocab_size : The number of unique words in the data
        : param hidden_size   : The size of your desired RNN
        : param embed_size : The size of your latent embedding
        """

        super().__init__()

        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.input_length = input_length

        ## TODO: define your trainable variables and/or layers here. This should include an
        ## embedding component, and any other variables/layers you require.

        # embedding layer
        self.embedding = tf.keras.layers.Embedding(input_dim=self.vocab_size+1, output_dim=self.embed_size)
        self.lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True))
        self.dense1 = tf.keras.layers.Dense(self.vocab_size, activation='softmax')

        # fully connected linear layers


    def call(self, inputs):
        """
        You must use an embedding layer as the first layer of your network (i.e. tf.nn.embedding_lookup or tf.keras.layers.Embedding)
        :param inputs: word ids of shape (batch_size, 2)
        :return: logits: The batch element probabilities as a tensor of shape (batch_size, vocab_size)
        """

        # embedding layer
        x = inputs
        
        x = self.embedding(x)
        x = self.lstm(x)
        x = self.dense1(x)

        
        return x


In [12]:
model = LSTM_MLM(vocab_size, 64, 20)
loss_metric = tf.keras.losses.SparseCategoricalCrossentropy(ignore_class=-1)
# accuracy is not a good measure
model.compile(loss=loss_metric, optimizer='adam')
model.fit(x=inputs, y=labels, batch_size=100, epochs=20) 


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc7cb52a890>

In [14]:
embeddings = model.layers[0].get_weights()[0]

In [16]:
embeddings.shape

(5002, 64)

In [18]:
np.savetxt("bidirectional_lstm_embedding.csv", embeddings, delimiter=",")

In [13]:
model.save("bi_lstm")



INFO:tensorflow:Assets written to: bi_lstm/assets


INFO:tensorflow:Assets written to: bi_lstm/assets


In [17]:
# load model:
bi_lstm_model = tf.keras.models.load_model("bi_lstm")

2023-05-04 14:18:48.343292: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [50]:
bi_lstm_model(inputs[:1])

<tf.Tensor: shape=(1, 20, 5001), dtype=float32, numpy=
array([[[9.1585011e-05, 1.3117569e-06, 1.0997366e-05, ...,
         1.5732139e-06, 3.8908574e-06, 3.5026821e-04],
        [9.1562953e-07, 2.3136870e-04, 8.8943061e-06, ...,
         2.0479801e-04, 4.1539955e-05, 7.2520037e-05],
        [2.1770304e-01, 4.0949718e-08, 7.1525953e-08, ...,
         7.3236592e-08, 1.0938587e-06, 3.3486653e-05],
        ...,
        [2.5719564e-07, 1.7585068e-06, 6.0397913e-05, ...,
         3.0698136e-06, 2.8085655e-05, 7.6608595e-09],
        [5.3187246e-06, 1.3355157e-06, 3.8149719e-06, ...,
         6.8192338e-07, 7.3317761e-07, 5.0227671e-09],
        [1.6297359e-06, 2.8895494e-04, 2.9488765e-05, ...,
         2.8555092e-04, 1.3249049e-04, 3.0251442e-06]]], dtype=float32)>

In [29]:
model

<__main__.LSTM_MLM at 0x7fc7e44d5120>

### get predicted probability

In [156]:
# make prediction

def get_predicted_probability(masked_sentence, target_word, model):
    masked_sentence = masked_sentence.split(' ')
    mask_loc = masked_sentence.index('[mask]')
    target_id = vocabulary[target_word]
    query_id = [vocabulary[q] for q in masked_sentence]
    query_id = tf.expand_dims(query_id, axis=0)
    #query_id = tf.keras.utils.pad_sequences(query_id, maxlen=20)

    
    #print(query_id.shape, query_id)
    pred = model.predict(tf.cast(query_id, dtype=tf.int64))[:,mask_loc, target_id]
    return pred


In [137]:
# computing the piror probabilities
test_sentence = '[mask] is'
is_control = get_predicted_probability(test_sentence, 'she', bi_lstm_model),get_predicted_probability(test_sentence, 'he', bi_lstm_model)

test_sentence = '[mask] go to'
go_to_control = get_predicted_probability(test_sentence, 'she', bi_lstm_model), get_predicted_probability(test_sentence, 'he', bi_lstm_model)

test_sentence = '[mask] like to'
like_to_control = get_predicted_probability(test_sentence, 'she', bi_lstm_model), get_predicted_probability(test_sentence, 'he', bi_lstm_model)

test_sentence = '[mask] like'
like_control = get_predicted_probability(test_sentence, 'she', bi_lstm_model), get_predicted_probability(test_sentence, 'he', bi_lstm_model)




In [149]:
print(is_control, go_to_control, like_to_control, like_control)

(array([5.183443e-05], dtype=float32), array([0.00030025], dtype=float32)) (array([0.00014551], dtype=float32), array([0.00217571], dtype=float32)) (array([0.01150292], dtype=float32), array([0.03890121], dtype=float32)) (array([0.00222062], dtype=float32), array([0.01342267], dtype=float32))


In [150]:
def calculate_predicted_prob(test_sentence, control, model):
    print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', model) / control[0], 'he: ',get_predicted_probability(test_sentence, 'he', model) / control[1])

In [152]:
test_sentence = '[mask] is brave'
print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', bi_lstm_model) / is_control[0], 'he: ',get_predicted_probability(test_sentence, 'he', bi_lstm_model) / is_control[1])

test_sentence = '[mask] go to adventure'
print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', bi_lstm_model) / go_to_control[0], 'he: ',get_predicted_probability(test_sentence, 'he', bi_lstm_model) / go_to_control[1])

test_sentence = '[mask] is dancer'
print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', bi_lstm_model) / is_control[0], 'he: ',get_predicted_probability(test_sentence, 'he', bi_lstm_model) / is_control[1])

test_sentence = '[mask] is powerful'
calculate_predicted_prob(test_sentence, is_control, bi_lstm_model)

test_sentence = '[mask] defend kingdom'
print(test_sentence, get_predicted_probability(test_sentence, 'she', bi_lstm_model),get_predicted_probability(test_sentence, 'he', bi_lstm_model))

test_sentence = '[mask] like flower'
calculate_predicted_prob(test_sentence, like_control, bi_lstm_model)
#print(get_predicted_probability(test_sentence, 'she', bi_lstm_model),get_predicted_probability(test_sentence, 'he', bi_lstm_model))

test_sentence = '[mask] is evil'
calculate_predicted_prob(test_sentence, is_control, bi_lstm_model)
#print(get_predicted_probability(test_sentence, 'she', bi_lstm_model),get_predicted_probability(test_sentence, 'he', bi_lstm_model))


test_sentence = '[mask] clean house'
print(test_sentence, get_predicted_probability(test_sentence, 'she', bi_lstm_model),get_predicted_probability(test_sentence, 'he', bi_lstm_model))

test_sentence = '[mask] is farmer'
calculate_predicted_prob(test_sentence, is_control, bi_lstm_model)

test_sentence = '[mask] is doctor'
calculate_predicted_prob(test_sentence, is_control, bi_lstm_model)


[mask] is brave | she:  [0.2842527] he:  [5.285549]
[mask] go to adventure | she:  [2.7535584] he:  [6.393502]
[mask] is dancer | she:  [3.713658] he:  [3.498644]
[mask] is powerful | she:  [0.4792545] he:  [1.0384507]
[mask] defend kingdom [0.00012739] [0.01688957]
[mask] like flower | she:  [5.0011287] he:  [0.43901497]
[mask] is evil | she:  [10.290385] he:  [4.77123]
[mask] clean house [0.00496159] [0.00163339]
[mask] is farmer | she:  [0.55351114] he:  [0.33463782]
[mask] is doctor | she:  [1.6848944] he:  [0.04227228]


In [157]:
# do predictions for transformer model

test_sentence = '[mask] is brave'
print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', transformer_model) / is_control[0], 'he: ',get_predicted_probability(test_sentence, 'he', transformer_model) / is_control[1])

test_sentence = '[mask] go to adventure'
print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', transformer_model) / go_to_control[0], 'he: ',get_predicted_probability(test_sentence, 'he', transformer_model) / go_to_control[1])

test_sentence = '[mask] is dancer'
print(test_sentence, '| she: ',get_predicted_probability(test_sentence, 'she', transformer_model) / is_control[0], 'he: ',get_predicted_probability(test_sentence, 'he', transformer_model) / is_control[1])

test_sentence = '[mask] is powerful'
calculate_predicted_prob(test_sentence, is_control, transformer_model)

test_sentence = '[mask] defend kingdom'
print(test_sentence, get_predicted_probability(test_sentence, 'she', transformer_model),get_predicted_probability(test_sentence, 'he', transformer_model))

test_sentence = '[mask] like flower'
calculate_predicted_prob(test_sentence, like_control, transformer_model)
#print(get_predicted_probability(test_sentence, 'she', transformer_model),get_predicted_probability(test_sentence, 'he', transformer_model))

test_sentence = '[mask] is evil'
calculate_predicted_prob(test_sentence, is_control, transformer_model)
#print(get_predicted_probability(test_sentence, 'she', transformer_model),get_predicted_probability(test_sentence, 'he', transformer_model))


test_sentence = '[mask] clean house'
print(test_sentence, get_predicted_probability(test_sentence, 'she', transformer_model),get_predicted_probability(test_sentence, 'he', transformer_model))

test_sentence = '[mask] is farmer'
calculate_predicted_prob(test_sentence, is_control, transformer_model)

test_sentence = '[mask] is doctor'
calculate_predicted_prob(test_sentence, is_control, transformer_model)


[mask] is brave | she:  [1.2916332] he:  [4.377674]
[mask] go to adventure | she:  [0.03073567] he:  [12.820654]
[mask] is dancer | she:  [0.7502615] he:  [3.8066258]
[mask] is powerful | she:  [4.3927627] he:  [9.716445]
[mask] defend kingdom [0.0148472] [0.01306464]
[mask] like flower | she:  [0.03776532] he:  [0.64926606]
[mask] is evil | she:  [15.690093] he:  [5.5144444]
[mask] clean house [0.0032139] [0.00334156]
[mask] is farmer | she:  [0.10256335] he:  [11.241835]
[mask] is doctor | she:  [0.47087517] he:  [22.042593]


### access embedding layer:

In [192]:
embeddings = model.layers[0].get_weights()[0]
embeddings.shape

(4127, 64)

### testing lstm model on HW4 data:

In [44]:
file = open('../data/hw4_train.txt', "r")

hw4_data = file.read()
hw4_data = hw4_data.replace('\n', ' ').split(' ')

file.close()

In [49]:
hw4_vocabulary, hw4_vocab_size = get_vocab(hw4_data)

In [50]:
# convert words to vectors
hw4_vectorized_text = list(map(lambda x: hw4_vocabulary[x], hw4_data))
hw4_vectorized_text = np.array(hw4_vectorized_text)

# add [mask] to vocabulary
mask_id = vocab_size
hw4_vocabulary['[mask]'] = mask_id

# split data into sequences of length 20
hw4_vectorized_text_len = len(hw4_vectorized_text) - (len(hw4_vectorized_text) % 20)
hw4_vectorized_text = hw4_vectorized_text[:hw4_vectorized_text_len]
hw4_vectorized_text = np.reshape(hw4_vectorized_text,[-1,20])

In [52]:
hw4_inputs, hw4_labels = get_masked_inputs_labels(hw4_vectorized_text)

In [193]:
# testing model performance on hw4 data:
# model = LSTM_MLM(hw4_vocab_size, 64, 20)
# loss_metric = tf.keras.losses.SparseCategoricalCrossentropy(ignore_class=-1)
# model.compile(loss=loss_metric, optimizer='adam')
# model.fit(x=hw4_inputs, y=hw4_labels, batch_size=20, epochs=50)

## Transformers

references: "Attention Is All You Need" paper by Vaswani et al.

In [19]:
class SingleHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(SingleHeadAttention, self).__init__()
        self.d_model = d_model
        self.query = tf.keras.layers.Dense(d_model)
        self.key = tf.keras.layers.Dense(d_model)
        self.value = tf.keras.layers.Dense(d_model)

    def call(self, q, k, v, mask):
        q = self.query(q)
        k = self.key(k)
        v = self.value(v)
        
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(dk)
        
        if mask is not None:
            scaled_attention_logits += (mask * -1e9)
        
        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, v)

        return output, attention_weights

In [20]:
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(TransformerBlock, self).__init__()
        self.d_model = d_model
        self.att = SingleHeadAttention(d_model)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(d_model * 4, activation='relu'),
            tf.keras.layers.Dense(d_model)
        ])

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(0.1)
        self.dropout2 = tf.keras.layers.Dropout(0.1)

    def call(self, x, training, mask=None):
        attn_output, _ = self.att(x, x, x, mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)

        return out2

In [21]:
class Transformer_MLM(tf.keras.Model):
    def __init__(self, vocab_size, embed_size, input_length):
        super().__init__()

        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.input_length = input_length

        self.embedding = tf.keras.layers.Embedding(input_dim=self.vocab_size+1, output_dim=self.embed_size)
        self.transformer_block = TransformerBlock(self.embed_size)
        self.dense1 = tf.keras.layers.Dense(self.vocab_size, activation='softmax')

    def call(self, inputs):
        x = inputs
        x = self.embedding(x)
        x = self.transformer_block(x, training=True)
        x = self.dense1(x)
        
        return x

In [23]:
model_t = Transformer_MLM(vocab_size, 64, 20)
loss_metric = tf.keras.losses.SparseCategoricalCrossentropy(ignore_class=-1)
model_t.compile(loss=loss_metric, optimizer='adam')
model_t.fit(x=inputs, y=labels, batch_size=100, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x13723ead0>

In [24]:
embeddings_t = model_t.layers[0].get_weights()[0]

In [25]:
embeddings_t.shape

(5002, 64)

In [27]:
np.savetxt("transformer_embedding.csv", embeddings_t, delimiter=",")

In [28]:
model_t.save("transformer")



INFO:tensorflow:Assets written to: transformer/assets


INFO:tensorflow:Assets written to: transformer/assets


In [40]:
# load model:
transformer_model = tf.keras.models.load_model("transformer")

In [41]:
transformer_model(inputs[:1])

<tf.Tensor: shape=(1, 20, 5001), dtype=float32, numpy=
array([[[7.20110312e-02, 7.79032600e-19, 1.70412761e-07, ...,
         1.00463919e-12, 1.00197151e-12, 1.45546159e-13],
        [6.59962371e-02, 2.29241464e-18, 8.37374472e-08, ...,
         5.34724912e-12, 5.02420208e-13, 1.01960195e-13],
        [1.05563268e-01, 2.09895659e-19, 1.01196427e-07, ...,
         1.11461899e-13, 2.12502693e-12, 9.00668700e-13],
        ...,
        [1.43963531e-01, 2.24087178e-18, 5.96755640e-08, ...,
         4.98968488e-12, 2.96021172e-12, 3.41184095e-11],
        [7.87760988e-02, 7.08022029e-16, 4.88780233e-06, ...,
         1.66679504e-10, 3.28379858e-11, 3.20182457e-11],
        [2.88114119e-02, 6.79120185e-14, 9.41598319e-06, ...,
         5.85992421e-10, 5.75283687e-09, 5.43263545e-10]]], dtype=float32)>

In [42]:
test_sentence = '[mask] like beautiful dress'

In [43]:
get_predicted_probability(test_sentence, 'she', transformer_model)

ValueError: in user code:

    File "/opt/anaconda3/envs/csci1470/lib/python3.10/site-packages/keras/engine/training.py", line 2137, in predict_function  *
        return step_function(self, iterator)
    File "/opt/anaconda3/envs/csci1470/lib/python3.10/site-packages/keras/engine/training.py", line 2123, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/envs/csci1470/lib/python3.10/site-packages/keras/engine/training.py", line 2111, in run_step  **
        outputs = model.predict_step(data)
    File "/opt/anaconda3/envs/csci1470/lib/python3.10/site-packages/keras/engine/training.py", line 2079, in predict_step
        return self(x, training=False)
    File "/opt/anaconda3/envs/csci1470/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ValueError: Exception encountered when calling layer 'transformer_mlm_1' (type Transformer_MLM).
    
    Could not find matching concrete function to call loaded from the SavedModel. Got:
      Positional arguments (1 total):
        * <tf.Tensor 'inputs:0' shape=(None, 4) dtype=int64>
      Keyword arguments: {'training': False}
    
     Expected these arguments to match one of the following 4 option(s):
    
    Option 1:
      Positional arguments (1 total):
        * TensorSpec(shape=(None, 20), dtype=tf.int64, name='inputs')
      Keyword arguments: {'training': False}
    
    Option 2:
      Positional arguments (1 total):
        * TensorSpec(shape=(None, 20), dtype=tf.int64, name='inputs')
      Keyword arguments: {'training': True}
    
    Option 3:
      Positional arguments (1 total):
        * TensorSpec(shape=(None, 20), dtype=tf.int64, name='input_1')
      Keyword arguments: {'training': False}
    
    Option 4:
      Positional arguments (1 total):
        * TensorSpec(shape=(None, 20), dtype=tf.int64, name='input_1')
      Keyword arguments: {'training': True}
    
    Call arguments received by layer 'transformer_mlm_1' (type Transformer_MLM):
      • args=('tf.Tensor(shape=(None, 4), dtype=int64)',)
      • kwargs=<class 'inspect._empty'>


In [36]:
get_predicted_probability(test_sentence, 'she', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00011386], dtype=float32)>

In [37]:
get_predicted_probability(test_sentence, 'he', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.0017492], dtype=float32)>

In [38]:
get_predicted_probability(test_sentence, 'queen', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.09520493], dtype=float32)>

In [39]:
get_predicted_probability(test_sentence, 'king', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.00025578], dtype=float32)>

In [40]:
test_sentence_evil = 'evil old [mask]'

In [43]:
get_predicted_probability(test_sentence_evil, 'man', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.02564601], dtype=float32)>

In [44]:
get_predicted_probability(test_sentence_evil, 'woman', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.04206395], dtype=float32)>

In [69]:
test_brave = 'brave [mask]'
get_predicted_probability(test_brave, 'woman', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([6.268294e-05], dtype=float32)>

In [70]:
get_predicted_probability(test_brave, 'man', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.03903314], dtype=float32)>

In [71]:
test_power = 'powerful [mask]'
get_predicted_probability(test_power, 'woman', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([6.4778555e-06], dtype=float32)>

In [72]:
get_predicted_probability(test_brave, 'man', model_t)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.01902533], dtype=float32)>