In [46]:
import tensorflow as tf
from tqdm import tqdm
import numpy as np
from transformers import RobertaTokenizer, RobertaConfig, TFBertModel
from sklearn.model_selection import train_test_split
import pickle
import random
import sys
import math
from scipy import stats
from scipy.spatial import distance
import random
import os
import pandas as pd

from tensorflow.keras.preprocessing.sequence import pad_sequences

In [47]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
# Limit GPU memory growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

Num GPUs Available:  1
1 Physical GPUs, 1 Logical GPUs


In [55]:
base_folder = "../../datasets/"
input_file = "hate_int_prof_SVO.tsv"
output_folder = "output_weights/"
output_file = "output_weights"
roberta_model = "roberta-base"

max_length = 512
TEST_SIZE = 0.2
seed = 42

use_attention = True

roberta_dropout = 0.2
lstm_units = 256
dense_units = 50
lstm_dropout = 0.1
dense_dropout = 0.2
epochs = 10 #(Default 10)
batch_size = 1

def random_seed(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

random_seed(seed)

In [56]:
dataframe = pd.read_table(base_folder + input_file)
dataframe.head()

Unnamed: 0,Sentence,Intensity,Profanity,Subject,Verb,Object
0,Islam looks like a cult more than like a relig...,9.0,1,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,We must send Islamics back to their native cou...,7.0,0,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"Europe is civilised, Muslims should not stay t...",7.0,0,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
3,"If they love Sharia law so much, why do not th...",8.0,0,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,Islam=evil. Islam is invading us and trying to...,7.0,0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ..."


In [57]:
## Define base Roberta configs
# config = RobertaConfig(dropout=roberta_dropout, attention_dropout=roberta_dropout, output_attentions=True)
config = RobertaConfig.from_pretrained(roberta_model, output_hidden_states=False, attention_probs_dropout_prob=roberta_dropout, output_attentions = True)
config.output_hidden_states = False

transformer_model = TFBertModel.from_pretrained(roberta_model, config=config)
for layer in transformer_model.layers[:3]:  ## Freeze first 3 layers
    layer.trainable = False

# Defining tokonizer
tokenizer = RobertaTokenizer.from_pretrained(roberta_model, do_lower_case=True, add_special_tokens=True, max_length=max_length, pad_to_max_length=True)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['roberta.encoder.layer.3.attention.self.value.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.10.attention.output.dense.weight', 'roberta.encoder.layer.11.output.dense.weight', 'roberta.encoder.layer.11.attention.self.query.bias', 'roberta.encoder.layer.7.intermediate.dense.weight', 'roberta.encoder.layer.4.output.dense.bias', 'roberta.encoder.layer.6.attention.self.query.weight', 'roberta.encoder.layer.1.attention.self.query.weight', 'roberta.encoder.layer.1.attention.output.dense.weight', 'roberta.encoder.layer.6.attention.self.query.bias', 'roberta.encoder.layer.6.intermediate.dense.weight', 'roberta.encoder.layer.2.attention.self.value.bias', 'roberta.encoder.layer.5.attention.self.query.weight', 'roberta.encoder.layer.10.attention.output.dense.bias', 'roberta.encoder.layer.0.attention.self.key.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm

In [58]:
input_ids_in = tf.keras.layers.Input(shape=(max_length, ), name='input_token', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(max_length, ), name='masked_token', dtype='int32')

embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]

X = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(lstm_units, return_sequences=True, dropout=lstm_dropout, 
                             recurrent_dropout=lstm_dropout, kernel_initializer='normal')
    )(embedding_layer)

if use_attention:
    X = tf.keras.layers.Attention(use_scale=True)([X, X])  # Use attention.
    
X = tf.keras.layers.GlobalMaxPool1D()(X)
X = tf.keras.layers.Dense(dense_units, activation='relu',  kernel_initializer='normal')(X)
X = tf.keras.layers.Dropout(dense_dropout)(X)
X = tf.keras.layers.Dense(1, activation='linear', kernel_initializer='normal')(X)

model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=X)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['acc', tf.keras.metrics.RootMeanSquaredError()])
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_token (InputLayer)       [(None, 512)]        0           []                               
                                                                                                  
 masked_token (InputLayer)      [(None, 512)]        0           []                               
                                                                                                  
 tf_bert_model_8 (TFBertModel)  TFBaseModelOutputWi  124645632   ['input_token[0][0]',            
                                thPoolingAndCrossAt               'masked_token[0][0]']           
                                tentions(last_hidde                                               
                                n_state=(None, 512,                                         

In [59]:
def tokenize(sentences, subject_embeddings, verb_embeddings, object_embeddings, tokenizer, sentence_length):
    input_ids, input_masks, input_segments = [], [], []
    
    for sentence, subj_emb, verb_emb, obj_emb in tqdm(zip(sentences, subject_embeddings, verb_embeddings, object_embeddings)):
        inputs = tokenizer.encode_plus(sentence, add_special_tokens=True, max_length=sentence_length,
                                       pad_to_max_length=True, return_attention_mask=True, return_token_type_ids=True)
        input_ids.append(inputs['input_ids'])
        input_masks.append(inputs['attention_mask'])
        input_segments.append(inputs['token_type_ids'])
        
        # Concatenate embeddings with tokenized inputs (128 + (128*3))
        input_ids[-1].extend(subj_emb.tolist())
        input_ids[-1].extend(verb_emb.tolist())
        input_ids[-1].extend(obj_emb.tolist())
        
        # Update attention mask and token type ids accordingly
        input_masks[-1].extend([1] * len(subj_emb))
        input_masks[-1].extend([1] * len(verb_emb))
        input_masks[-1].extend([1] * len(obj_emb))
        
        input_segments[-1].extend([1] * len(subj_emb))
        input_segments[-1].extend([1] * len(verb_emb))
        input_segments[-1].extend([1] * len(obj_emb))

    return np.asarray(input_ids, dtype='int32'), np.asarray(input_masks, dtype='int32'), np.asarray(input_segments, dtype='int32')

# Assuming you have defined max_length and tokenizer earlier in your code

input_data = pd.read_table(base_folder + input_file)

sentences = input_data['Sentence'].tolist()
intensity_value = input_data['Intensity'].astype(int).tolist()

SVO_length = 128
subject_embeddings = (pad_sequences([np.fromstring(embedding[1:-1], dtype=int, sep=',') for embedding in input_data["Subject"].tolist()], maxlen=SVO_length, padding='post'))
verb_embeddings = (pad_sequences([np.fromstring(embedding[1:-1], dtype=int, sep=',') for embedding in input_data["Verb"].tolist()], maxlen=SVO_length, padding='post'))
object_embeddings = (pad_sequences([np.fromstring(embedding[1:-1], dtype=int, sep=',') for embedding in input_data["Object"].tolist()], maxlen=SVO_length, padding='post'))

c = list(zip(intensity_value, sentences, subject_embeddings, verb_embeddings, object_embeddings))
random.shuffle(c)
intensity_value, sentences, subject_embeddings, verb_embeddings, object_embeddings = zip(*c)

X_tr, X_te, y_tr, y_te = train_test_split(sentences, intensity_value, test_size=TEST_SIZE, random_state=1)

sentence_length = 128
train_input_ids, train_input_masks, train_input_segment = tokenize(X_tr, subject_embeddings, verb_embeddings, object_embeddings, tokenizer, sentence_length)
test_input_ids, test_input_masks, test_input_segment = tokenize(X_te, subject_embeddings, verb_embeddings, object_embeddings, tokenizer, sentence_length)

y_tr = np.asarray(y_tr)
y_te = np.asarray(y_te)

0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
4843it [00:01, 3817.02it/s]
1211it [00:00, 4767.74it/s]


In [60]:
# print(X_tr) # Training sentences
print("Train input ids: \n", len(train_input_ids[100]), 
      "\n\nAttention masks: \n", len(train_input_masks[0]), 
      "\n\nToken type ids: \n", len(train_input_segment[0])) # input_ids, attention_masks, token_type_ids

Train input ids: 
 512 

Attention masks: 
 512 

Token type ids: 
 512


In [61]:
model.fit(x=[train_input_ids, train_input_masks], y=y_tr, epochs=epochs, validation_split=0.1, batch_size=batch_size)

Epoch 1/10


ResourceExhaustedError: Graph execution error:

Detected at node 'model_7/bidirectional_7/forward_lstm_7/while/lstm_cell_22/mul_8' defined at (most recent call last):
    File "C:\ProgramData\anaconda3\envs\research\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\ProgramData\anaconda3\envs\research\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "C:\ProgramData\anaconda3\envs\research\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "C:\ProgramData\anaconda3\envs\research\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "C:\ProgramData\anaconda3\envs\research\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\IPython\core\interactiveshell.py", line 3048, in run_cell
      result = self._run_cell(
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\IPython\core\interactiveshell.py", line 3103, in _run_cell
      result = runner(coro)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\IPython\core\interactiveshell.py", line 3308, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\IPython\core\interactiveshell.py", line 3490, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\IPython\core\interactiveshell.py", line 3550, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Abhishek Poswal\AppData\Local\Temp\ipykernel_27204\300353656.py", line 1, in <module>
      model.fit(x=[train_input_ids, train_input_masks], y=y_tr, epochs=epochs, validation_split=0.1, batch_size=batch_size)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\bidirectional.py", line 277, in __call__
      return super().__call__(inputs, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\bidirectional.py", line 404, in call
      y = self.forward_layer(
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\base_rnn.py", line 553, in __call__
      return super().__call__(inputs, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\lstm.py", line 625, in call
      last_output, outputs, states = backend.rnn(
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\backend.py", line 5139, in rnn
      final_outputs = tf.compat.v1.while_loop(
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\backend.py", line 5118, in _step
      output, new_states = step_function(
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\lstm.py", line 623, in step
      return self.cell(inputs, states, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\lstm.py", line 323, in call
      c, o = self._compute_carry_and_output(x, h_tm1, c_tm1)
    File "C:\ProgramData\anaconda3\envs\research\lib\site-packages\keras\layers\rnn\lstm.py", line 253, in _compute_carry_and_output
      c = f * c_tm1 + i * self.activation(
Node: 'model_7/bidirectional_7/forward_lstm_7/while/lstm_cell_22/mul_8'
failed to allocate memory
	 [[{{node model_7/bidirectional_7/forward_lstm_7/while/lstm_cell_22/mul_8}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_80775]

In [None]:
results = model.evaluate(x=[test_input_ids, test_input_masks], y=y_te)
print(results)

result = model.predict(x=[test_input_ids, test_input_masks])
result = np.array(result, dtype=np.float64)
result = result.flatten()

In [None]:
_, _, rmse = results
print("RMSE", rmse)
print("Pearson", stats.pearsonr(result, y_te))
print("Cosine", 1 - distance.cosine(result, y_te))