In [176]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel, BertTokenizer, TFBertModel
from sklearn.decomposition import PCA

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

tweets = pd.read_csv('../Data/tweets.csv', encoding='cp1252', header=None)
tweets.columns = ['target','id','date','flag','username','text'] #Change column names to things that make sense
tweets = tweets.drop(columns=['id','date','flag','username']) #Remove unneeded columns from memory
tweets = tweets.replace({'target':{0:0,4:1}}) #Dataset has only 0=negative sent, 4=positive sent, remappping to 0,1 respectivly
# print(tweets.shape)
tweets = tweets.groupby('target').sample(10,random_state=None)
# print(tweets.shape)

# print(tweets.head())
features = tokenizer(tweets['text'].values.tolist(), padding='max_length', truncation=True, max_length=99, return_tensors='tf')
# tweets['text'] = tweets['text'].apply(lambda x: tokenizer(x.lower(), padding=True, truncation=True, max_length=128, return_tensors='tf'))
features = bert_model(**features).last_hidden_state[:,0,:]
# print("ONE FEATURE ASPECT")
# # print(features[14:15, :])

tweets['text'] = features.numpy().tolist()
# print(tweets.head())

X = np.array([x for x in tweets['text']])
Y = np.array([x for x in tweets['target']])
# X = tweets.drop('target', axis=1)
# Y = tweets['target']

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=265)

#try to increase feature size later
pca = PCA(n_components=3)
X_train = pca.fit_transform(X_train)
X_test = pca.fit_transform(X_test)


print(y_train.shape)
print(X_train.shape)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

(16,)
(16, 3)


In [177]:
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.layers import Embedding, Input, Concatenate
from tensorflow.keras.layers import Subtract
from tensorflow.keras.utils import plot_model
import tensorflow as tf

# The transformer architecture 
class TransformerBlock(Layer): # inherit from Keras Layer
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.2):
        super().__init__()
        # setup the model heads and feedforward network
        self.att = MultiHeadAttention(num_heads=num_heads, 
                                      key_dim=embed_dim)
        
        # make a two layer network that processes the attention
        self.ffn = Sequential()
        self.ffn.add( Dense(ff_dim, activation='relu') )
        self.ffn.add( Dense(embed_dim) )
        
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training):
        # apply the layers as needed (similar to PyTorch)
        
        # get the attention output from multi heads
        # Using same inpout here is self-attention
        # call inputs are (query, value, key) 
        # if only two inputs given, value and key are assumed the same
        attn_output = self.att(inputs, inputs)
        
        # create residual output, with attention
        out1 = self.layernorm1(inputs + attn_output)
        
        # apply dropout if training
        out1 = self.dropout1(out1, training=training)
        
        # place through feed forward after layer norm
        ffn_output = self.ffn(out1)
        out2 = self.layernorm2(out1 + ffn_output)
        
        # apply dropout if training
        out2 = self.dropout2(out2, training=training)
        #return the residual from Dense layer
        return out2
    
class TokenAndPositionEmbedding(Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        # create two embeddings 
        # one for processing the tokens (words)
        self.token_emb = Embedding(input_dim=vocab_size, 
                                   output_dim=embed_dim)
        # another embedding for processing the position
        self.pos_emb = Embedding(input_dim=maxlen, 
                                 output_dim=embed_dim)

    def call(self, x):
        # create a static position measure (input)
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        # positions now goes from 0 to 500 (for IMdB) by 1
        positions = self.pos_emb(positions)# embed these positions
        x = self.token_emb(x) # embed the tokens
        return x + positions # add embeddngs to get final embedding

In [178]:
embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer
top_words = 100
NUM_CLASSES =  1

inputs = Input(shape=(X_train.shape[1],))
x = TokenAndPositionEmbedding(X_train.shape[1], top_words, embed_dim)(inputs)
x = TransformerBlock(embed_dim, num_heads, ff_dim)(x)

x = GlobalAveragePooling1D()(x)
x = Dropout(0.2)(x)
x = Dense(20, activation='relu')(x)
x = Dropout(0.2)(x)
outputs = Dense(NUM_CLASSES, activation='sigmoid',
              kernel_initializer='glorot_uniform')(x)

model_xformer = Model(inputs=inputs, outputs=outputs)
print(model_xformer.summary())

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 3)]               0         
                                                                 
 token_and_position_embeddin  (None, 3, 32)            3296      
 g_9 (TokenAndPositionEmbedd                                     
 ing)                                                            
                                                                 
 transformer_block_9 (Transf  (None, 3, 32)            10656     
 ormerBlock)                                                     
                                                                 
 global_average_pooling1d_9   (None, 32)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dropout_3331 (Dropout)      (None, 32)                0   

In [179]:
model_xformer.compile(optimizer='adam', 
                      loss='binary_crossentropy', 
                      metrics=['accuracy'])

history = model_xformer.fit(
    X_train, y_train, batch_size=64, epochs=2, 
    validation_data=(X_test, y_test)
)

Epoch 1/2


InvalidArgumentError: Graph execution error:

Detected at node 'model_8/token_and_position_embedding_9/embedding_18/embedding_lookup' defined at (most recent call last):
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/traitlets/config/application.py", line 978, in launch_instance
      app.start()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2885, in run_cell
      result = self._run_cell(
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2940, in _run_cell
      return runner(coro)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3139, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3318, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3378, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/hx/z5bls9fd24g4pfrwpcmmf25m0000gn/T/ipykernel_25987/836710900.py", line 5, in <module>
      history = model_xformer.fit(
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/training.py", line 1409, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function
      return step_function(self, iterator)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step
      outputs = model.train_step(data)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/training.py", line 889, in train_step
      y_pred = self(x, training=True)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/training.py", line 490, in __call__
      return super().__call__(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/functional.py", line 458, in call
      return self._run_internal_graph(
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/functional.py", line 596, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/var/folders/hx/z5bls9fd24g4pfrwpcmmf25m0000gn/T/ipykernel_25987/909682129.py", line 71, in call
      x = self.token_emb(x) # embed the tokens
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1014, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/Users/ricklattin/tensorflow_2.9/lib/python3.8/site-packages/keras/layers/core/embedding.py", line 199, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'model_8/token_and_position_embedding_9/embedding_18/embedding_lookup'
indices[1,0] = -1 is not in [0, 100)
	 [[{{node model_8/token_and_position_embedding_9/embedding_18/embedding_lookup}}]] [Op:__inference_train_function_340166]

In [None]:
from sklearn import metrics as mt
from matplotlib import pyplot as plt

# yhat_cnn = model_xformer.predict(X_test)
yhat_xformer1 = model_xformer.predict(X_test)
yhat_xformer2 = model_xformer.predict(X_test)

acc = [mt.accuracy_score(y_test,np.round(yhat_xformer1)),
       mt.accuracy_score(y_test,np.round(yhat_xformer2)),
      ]

plt.bar([1,2],acc)
plt.xticks([1,2],['XFORMER1','XFORMER2'])
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
#model.get_layer(*name u get from model.summary*)