In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from transformers import TFBertModel, BertTokenizer, BertConfig

tf.get_logger().setLevel('ERROR')
gpu = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)

#from tensorflow.keras.mixed_precision import experimental as mixed_precision
#policy = mixed_precision.Policy('float32')
#mixed_precision.set_policy(policy)

In [2]:
df = pd.read_csv('df_train_bert.csv')
#df.drop(['id','keyword','location'],axis=1,inplace=True)
df.head()

Unnamed: 0,text,target,text_clean
0,Our Deeds are the Reason of this #earthquake M...,1,our deeds are the reason of this earthquake ma...
1,Forest fire near La Ronge Sask. Canada,1,forest fire near la ronge sask canada
2,All residents asked to 'shelter in place' are ...,1,all residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1,13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1,just got sent this photo from ruby alaska as s...


In [3]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [4]:
seq = df.text_clean.apply(lambda x: len(x.split()))

In [5]:
df.text_clean.head()

0    our deeds are the reason of this earthquake ma...
1                forest fire near la ronge sask canada
2    all residents asked to shelter in place are be...
3    13000 people receive wildfires evacuation orde...
4    just got sent this photo from ruby alaska as s...
Name: text_clean, dtype: object

In [6]:
seq = seq.max()

In [7]:
X = np.zeros((len(df),seq))
M = np.zeros((len(df),seq))

In [8]:
X.shape

(7613, 31)

In [9]:
for i, ligne in enumerate(df.text_clean):
    tokens = tokenizer.encode_plus(ligne, max_length=seq,truncation=True, padding="max_length",
                  add_special_tokens=True,return_token_type_ids=False,
                  return_attention_mask=True, return_tensors='tf')
    X[i,:], M[i,:] = tokens.input_ids, tokens.attention_mask

In [10]:
X[0,:]

array([  101.,  2256., 15616.,  2024.,  1996.,  3114.,  1997.,  2023.,
        8372.,  2089., 16455.,  9641.,  2149.,  2035.,   102.,     0.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
           0.,     0.,     0.,     0.,     0.,     0.,     0.])

In [11]:
test = tf.keras.utils.to_categorical(df.target)

In [12]:
#tensor_df = tf.data.Dataset.from_tensor_slices((X, df.target.values.astype(float)))
tensor_df = tf.data.Dataset.from_tensor_slices((X,M, test))

In [13]:
for X,M, labels in tensor_df.take(1):
    print(X)
    print(M)
    print(labels)

tf.Tensor(
[  101.  2256. 15616.  2024.  1996.  3114.  1997.  2023.  8372.  2089.
 16455.  9641.  2149.  2035.   102.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.], shape=(31,), dtype=float64)
tf.Tensor(
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0.], shape=(31,), dtype=float64)
tf.Tensor([0. 1.], shape=(2,), dtype=float32)


In [14]:
def dict_func(input_id,mask,labels):
    return{'input_id':input_id,'mask':mask}, labels

In [15]:
tensor_df = tensor_df.map(dict_func)

In [16]:
TAKE_SIZE = int(0.7*df.shape[0])
BATCH_SIZE = 16

train_data = tensor_df.take(TAKE_SIZE).shuffle(TAKE_SIZE)
train_data = train_data.batch(BATCH_SIZE)

val_data = tensor_df.skip(TAKE_SIZE).shuffle(df.shape[0]-TAKE_SIZE)
val_data = val_data.batch(BATCH_SIZE)

In [17]:
for train,label in train_data.take(1):
    print(train)
    print(label)

{'input_id': <tf.Tensor: shape=(16, 31), dtype=float64, numpy=
array([[  101.,  2210., 16216., 19665., 10720.,  2378.,  1999.,  2026.,
         3871.,  1045.,  3092.,  2039.,  5094.,  2032.,  2041.,  1045.,
         6878.,  2002.,  2453., 19549.,  2226.,   102.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.],
       [  101.,  2329.,  8670.,  3489.,  2125.,  2001.,  2307.,  3492.,
        26316.,  5312.,  8494., 14540.,  5178.,   102.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.],
       [  101.,  2047.,  7738., 18907., 22148.,  3221.,  3898., 16167.,
         2143.,  2005., 25935.,  1062., 10790.,  2440.,  3191., 23471.,
          102.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.],
       [  101.,  1045.,  9530., 10841.,  2099.,  1996.,  2936.,  2017.,
         5247.,  2007.

In [18]:
bert = TFBertModel.from_pretrained('bert-base-uncased',output_attentions=True)

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [19]:
bert.summary()

Model: "tf_bert_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bert (TFBertMainLayer)       multiple                  109482240 
Total params: 109,482,240
Trainable params: 109,482,240
Non-trainable params: 0
_________________________________________________________________


In [20]:
bert.trainable = False

In [21]:
@tf.autograph.experimental.do_not_convert
def model():
    input_ids = tf.keras.layers.Input(shape=(seq,), name='input_id', dtype='int64')
    mask = tf.keras.layers.Input(shape=(seq,), name='mask', dtype='int64')
    embeddings = bert(input_ids,attention_mask=mask)[0]
    
    
    #X = tf.keras.layers.LSTM(64, return_sequences=False)(embeddings)
    X = tf.keras.layers.GlobalMaxPool1D()(embeddings)
    X = tf.keras.layers.BatchNormalization()(X)
    X = tf.keras.layers.Dense(512, activation='relu')(X)
    X = tf.keras.layers.Dropout(0.5)(X)
    X = tf.keras.layers.Dense(128, activation='relu')(X)
    X = tf.keras.layers.Dense(32,activation='relu')(X)
    y = tf.keras.layers.Dense(2,activation='softmax',name='outputs')(X)

    return  tf.keras.Model(inputs=[input_ids,mask], outputs=y)

In [22]:
model = model()

The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


In [23]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_id (InputLayer)           [(None, 31)]         0                                            
__________________________________________________________________________________________________
mask (InputLayer)               [(None, 31)]         0                                            
__________________________________________________________________________________________________
tf_bert_model (TFBertModel)     TFBaseModelOutputWit 109482240   input_id[0][0]                   
                                                                 mask[0][0]                       
__________________________________________________________________________________________________
lstm (LSTM)                     (None, 64)           213248      tf_bert_model[0][12]         

In [24]:
#opt = tf.keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)
opt = tf.keras.optimizers.Adam(0.005)

In [25]:

model.compile(optimizer = opt,
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=tf.keras.metrics.CategoricalAccuracy())

In [26]:
history = model.fit(x=train_data,validation_data=val_data,epochs=40)

Epoch 1/40
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'


The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'g

The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.




The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
 29/334 [=>............................] - ETA: 15s - loss: 0.4529 - categorical_accuracy: 0.7827

InternalError:    Failed to call ThenRnnBackward with model config: [rnn_mode, rnn_input_mode, rnn_direction_mode]: 2, 0, 0 , [num_layers, input_size, num_units, dir_count, max_seq_length, batch_size, cell_num_units]: [1, 768, 64, 1, 31, 16, 64] 
	 [[{{node gradients/CudnnRNN_grad/CudnnRNNBackprop}}]]
	 [[Adam/gradients/PartitionedCall]] [Op:__inference_train_function_42754]

Function call stack:
train_function -> train_function -> train_function


In [None]:
import matplotlib.pyplot as plt

In [None]:
loss_curve = history.history["loss"]
loss_val = history.history["val_loss"]
acc_curve = history.history["categorical_accuracy"]
acc_val = history.history["val_categorical_accuracy"]

plt.plot(loss_val,label="Loss_val")
plt.plot(loss_curve,label="Loss_train")
plt.legend()
plt.title("loss")
plt.show()

plt.plot(acc_val, label="acc_val" )
plt.plot(acc_curve, label="acc_train" )
plt.legend()
plt.title("accuracy")
plt.show()