In [None]:
!pip install transformers
!pip install tensorflow_addons

Collecting transformers
  Downloading transformers-4.10.3-py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 5.2 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 34.7 MB/s 
[?25hCollecting huggingface-hub>=0.0.12
  Downloading huggingface_hub-0.0.17-py3-none-any.whl (52 kB)
[K     |████████████████████████████████| 52 kB 538 kB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 42.6 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 47.1 MB/s 
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: Py

In [None]:
# Mount Google Drive at /content/drive; the data file loaded further down is
# read from a path underneath this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pickle
import numpy as np
import pandas as pd
from transformers import BertTokenizer, TFAutoModel, AlbertTokenizerFast
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.callbacks import ModelCheckpoint

# Base folder for the files this notebook reads. The path is relative, so it
# resolves against the current working directory. It ends with '/'.
drive_loc = './drive/MyDrive/'

#Load Data
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# a file from a trusted source.
# The pickle unpacks into two objects. Later cells index both as 2-D arrays:
# column 0 is handed to the tokenizer and column 1 is cast to int as the label.
with open(f'{drive_loc}Word_Arrays_Article_Altered.pkl', 'rb') as f:
    training, testing = pickle.load(f)

# Token length every sample is padded/truncated to (512 was the previous value).
seq_len = 50 #512
# Batch size passed to model.fit.
batchs = 64

# Name of the pretrained checkpoint used for both tokenizer and model.
model_name = 'bert-base-cased'
#model_name = 'albert-base-v2'

tokenizer = BertTokenizer.from_pretrained(model_name)
#tokenizer = AlbertTokenizerFast.from_pretrained(model_name)
# Copy of the second column of the test split, taken before `testing` is
# rebound to tokenised arrays by a later cell.
ori_test = testing[:, 1]

In [None]:
# Number of training rows; used later to cut the combined data apart again.
training_len = training.shape[0]

# Stack the two splits row-wise, then store the second column as strings.
dataset = np.vstack([training, testing])
dataset[:, 1] = dataset[:, 1].astype(str)

In [None]:
# Whitespace-separated word count of every text in column 0, plus one.
lister = [len(text.split()) + 1 for text in dataset[:, 0]]

In [None]:
# Standard deviation of the per-text word counts collected in `lister`.
np.array(lister).std()

413.3462714169685

In [None]:
# Tokenise the last training text once, with the same settings that
# data_tokenised uses, as a quick check of the tokenizer call.
_input_ = training[-1, 0]
_tok_ = tokenizer(
    _input_,
    add_special_tokens=True,
    max_length=seq_len,
    padding='max_length',
    truncation=True,
    return_tensors='np',
)

In [None]:
def data_tokenised(data, tok, seq_len):
    """Tokenise every row of ``data`` and build a binary label column.

    Parameters
    ----------
    data : 2-D array
        Column 0 holds the text handed to ``tok``; column 1 holds labels that
        can be cast to ``int``.
    tok : callable
        Called once per row as ``tok(text, max_length=seq_len, truncation=True,
        padding='max_length', add_special_tokens=True, return_tensors='np')``.
        Must return a mapping with ``'input_ids'`` and ``'attention_mask'``
        entries that fit into a row of length ``seq_len``.
    seq_len : int
        Number of token positions per sample.

    Returns
    -------
    x_ids : np.ndarray, shape (samples, seq_len), dtype int32
        Token ids per sample.
    x_mask : np.ndarray, shape (samples, seq_len), dtype int32
        Attention mask per sample.
    lab : np.ndarray, shape (samples, 1), dtype float64
        1.0 where the label equals 1, otherwise 0.0.
    """
    samples = len(data)
    labels = data[:, 1].astype(int)

    # Integer buffers: token ids and mask flags are whole numbers, and the
    # Keras Input layers that consume them are declared with dtype int32.
    x_ids = np.zeros((samples, seq_len), dtype=np.int32)
    x_mask = np.zeros((samples, seq_len), dtype=np.int32)

    for i in range(samples):
        encoded = tok(data[i, 0],
                      max_length=seq_len,
                      truncation=True,
                      padding='max_length',
                      add_special_tokens=True,
                      return_tensors='np')
        x_ids[i, :] = encoded['input_ids']
        x_mask[i, :] = encoded['attention_mask']

    # Positive-class indicator. Comparing directly (instead of building a
    # one-hot matrix and slicing column 1) also works when no label exceeds 0
    # and when `data` has no rows.
    lab = (labels == 1).astype(float).reshape(-1, 1)

    return x_ids, x_mask, lab

# Replace the raw array with the tuple (ids, mask, labels) returned by
# data_tokenised.
# NOTE: `dataset` changes type here, so this cell cannot be re-run on its own;
# the cell that builds the combined array has to be run again first.
dataset = data_tokenised(dataset, tokenizer, seq_len)

In [None]:
# Cut each of the three tokenised arrays back into training and testing rows.
training = tuple(part[:training_len] for part in dataset)
testing = tuple(part[training_len:] for part in dataset)

In [None]:
# Model inputs are dictionaries keyed by the names of the Keras Input layers.
X = dict(input_ids=training[0], attention_mask=training[1])
y = training[2]

X_test = dict(input_ids=testing[0], attention_mask=testing[1])
y_test = testing[2]

In [None]:

# Target path for the checkpoint callback. drive_loc ends with '/', so the
# trailing separator is stripped before joining to avoid a doubled '//'.
checkpoint_dir = f"{drive_loc.rstrip('/')}/Headline_Weights"
#checkpoint_dir = './Checkpoints/weights'

# Save only when the monitored validation accuracy improves.
# (save_weights_only=True was a previously tried option.)
checkpoint = ModelCheckpoint(checkpoint_dir, monitor='val_accuracy', verbose=0, save_best_only=True, mode='auto')
# NOTE(review): the visible model.fit calls do not pass `callbacks`, so this
# checkpoint is currently never triggered.
callbacks = [checkpoint]

def create_model():
    """Build and compile the binary classifier on top of the pretrained encoder.

    Two int32 inputs of length ``seq_len`` (``input_ids`` and
    ``attention_mask``) feed the encoder loaded from ``model_name``. Its
    pooled output passes through a 1000-unit ReLU layer into a single
    sigmoid unit.

    Returns:
        The compiled ``tf.keras.Model`` (Adam with learning rate 1e-5,
        binary cross-entropy loss, accuracy metric).
    """
    layers = tf.keras.layers

    encoder = TFAutoModel.from_pretrained(model_name)

    ids_in = layers.Input(shape=(seq_len,), name='input_ids', dtype='int32')
    mask_in = layers.Input(shape=(seq_len,), name='attention_mask', dtype='int32')

    # Call the inner BERT main layer and keep only its pooled output.
    # (With an ALBERT checkpoint the attribute would be `.albert` instead.)
    pooled = encoder.bert(ids_in, attention_mask=mask_in)['pooler_output']

    hidden = layers.Dense(1000, activation='relu')(pooled)
    prob = layers.Dense(1, activation='sigmoid')(hidden)

    model = tf.keras.Model(inputs=[ids_in, mask_in], outputs=prob)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    return model


In [None]:
# Resolve the TPU, connect to its cluster and initialise the TPU system, then
# create a TPUStrategy from the resolver. These calls are order-dependent.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.TPUStrategy(tpu)

# Create model
# The model is built and compiled inside the strategy scope.
with strategy.scope():
    model = create_model()

INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Initializing the TPU system: grpc://10.58.31.58:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.58.31.58:8470


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Downloading:   0%|          | 0.00/527M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 50)]         0                                            
__________________________________________________________________________________________________
attention_mask (InputLayer)     [(None, 50)]         0                                            
__________________________________________________________________________________________________
bert (TFBertMainLayer)          TFBaseModelOutputWit 108310272   input_ids[0][0]                  
                                                                 attention_mask[0][0]             
__________________________________________________________________________________________________
dense (Dense)                   (None, 1000)         769000      bert[0][1]                   

In [None]:
"""
history = model.fit(X, y,
                    validation_data = (X_test, y_test),
                    epochs = 1,
                    batch_size = batchs,
                    #callbacks = callbacks,
                    verbose=1
                    )"""

'\nhistory = model.fit(X, y,\n                    validation_data = (X_test, y_test),\n                    epochs = 1,\n                    batch_size = batchs,\n                    #callbacks = callbacks,\n                    verbose=1\n                    )'

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

epochs = 50
histories = {}

best_results = 0

# Test-set scores of the best epoch so far. Pre-bound so that the names exist
# even if no epoch ever beats `best_results`.
accuracy = prec = recall = f1 = None

# One fit() call per epoch, so the test-set scores can be refreshed whenever
# the validation accuracy reaches a new maximum.
for i in range(epochs):
    print(f'Epoch {i}')
    history = model.fit(X, y,
                        validation_data = (X_test, y_test),
                        epochs = 1,
                        batch_size = batchs,
                        verbose=1
                        )
    hist = history.history

    # Record this epoch's value for every metric. setdefault creates the list
    # on first sight AND appends to it, so the first epoch is not dropped.
    for key, values in hist.items():
        histories.setdefault(key, []).append(values[0])

    val_acc = hist['val_accuracy'][0]

    if val_acc > best_results:
        print('New Best')
        best_results = val_acc

        # Turn the sigmoid outputs into hard 0/1 predictions at a 0.5 cut-off.
        preds = model.predict(X_test)
        preds[preds>0.5] = 1
        preds[preds<=0.5] = 0

        accuracy = accuracy_score(y_test, preds)
        prec = precision_score(y_test, preds)
        recall = recall_score(y_test, preds)
        f1 = f1_score(y_test, preds)

    

  

Epoch 0


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 1) dtype=float32>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 1) dtype=float32>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 1) dtype=float32>]


New Best


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 50) dtype=float32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 50) dtype=float32>]


Epoch 1
Epoch 2
New Best
Epoch 3
New Best
Epoch 4
Epoch 5
Epoch 6
New Best
Epoch 7
Epoch 8
Epoch 9
Epoch 10
New Best
Epoch 11
New Best
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
New Best
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
New Best
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49


In [None]:
#Results
#hist = model.history

# Attach the test-set scores computed at the best epoch to the collected
# per-epoch history.
histories['Metrics'] = dict(zip(
    ('Accuracy', 'Precision_Score', 'Recall', 'f1_score'),
    (accuracy, prec, recall, f1),
))


#with open(f'{drive_loc}/History-Headlines-Final.pkl', 'wb') as f:
    #pickle.dump(histories, f)
