In [2]:
!pip install transformers
from tensorflow import keras
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, Input
from transformers import TFDistilBertModel
import glob
from tensorflow.keras.utils import Progbar
from google.colab import drive
import os
from pathlib import Path
import json
import seaborn as sns
from matplotlib import pyplot as plt
import pickle as pkl
from reddit import load_tfrecord_triplet, split_dataset

In [3]:
drive.mount('/content/drive/')
%cd 'drive/My Drive/personality_reddit'

Mounted at /content/drive/
/content/drive/My Drive/personality_reddit


## Initialize TPU strategy

In [5]:
# Connect to the Colab TPU and build the distribution strategy used by the
# cells below. `strategy` is always defined when this cell finishes: if no TPU
# can be reached, it falls back to TensorFlow's default strategy.
try:
  tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu_address)
  tf.config.experimental_connect_to_cluster(tpu)
  tf.tpu.experimental.initialize_tpu_system(tpu)
  strategy = tf.distribute.experimental.TPUStrategy(tpu)
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
  print("Number of accelerators: ", strategy.num_replicas_in_sync)
except (KeyError, ValueError) as err:
  # KeyError: COLAB_TPU_ADDR is not set in the environment.
  # ValueError: the exception type this cell originally handled.
  print('TPU failed to initialize.')
  print('Reason: ', repr(err))
  # Without a fallback, later cells that read `strategy` fail with NameError.
  strategy = tf.distribute.get_strategy()
  print("Number of accelerators: ", strategy.num_replicas_in_sync)

INFO:absl:Entering into master device scope: /job:worker/replica:0/task:0/device:CPU:0


INFO:tensorflow:Initializing the TPU system: grpc://10.109.133.186:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.109.133.186:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Running on TPU  ['10.109.133.186:8470']
Number of accelerators:  8


In [6]:
# Shorthand for tf.data's autotune sentinel; passed below as
# `num_parallel_calls` and as the prefetch buffer size.
AUTO = tf.data.experimental.AUTOTUNE

## Define dataset parameters

In [None]:
# Dataset-level knobs used by the input pipeline below.
n_posts = 50                    # rows kept per example after slicing / padding
samples_in_toy_dataset = 10000  # size of the small tuning ("toy") split

# Per-replica batch size; values above 1 are not feasible on Colab TPUs.
batch_size = 1
# One global batch carries `batch_size` examples for every replica in sync.
global_batch_size = strategy.num_replicas_in_sync * batch_size

## Load and distribute dataset

In [8]:
# Authenticate the Colab user and select the gcloud project.
# NOTE(review): presumably required so the gs:// shards below are readable — confirm.
from google.colab import auth
auth.authenticate_user()
# Sets the gcloud `core/project` property for this runtime.
!gcloud config set project thematic-cursor-254011

Updated property [core/project].


In [9]:
# Glob pattern for the TFRecord shards in the GCS bucket.
fre = 'gs://personality_reddit/triplet_nn1/*-of-999.tfrecord'
# Expand the pattern into the list of matching shard paths.
fnames = tf.io.gfile.glob(fre)

In [None]:
# Build the input pipeline from the TFRecord shards listed in `fnames`.
# NOTE(review): the import cell only brings in `load_tfrecord_triplet` from
# `reddit`; confirm `load_tfrecord_triplet_nn1` is defined before this runs.
ds = load_tfrecord_triplet_nn1(filenames=fnames, num_parallel_calls=AUTO)
ds = (
    ds
    # Reverse each example's `input_ids` along axis 0, then keep at most the
    # first `n_posts` rows of the reversed tensor.
    .map(lambda x: tf.reverse(x['input_ids'], [0])[:n_posts, :],
         num_parallel_calls=AUTO)
    # Pad every example to [n_posts, longest row in the batch]; incomplete
    # final batches are dropped.
    .padded_batch(global_batch_size,
                  padded_shapes=[n_posts, None],
                  drop_remainder=True)
    # Prefetch last so that whole batches, not single examples, are buffered.
    .prefetch(AUTO)
)

In [12]:
# Split the batched dataset into tuning / train / validation / test parts.
# The tuning size comes from `samples_in_toy_dataset` instead of a repeated
# literal, so the toy-dataset size is configured in one place.
# NOTE(review): `ds` is already batched; confirm whether `tuning` counts
# batches or individual samples inside `split_dataset`.
ds_tuning, ds_train, ds_val, ds_test = split_dataset(ds, tuning=samples_in_toy_dataset)

# Wrap the tuning, validation and test splits for the distribution strategy.
# `ds_train` is left exactly as returned by `split_dataset`.
ds_tuning, ds_val, ds_test = (
    strategy.experimental_distribute_dataset(d)
    for d in (ds_tuning, ds_val, ds_test)
)

## Define model

## Training protocol

Define parameters for training

In [20]:
# Training schedule. `n_train` is defined outside this cell.
EPOCHS = 10

# Total step budget across all epochs.
tot_train_steps = n_train * EPOCHS

# Warm-up spans 10% of that budget, expressed in global batches.
# NOTE(review): `warmup_steps` is divided by `global_batch_size` while
# `tot_train_steps` is not — confirm both use the same unit.
warmup_steps = int(tot_train_steps * 0.1 / global_batch_size)

Specify checkpoint to load (for epoch > 0)

Set up training functions

Define logging functions

Define training loop

Run training loop for hyperparameter tuning with small dataset