In [1]:
import os
import tensorflow as tf
from util import constants
from util.config_util import get_model_params, get_task_params, get_train_params
from tf2_models.trainer import Trainer
from absl import app
from absl import flags
import numpy as np
from util.models import MODELS
from util.tasks import TASKS
from notebook_utils import *

%matplotlib inline
import pandas as pd
import seaborn as sns; sns.set()

from tqdm import tqdm


In [2]:



# Names of the tasks available for this experiment (keys into TASKS).
task1, task2, task3, task4 = (
    'word_sv_agreement_lm',
    'word_sv_agreement_vp',
    'sst2',
    'lm1b',
)

# Directory in which model checkpoints are looked up.
chkpt_dir = '../tf_ckpts'

# The teacher is built from task3 and the student from task4; both read
# their data from ../data.
teacher_task_cls = TASKS[task3]
teacher_task = teacher_task_cls(get_task_params(), data_dir='../data')
student_task_cls = TASKS[task4]
student_task = student_task_cls(get_task_params(), data_dir='../data')

# Encoding of the `constants.bos` symbol under the teacher task's sentence
# encoder; passed to get_model() when the models are built.
teacher_encoder = teacher_task.sentence_encoder()
cl_token = teacher_encoder.encode(constants.bos)




In [None]:
# Pull one batch from each validation set in lock-step and print the first
# example of each; the loop variables stay bound for the following cells.
paired_batches = zip(teacher_task.valid_dataset, student_task.valid_dataset)
for (x1, y1), (x2, y2) in paired_batches:
    print(x1[0], x2[0])
    break

In [None]:
# Display the teacher-task input batch left bound by the paired-batch loop.
x1

In [None]:

# tf.gather_nd() takes no `axis` argument and indices must be integers, so the
# original call could not run. tf.gather() supports `axis`; with 64 indices all
# equal to 1 this selects column 1 of x1 sixty-four times along axis 1.
# NOTE(review): if the intent was one element per row, add batch_dims=1 -- confirm.
tf.gather(x1, indices=tf.ones(64, dtype=tf.int32), axis=1)

In [None]:
# Smoke test: distribute the teacher validation set and print the shapes of
# the first distributed batch.
strategy = tf.distribute.experimental.CentralStorageStrategy()  # with 1 CPU and 1 GPU
dataset = teacher_task.valid_dataset
dist_dataset = strategy.experimental_distribute_dataset(dataset)

for x, y in dist_dataset:
    print(f"{x.shape} {y.shape}")
    break

In [3]:
def build_student_model(model_name, model_config):
    """Build one model on `student_task` with attention/embedding outputs enabled.

    Args:
        model_name: value stored under config['model_name'] and passed to
            get_model_params().
        model_config: value stored under config['model_config'] and passed to
            get_model_params().

    Returns:
        (model, ckpt, config, hparams): the two values returned by get_model(),
        plus the config dict and hparams object used to build the model.
    """
    config = {'model_name': model_name,
              'model_config': model_config,
              'learning_rate': 0.001,
              'exp_name': 'test',
              'chkpt_dir': '../tf_ckpts'
    }
    hparams = get_model_params(student_task, config['model_name'], config['model_config'])
    hparams.output_attentions = True
    hparams.output_embeddings = True

    model, ckpt = get_model(config, student_task, hparams, cl_token)
    return model, ckpt, config, hparams


# `config`, `hparams` and `ckpt` are rebound after every call so that, as before,
# they end up holding the values for the last model built.
lm_lstm, ckpt, config, hparams = build_student_model('lm_lstm_shared_emb', 'small_lstm_v4')
lm_gpt, ckpt, config, hparams = build_student_model('lm_gpt2_shared', 'small_gpt_v9')
cl_lstm, ckpt, config, hparams = build_student_model('cl_lstm', 'small_lstm_v4')
# NOTE: `cl_bert` holds the 'cl_gpt2_shared' model; the name is kept because
# later cells refer to it.
cl_bert, ckpt, config, hparams = build_student_model('cl_gpt2_shared', 'small_gpt_v9')

model config: small_lstm_v4
{'hidden_dim': 256, 'embedding_dim': 256, 'depth': 2, 'hidden_dropout_rate': 0.8, 'input_dropout_rate': 0.2, 'initializer_range': 0.1}
No checkpoint found ../tf_ckpts/lm1b/lm_lstm_shared_emb_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_small_lstm_v4_0.001_test
model config: small_gpt_v9
{'embedding_dim': 128, 'resid_pdrop': 0.4, 'embd_pdrop': 0.2, 'attn_pdrop': 0.6, 'initializer_range': 0.05}
No checkpoint found ../tf_ckpts/lm1b/lm_gpt2_h-128_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_small_gpt_v9_0.001_test
model config: small_lstm_v4
{'hidden_dim': 256, 'embedding_dim': 256, 'depth': 2, 'hidden_dropout_rate': 0.8, 'input_dropout_rate': 0.2, 'initializer_range': 0.1}
No checkpoint found ../tf_ckpts/lm1b/cl_lstm_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_small_lstm_v4_0.001_test
model config: small_gpt_v9
{'embedding_dim': 128, 'resid_pdrop': 0.4, 'embd_pdrop': 0.2, 'attn_pdrop': 0.6, 'initializer_range': 0.05}
No checkpoint found ../tf_ckpts/lm1b/cl_gpt2_shared_weights_h-128_d

In [4]:
# For the first validation batch, print each model's scope, the number of
# outputs returned by detailed_call(), and the shape of its representation
# output. detailed_call() is run once per model instead of twice.
for x, y in student_task.valid_dataset:
  print(x.shape, y.shape)
  # The flag marks models whose representation output is additionally indexed
  # by `rep_layer` (the two LSTM models); the others are indexed by `rep_index` only.
  for probe_model, index_by_layer in ((lm_lstm, True),
                                      (lm_gpt, False),
                                      (cl_bert, False),
                                      (cl_lstm, True)):
    probe_outputs = probe_model.detailed_call(x)
    print(probe_model.scope, len(probe_outputs))
    probe_reps = probe_outputs[probe_model.rep_index]
    if index_by_layer:
      probe_reps = probe_reps[probe_model.rep_layer]
    print(probe_reps.shape)
  break

(64, 76) (64, 76)
lm_lstm_shared_emb 4
(64, 76, 256)
lm_gpt2 5
(64, 76, 128)
cl_gpt2_shared_weights 6
(64, 76, 128)
cl_lstm 3
(64, 76, 256)


In [None]:
# Take the first validation batch and keep one representation tensor from the
# LSTM language model (reps1) and one from `cl_bert` (reps2).
for x, y in student_task.valid_dataset:
    lstm_outputs = lm_lstm.detailed_call(x)
    reps1 = lstm_outputs[lm_lstm.rep_index][lm_lstm.rep_layer]

    bert_outputs = cl_bert.detailed_call(x)
    reps2 = bert_outputs[cl_bert.rep_index]
    break

In [None]:
from distill.repsim_util import *

# Representation loss between the two models' outputs, with the student
# task's input padding symbol supplied as an int64 constant.
rep_loss(
    reps1,
    reps2,
    padding_symbol=tf.constant(student_task.input_padding_symbol, dtype=tf.int64),
)

In [None]:
# Display the input batch `x` left bound by the most recently run loop cell.
x

In [None]:
# Student task's input padding symbol as an int64 constant, so it can be
# compared against the input batch.
padding_symbol = tf.constant(
    student_task.input_padding_symbol,
    dtype=tf.int64,
)

In [None]:
# Flatten both representation tensors to 2-D, keeping the last dimension.
reps1 = tf.reshape(reps1, (-1, tf.shape(reps1)[-1]))
reps2 = tf.reshape(reps2, (-1, tf.shape(reps2)[-1]))

# Column mask with one row per flattened position: 0.0 where the input equals
# the padding symbol, 1.0 elsewhere. Without a usable padding symbol, every
# position is kept.
if padding_symbol is None or padding_symbol <= -1:
    padding_mask = tf.ones((tf.shape(reps1)[0], 1))
else:
    is_padding = tf.cast(x == padding_symbol, dtype=tf.float32)
    padding_mask = tf.reshape(1.0 - is_padding, (-1, 1))

# Only the first of the two values returned by second_order_rep_sim is kept.
mean_sim, _ = second_order_rep_sim(reps1, reps2, padding_mask)



In [None]:
# One minus the mean similarity returned by second_order_rep_sim.
# NOTE(review): presumably comparable to the rep_loss() value computed earlier -- confirm.
1 - mean_sim