In [8]:
import os
import tensorflow as tf
from util import constants
from util.config_util import get_model_params, get_task_params, get_train_params
from tf2_models.trainer import Trainer
from absl import app
from absl import flags

from util.models import MODELS
from util.tasks import TASKS

%matplotlib inline
import pandas as pd
import seaborn as sns; sns.set()

In [9]:
student_exp_name='samira_fd1'
teacher_exp_name='0.0001_offlineteacher_v3'
teacher_config='small_lstm_v4'
task_name = 'word_sv_agreement_vp'
student_model='cl_gpt2'
teacher_model='cl_lstm'
student_config='small_gpt_v9'
distill_config='pure_distill_2'
distill_mode='offline'

chkpt_dir='../tf_ckpts'

In [10]:
task = TASKS[task_name](get_task_params(), data_dir='../data')

Vocab len:  10034


In [11]:
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)
teacher_model = MODELS[teacher_model](hparams=get_model_params(task, teacher_model, teacher_config), cl_token=cl_token)
std_hparams=get_model_params(task, student_model, student_config)
std_hparams.output_attentions = True
std_hparams.output_embeddings = True
student_model = MODELS[student_model](
std_hparams, cl_token=cl_token)

model config: small_lstm_v4
{'hidden_dim': 256, 'embedding_dim': 256, 'depth': 2, 'hidden_dropout_rate': 0.8, 'input_dropout_rate': 0.2, 'initializer_range': 0.1}
model config: small_gpt_v9
{'embedding_dim': 128, 'resid_pdrop': 0.4, 'embd_pdrop': 0.2, 'attn_pdrop': 0.6, 'initializer_range': 0.05}


In [12]:
student_ckpt_dir = os.path.join(chkpt_dir, task.name,
                              '_'.join([distill_mode,distill_config,
                                        "teacher", teacher_model.model_name, 
                                        #teacher_config,
                                        teacher_exp_name,
                                       "student",student_model.model_name,
                                        str(student_config),
                                        student_exp_name]))
print(student_ckpt_dir)

../tf_ckpts/word_sv_agreement_vp/offline_pure_distill_2_teacher_cl_lstm_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_0.0001_offlineteacher_v3_student_cl_gpt2_h-128_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_small_gpt_v9_samira_fd1


In [13]:
student_ckpt = tf.train.Checkpoint(net=student_model)
student_manager = tf.train.CheckpointManager(student_ckpt, student_ckpt_dir, max_to_keep=None)

student_ckpt.restore(student_manager.latest_checkpoint)
if student_manager.latest_checkpoint:
  print("Restored student from {}".format(student_manager.latest_checkpoint))

Restored student from ../tf_ckpts/word_sv_agreement_vp/offline_pure_distill_2_teacher_cl_lstm_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_0.0001_offlineteacher_v3_student_cl_gpt2_h-128_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_small_gpt_v9_samira_fd1/ckpt-60


In [34]:
sentence = '<bos> the boys who play football with the old man <eos>'
encoded_sentence = task.databuilder.sentence_encoder().encode(sentence)
inputs = tf.convert_to_tensor([encoded_sentence])

with tf.GradientTape() as tape:
    logits, extra= student_model.detailed_call(inputs, training=False)
    embeddings = extra[-1][0]
    for ex in extra:
        if(len(ex) == 1):
            print(len(ex), ex.shape)
        else:
            print(len(ex), ex[0].shape)

    sum_logits = tf.reduce_sum(logits)
    tf.print(sum_logits)

grads = tape.gradient(logits, extra[3])
#grads = tf.reduce_sum(grads, axis=-1)
print(grads)

1 (1, 11, 128)
6 (1, 2, 8, 11, 16)
6 (1, 8, 11, 11)
1 (1, 11, 128)
0.583046198
None


In [6]:
x = tf.Variable(3, name='x')
W1 = tf.Variable(10, name='w1')
W2 = tf.Variable(10, name='w2')


with tf.GradientTape() as tape:
    y = W1 * x
    y2 = W2 * y

grads = tape.gradient(y2, x)       

print(grads)

None
