In [3]:
import os
import tensorflow as tf
from util import constants
from util.config_util import get_model_params, get_task_params, get_train_params
from tf2_models.trainer import Trainer
from absl import app
from absl import flags

from util.models import MODELS
from util.tasks import TASKS

%matplotlib inline
import pandas as pd
import seaborn as sns; sns.set()

In [4]:
# --- Experiment configuration -------------------------------------------------
# Name of the task; used to look the task class up in TASKS.
task_name = 'word_sv_agreement_vp'

# Teacher: model registry key, hyper-parameter config name and experiment tag.
teacher_model = 'cl_lstm'
teacher_config = 'small_lstm_v4'
teacher_exp_name = '0.0001_offlineteacher_v3'

# Student: model registry key, hyper-parameter config name and experiment tag.
student_model = 'cl_gpt2'
student_config = 'small_gpt_v9'
student_exp_name = 'samira_fd1'

# Distillation settings; both strings become part of the student checkpoint
# directory name.
distill_mode = 'offline'
distill_config = 'pure_distill_2'

# Root directory under which the checkpoints are looked up.
chkpt_dir = '../tf_ckpts'

In [5]:
# Look the task class up by name and build it on top of the local data directory.
task_factory = TASKS[task_name]
task = task_factory(get_task_params(), data_dir='../data')

Vocab len:  10034


In [6]:
# `teacher_model` / `student_model` start out as registry keys (strings) and are
# rebound to the built models below. Remember the keys while they are still
# strings, so that re-running this cell does not index MODELS with a model object.
if isinstance(teacher_model, str):
    teacher_model_key = teacher_model
if isinstance(student_model, str):
    student_model_key = student_model

# Encoding of the <bos> marker by the task's sentence encoder; both models
# receive it as `cl_token`.
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

teacher_hparams = get_model_params(task, teacher_model_key, teacher_config)
teacher_model = MODELS[teacher_model_key](hparams=teacher_hparams, cl_token=cl_token)

std_hparams = get_model_params(task, student_model_key, student_config)
# NOTE(review): these flags presumably make the student expose its attentions
# and embeddings (read from `detailed_call` further down) -- confirm in the model.
std_hparams.output_attentions = True
std_hparams.output_embeddings = True
student_model = MODELS[student_model_key](hparams=std_hparams, cl_token=cl_token)

model config: small_lstm_v4
{'hidden_dim': 256, 'embedding_dim': 256, 'depth': 2, 'hidden_dropout_rate': 0.8, 'input_dropout_rate': 0.2, 'initializer_range': 0.1}
model config: small_gpt_v9
{'embedding_dim': 128, 'resid_pdrop': 0.4, 'embd_pdrop': 0.2, 'attn_pdrop': 0.6, 'initializer_range': 0.05}


In [7]:
# Pieces of the distillation run's directory name, joined with "_" in this
# exact order. teacher_config is not part of the name.
student_run_parts = [
    distill_mode,
    distill_config,
    "teacher",
    teacher_model.model_name,
    teacher_exp_name,
    "student",
    student_model.model_name,
    str(student_config),
    student_exp_name,
]
student_ckpt_dir = os.path.join(chkpt_dir, task.name, '_'.join(student_run_parts))
print(student_ckpt_dir)

../tf_ckpts/word_sv_agreement_vp/offline_pure_distill_2_teacher_cl_lstm_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_0.0001_offlineteacher_v3_student_cl_gpt2_h-128_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_small_gpt_v9_samira_fd1


In [8]:
# Track the student under the key `net` and manage checkpoints in its run directory.
student_ckpt = tf.train.Checkpoint(net=student_model)
student_manager = tf.train.CheckpointManager(student_ckpt, student_ckpt_dir, max_to_keep=None)

# latest_checkpoint is None when the directory holds no checkpoint; restore(None)
# then loads nothing, so report that case instead of continuing silently.
student_latest = student_manager.latest_checkpoint
student_ckpt.restore(student_latest)
if student_latest:
  print("Restored student from {}".format(student_latest))
else:
  print("WARNING: no student checkpoint found in {}; "
        "the student keeps its initial weights".format(student_ckpt_dir))

Restored student from ../tf_ckpts/word_sv_agreement_vp/offline_pure_distill_2_teacher_cl_lstm_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_0.0001_offlineteacher_v3_student_cl_gpt2_h-128_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_small_gpt_v9_samira_fd1/ckpt-60


In [None]:
# Attach the task's loss and metrics to the student before evaluating it.
student_loss_fn = task.get_loss_fn()
student_metrics = task.metrics()
student_model.compile(loss=student_loss_fn, metrics=student_metrics)

In [None]:
# Evaluate the compiled student on the task's test dataset for 100 steps; as the
# last expression of the cell, the result is shown as the cell output.
student_model.evaluate(task.test_dataset, steps=100)

In [None]:
# Iterator over the task's test dataset; batches are pulled from it with next().
test_iter  = iter(task.test_dataset)

In [None]:
# Pull the next test batch, split it into inputs and labels, and run the
# student on the inputs.
test_batch = next(test_iter)
examples_x, examples_y = test_batch
student_logits = student_model(examples_x)

In [1]:
%%javascript
// Register CDN locations for the `d3` and `jquery` modules with RequireJS.
// NOTE(review): presumably required by the attention visualisation rendered by
// `show` in later cells -- confirm against the `attention` module.
require.config({
  paths: {
      // Protocol-relative URLs: the scheme is taken from the page serving the notebook.
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min',
      jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
  }
});

<IPython.core.display.Javascript object>

In [2]:
from attention import *
%matplotlib inline

index=16
example = examples_x[index]
sentence = task.databuilder.sentence_encoder().decode(example)
print(sentence)
print(student_logits[index])
print(examples_y[index])
logits, (extra) = student_model.detailed_call(examples_x)
last_state, presents, attentions, embeddings = extra

_attentions = [att.numpy() for att in attentions]
attentions_mat = np.asarray(_attentions)[:,index]
print(attentions_mat.shape)
print(embeddings.shape)
show(sentence.split(), sentence.split(), attentions_mat)

NameError: name 'examples_x' is not defined

In [3]:
from attention import *
%matplotlib inline

index=0
sentence =  '<bos> the young girl who plays with the older kids <eos>'

#'<bos> the boys who play football with the old man <eos>'
encoded_sentence = task.databuilder.sentence_encoder().encode(sentence)
logits, extra = student_model.detailed_call(np.asarray([encoded_sentence]))
last_state, presents, attentions, embeddings = extra
print(student_logits[index])
_attentions = [att.numpy() for att in attentions]
attentions_mat = np.asarray(_attentions)[:,index]
print(attentions_mat.shape)
show(sentence.split(), sentence.split(), attentions_mat)
student_attentions = attentions_mat

NameError: name 'task' is not defined

In [15]:
# Teacher checkpoints live under
# <chkpt_dir>/<task name>/<model name>_<teacher config>_<teacher experiment name>.
teacher_run_name = '_'.join([teacher_model.model_name, teacher_config, teacher_exp_name])
teacher_ckpt_dir = os.path.join(chkpt_dir, task.name, teacher_run_name)

teacher_ckpt = tf.train.Checkpoint(net=teacher_model)
teacher_manager = tf.train.CheckpointManager(teacher_ckpt, teacher_ckpt_dir, max_to_keep=None)

# latest_checkpoint is None when the directory holds no checkpoint; restore(None)
# then loads nothing, so report that case instead of continuing silently.
teacher_latest = teacher_manager.latest_checkpoint
teacher_ckpt.restore(teacher_latest)
if teacher_latest:
  print("Restored teacher from {}".format(teacher_latest))
else:
  print("WARNING: no teacher checkpoint found in {}; "
        "the teacher keeps its initial weights".format(teacher_ckpt_dir))

Restored student from ../tf_ckpts/word_sv_agreement_vp/cl_lstm_em-256_h-256_d-2_hdrop-0.8_indrop-0.2_small_lstm_v4_0.0001_offlineteacher_v3/ckpt-60


In [16]:
# Attach the task's loss and metrics to the teacher.
teacher_loss_fn = task.get_loss_fn()
teacher_metrics = task.metrics()
teacher_model.compile(loss=teacher_loss_fn, metrics=teacher_metrics)

In [29]:
# The hand-written sentence is the only item of the batch built below.
index = 0
sentence = '<bos> the young girl who plays with the older kids <eos>'
# Alternative probe sentence:
#'<bos> the boys who play football with the old man <eos>'
sentence_encoder = task.databuilder.sentence_encoder()
encoded_sentence = sentence_encoder.encode(sentence)

# Wrap the encoded sentence in a batch of one and run the teacher on it.
teacher_inputs = np.asarray([encoded_sentence])
teacher_logits = teacher_model(teacher_inputs)
print(teacher_logits[index])


tf.Tensor([ 2.3365955 -2.1648035], shape=(2,), dtype=float32)


In [20]:
# Settings of a separately saved model: same registry key and config name as the
# student, but restored from its own (non-distillation) checkpoint directory.
model_name = 'cl_gpt2'
model_config = 'small_gpt_v9'
learning_rate = 0.0001
exp_name = 'offlineteacher_v1'

cl_token = task.databuilder.sentence_encoder().encode(constants.bos)
hparams = get_model_params(task, model_name, model_config)
# NOTE(review): these flags presumably make the model expose its attentions and
# embeddings (read from `detailed_call` in a later cell) -- confirm in the model.
hparams.output_attentions = True
hparams.output_embeddings = True

model = MODELS[model_name](hparams=hparams, cl_token=cl_token)

# Directory name: <model name>_<config>_<learning rate>_<experiment name>.
run_name = '_'.join([model.model_name, str(model_config), str(learning_rate), exp_name])
ckpt_dir = os.path.join(chkpt_dir, task.name, run_name)

ckpt = tf.train.Checkpoint(net=model)
manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=None)

# latest_checkpoint is None when the directory holds no checkpoint; restore(None)
# then loads nothing, so report that case instead of continuing silently.
latest = manager.latest_checkpoint
ckpt.restore(latest)
if latest:
  print("Restored model from {}".format(latest))
else:
  print("WARNING: no checkpoint found in {}; "
        "the model keeps its initial weights".format(ckpt_dir))

model.compile(loss=task.get_loss_fn(), metrics=task.metrics())
# Last expression of the cell: the evaluation result is shown as the cell output.
model.evaluate(task.test_dataset, steps=100)

model config: small_gpt_v9
{'embedding_dim': 128, 'resid_pdrop': 0.4, 'embd_pdrop': 0.2, 'attn_pdrop': 0.6, 'initializer_range': 0.05}
Restored student from ../tf_ckpts/word_sv_agreement_vp/cl_gpt2_h-128_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_small_gpt_v9_0.0001_offlineteacher_v1/ckpt-60










































































































































































































































































































































































































































































































































































































































































[0.2104924625903368, 0.19008262, 0.951875]

In [33]:
from attention import *
%matplotlib inline

index=0
sentence = '<bos> the young girl who plays with the older kids <eos>'
encoded_sentence = task.databuilder.sentence_encoder().encode(sentence)
logits, extra = model.detailed_call(np.asarray([encoded_sentence]))
last_state, presents, attentions, embeddings = extra
print(student_logits[index])
_attentions = [att.numpy() for att in attentions]
attentions_mat = np.asarray(_attentions)[:,index]
print(attentions_mat.shape)
show(sentence.split(), sentence.split(), attentions_mat)
independent_model_attentions = attentions_mat

tf.Tensor([-4.662724  5.355209], shape=(2,), dtype=float32)
(6, 8, 11, 11)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [43]:
# Element-wise difference between the student's attentions and those stored in
# `independent_model_attentions`, with negative values clipped to 0, so only the
# positions where the student attends more remain.
diff_attentions = np.maximum(student_attentions - independent_model_attentions, 0)
show(sentence.split(), sentence.split(), diff_attentions)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>