In [2]:
import os
import tensorflow as tf
from util import constants
from util.config_util import get_model_params, get_task_params, get_train_params
from tf2_models.trainer import Trainer
from absl import app
from absl import flags
import numpy as np
from util.models import MODELS
from util.tasks import TASKS

%matplotlib inline
import pandas as pd
import seaborn as sns; sns.set()

from tqdm import tqdm

In [3]:
def get_reps(inputs, model):
    """Run ``model`` on ``inputs`` and return the representations to compare.

    Args:
        inputs: Batch handed unchanged to ``model.detailed_call``.
        model: Object exposing ``detailed_call(inputs)`` that returns a
            ``(logits, extra)`` pair.

    Returns:
        The ``logits`` element of the pair returned by ``detailed_call``.
    """
    # Previously the outputs were unpacked but nothing was returned, so every
    # caller received None.
    # NOTE(review): `extra` presumably carries intermediate outputs (e.g.
    # hidden states); if those are the representations that should be
    # compared, return the relevant part of `extra` instead -- confirm
    # against the model's detailed_call implementation.
    logits, _extra = model.detailed_call(inputs)
    return logits

def pairwisedot_product_sim(reps1, reps2):
    """Dot product between every row of ``reps1`` and every row of ``reps2``.

    Both inputs are first flattened to 2-D: the last axis is kept and all
    leading axes are collapsed into a single row axis.

    Args:
        reps1: Tensor whose last axis is the feature axis.
        reps2: Tensor whose last axis has the same size as that of ``reps1``.

    Returns:
        A 2-D tensor with one row per flattened row of ``reps1`` and one
        column per flattened row of ``reps2``; entry ``(i, j)`` is the dot
        product of row ``i`` of ``reps1`` with row ``j`` of ``reps2``.
    """
    reps1 = tf.reshape(reps1, (-1, tf.shape(reps1)[-1]))
    reps2 = tf.reshape(reps2, (-1, tf.shape(reps2)[-1]))

    # TensorFlow tensors do not expose `.T` by default (only when NumPy
    # behaviour is enabled), so let matmul transpose the second operand.
    pw_dot_product = tf.matmul(reps1, reps2, transpose_b=True)

    return pw_dot_product

def dot_product_sim(reps1, reps2):
    """Dot product between corresponding vectors of ``reps1`` and ``reps2``.

    The inputs are multiplied elementwise and the products are summed over
    the last axis, so the result has one fewer axis than the product.
    """
    return tf.reduce_sum(tf.multiply(reps1, reps2), axis=-1)

def normalized_dot_product_sim(reps1, reps2):
    """Cosine similarity between corresponding vectors of the two inputs.

    Each vector along the last axis is scaled to unit L2 norm before the
    dot product is taken, so every output value is the cosine of the angle
    between one pair of vectors.

    Args:
        reps1: Floating-point tensor; the last axis is the vector axis.
        reps2: Floating-point tensor broadcast-compatible with ``reps1``.

    Returns:
        Tensor with the last axis reduced away, holding one similarity per
        pair of corresponding vectors.
    """
    # Normalise per vector (axis=-1). A bare tf.norm(reps) is a single norm
    # over the whole tensor, which makes the per-pair values depend on how
    # many rows there are. l2_normalize also clamps the squared norm with a
    # small epsilon, so all-zero vectors give 0 rather than NaN.
    reps1 = tf.math.l2_normalize(reps1, axis=-1)
    reps2 = tf.math.l2_normalize(reps2, axis=-1)

    # Elementwise product summed over the vector axis == per-pair dot product.
    dot_product = tf.reduce_sum(tf.multiply(reps1, reps2), axis=-1)

    return dot_product
    
    
def second_order_rep_sim(reps1, reps2):
    """Second-order similarity between two sets of representations.

    Each set is first turned into its own pairwise dot-product matrix via
    ``pairwisedot_product_sim``; the two matrices are then compared with
    ``normalized_dot_product_sim``.

    Returns:
        A ``(mean, values)`` tuple: ``values`` is the output of
        ``normalized_dot_product_sim`` and ``mean`` is ``np.mean`` of it.
    """
    self_sims_a = pairwisedot_product_sim(reps1, reps1)
    self_sims_b = pairwisedot_product_sim(reps2, reps2)

    so_values = normalized_dot_product_sim(self_sims_a, self_sims_b)
    so_mean = np.mean(so_values)

    return so_mean, so_values

def compare_models(inputs, model1, model2):
    """Compare what two models compute for the same ``inputs``.

    ``get_reps`` is called for ``model1`` and then for ``model2``; the two
    results are passed to ``second_order_rep_sim`` and its return value is
    handed back unchanged.
    """
    return second_order_rep_sim(
        get_reps(inputs, model1),
        get_reps(inputs, model2),
    )

In [None]:
# --- Experiment selection --------------------------------------------------
model_name = 'lm_gpt2'
model_config = 'very_big_gpt_v10'
learning_rate = 0.0001
exp_name = 'lisa_offlineteacher_v1'
task_name = 'word_sv_agreement_lm'
chkpt_dir = '../tf_ckpts'  # root folder that holds all checkpoints

# --- Task and model construction -------------------------------------------
task = TASKS[task_name](get_task_params(), data_dir='../data')

# Encoding of the beginning-of-sentence constant, passed to the model as cl_token.
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

hparams = get_model_params(task, model_name, model_config)
hparams.output_attentions = True
hparams.output_embeddings = True

model = MODELS[model_name](hparams=hparams, cl_token=cl_token)

# --- Checkpoint restore ----------------------------------------------------
# Folder layout: <root>/<task name>/<model name>_<config>_<lr>_<experiment>
ckpt_dir = os.path.join(
    chkpt_dir,
    task.name,
    "_".join([model.model_name, str(model_config), str(learning_rate), exp_name]),
)

ckpt = tf.train.Checkpoint(net=model)
manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=None)

ckpt.restore(manager.latest_checkpoint)
if not manager.latest_checkpoint:
    # Nothing found in ckpt_dir: print the path so a wrong directory is easy to spot.
    print("Initialized from scratch!")
    print(ckpt_dir)
else:
    print("Restored model from {}".format(manager.latest_checkpoint))

# --- Evaluation ------------------------------------------------------------
model.compile(loss=task.get_loss_fn(), metrics=task.metrics())
# For the training split use:
#   model.evaluate(task.train_dataset, steps=task.n_train_batches)
model.evaluate(task.valid_dataset, steps=task.n_valid_batches)

Vocab len:  10034
model config: very_big_gpt_v10
{'embedding_dim': 512, 'resid_pdrop': 0.4, 'embd_pdrop': 0.2, 'attn_pdrop': 0.6, 'initializer_range': 0.05}
Restored model from ../tf_ckpts/word_sv_agreement_lm/lm_gpt2_h-512_d-6_rdrop-0.4_adrop-0.6_indrop-0.2_very_big_gpt_v10_0.0001_lisa_offlineteacher_v1/ckpt-60
