In [2]:
import sys
import os

# Resolve the sibling "model_merging" directory against the current working
# directory.
abs_path = os.path.abspath(os.path.join(os.pardir, "model_merging"))

# Register it on sys.path only once, then switch into it so that relative
# paths used afterwards are resolved from that directory.
if sys.path.count(abs_path) == 0:
    sys.path.append(abs_path)
os.chdir(abs_path)

In [29]:
"""Script for actually merging models."""
import os

from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

from model_merging import data
from model_merging import evaluation
from model_merging import hdf5_util
from model_merging import merging


def load_models(models, from_pt):
    """Load every checkpoint in `models` plus the tokenizer of the first one.

    Args:
        models: Non-empty sequence of model names or paths. Each entry is
            passed through `os.path.expanduser` before loading.
        from_pt: Forwarded as the `from_pt` keyword of
            `TFAutoModelForSequenceClassification.from_pretrained`.

    Returns:
        A `(output_models, tokenizer)` tuple. `output_models` holds the loaded
        models in input order; `tokenizer` is loaded with
        `AutoTokenizer.from_pretrained` from the first entry only.

    Raises:
        ValueError: If `models` is empty. Without this check the tokenizer
            would never be bound and the return statement would fail with an
            UnboundLocalError.
    """
    model_paths = [os.path.expanduser(model_str) for model_str in models]
    if not model_paths:
        raise ValueError("load_models requires at least one model name or path.")

    output_models = [
        TFAutoModelForSequenceClassification.from_pretrained(
            model_path, from_pt=from_pt
        )
        for model_path in model_paths
    ]
    # Only the first model's tokenizer is used for all models.
    tokenizer = AutoTokenizer.from_pretrained(model_paths[0])
    return output_models, tokenizer


def load_fishers(fishers):
    """Load the Fisher variables stored at each path in `fishers`.

    Args:
        fishers: Sequence of HDF5 file paths, or a falsy value (None / empty)
            when no Fishers are used. Each path is passed through
            `os.path.expanduser` before loading.

    Returns:
        None when `fishers` is falsy; otherwise a list with one entry per
        path, in input order, each being the result of
        `hdf5_util.load_variables_from_hdf5(path, trainable=False)`.
    """
    if not fishers:
        return None
    # Collect into a separate list. Rebinding `fishers` to a fresh list here
    # would discard the input paths, so nothing would be loaded.
    return [
        hdf5_util.load_variables_from_hdf5(
            os.path.expanduser(fisher_str), trainable=False
        )
        for fisher_str in fishers
    ]


def get_coeffs_set(models, coeff_mode, n_coeffs):
    """Build the set of merging coefficients to search over.

    Args:
        models: Sized collection of models; only its length is used.
        coeff_mode: Either "grid" or "random".
        n_coeffs: Forwarded to the coefficient factory in `merging`.

    Returns:
        For "grid": `merging.create_pairwise_grid_coeffs(n_coeffs)`.
        For "random": `merging.create_random_coeffs(len(models), n_coeffs)`.

    Raises:
        AssertionError: If `coeff_mode` is "grid" and there are not exactly
            two models.
        ValueError: If `coeff_mode` is neither "grid" nor "random".
    """
    n_models = len(models)
    if coeff_mode == "grid":
        assert n_models == 2, (
            "coeff_mode='grid' requires exactly 2 models, got {}.".format(n_models)
        )
        return merging.create_pairwise_grid_coeffs(n_coeffs)
    if coeff_mode == "random":
        return merging.create_random_coeffs(n_models, n_coeffs)
    raise ValueError(
        "Unknown coeff_mode {!r}; expected 'grid' or 'random'.".format(coeff_mode)
    )


def get_best_results(results):
    """Return the entry of `results` with the highest average score.

    Each entry is ranked by `evaluation.average_score(entry.score)`. Selection
    uses the built-in `max`, so the first entry wins on ties and an empty
    `results` raises ValueError.
    """

    def _average_of(merge_result):
        return evaluation.average_score(merge_result.score)

    return max(results, key=_average_of)

# The commented-out flag definitions below are kept for reference only; the
# keyword defaults of merge_and_evaluate() use the same values.
# flags.DEFINE_integer("n_examples", 4096, "")
# flags.DEFINE_integer("batch_size", 32, "")
# flags.DEFINE_integer("sequence_length", 128, "")

# flags.DEFINE_integer("n_coeffs", 51, "")
# flags.DEFINE_enum("coeff_mode", "grid", ["grid", "random"], "")

# flags.DEFINE_float("fisher_floor", 1e-6, "")
# flags.DEFINE_bool("favor_target_model", True, "")
# flags.DEFINE_bool("normalize_fishers", True, "")
def merge_and_evaluate(
    models,
    fishers,
    glue_task,
    split="validation",
    from_pt=True,
    n_examples=4096,
    batch_size=32,
    sequence_length=128,
    fisher_floor=1e-6,
    favor_target_model=True,
    normalize_fishers=True,
    coeff_mode="grid",
    n_coeffs=51,
):
    """Search merging coefficients for `models` on a GLUE task and report the best.

    Args:
        models: Model names or paths, loaded with `load_models`.
        fishers: Optional sequence of Fisher file paths, loaded with
            `load_fishers`. When given, it must have one entry per model.
        glue_task: GLUE task name used for both the dataset and the metric.
        split: Dataset split passed to `data.load_glue_dataset`.
        from_pt: Forwarded to `load_models`.
        n_examples: Number of examples kept via `ds.take(n_examples)`.
        batch_size: Batch size applied via `.batch(batch_size)`.
        sequence_length: Passed as `max_length` to `data.load_glue_dataset`.
        fisher_floor: Forwarded to `merging.merging_coefficients_search`.
        favor_target_model: Forwarded to `merging.merging_coefficients_search`.
        normalize_fishers: Forwarded to `merging.merging_coefficients_search`.
        coeff_mode: "grid" or "random"; see `get_coeffs_set`.
        n_coeffs: Number of coefficient settings requested from `get_coeffs_set`.

    Returns:
        The best merge result as chosen by `get_best_results`. It is also
        printed with `merging.print_merge_result`. Earlier versions of this
        function returned None.

    Raises:
        AssertionError: If `fishers` is given and its length differs from
            `models`.
    """
    if fishers:
        assert len(fishers) == len(models), (
            "Expected one Fisher file per model, got {} fishers for {} models.".format(
                len(fishers), len(models)
            )
        )

    # From here on `models` / `fishers` hold loaded objects, not paths.
    models, tokenizer = load_models(models, from_pt)
    fishers = load_fishers(fishers)

    ds = data.load_glue_dataset(
        task=glue_task,
        split=split,
        tokenizer=tokenizer,
        max_length=sequence_length,
    )
    ds = ds.take(n_examples).batch(batch_size)

    metric = evaluation.load_metric_for_glue_task(glue_task)

    coefficients_set = get_coeffs_set(models, coeff_mode, n_coeffs)

    results = merging.merging_coefficients_search(
        models,
        coefficients_set=coefficients_set,
        dataset=ds,
        metric=metric,
        fishers=fishers,
        fisher_floor=fisher_floor,
        favor_target_model=favor_target_model,
        normalize_fishers=normalize_fishers,
    )

    best = get_best_results(results)

    banner = 80 * "*"
    print(banner)
    print(" Best Merge")
    print(banner)
    merging.print_merge_result(best)

    # Hand the winner back so callers can use it, not just read the printout.
    return best

In [30]:
# EVAL_TASK=rte

# # Using PavanNeerudu/gpt2-finetuned-mnli doesn't work because of tokenizer issues
# # RTE_MODEL=PavanNeerudu/gpt2-finetuned-rte
# # MNLI_MODEL=PavanNeerudu/gpt2-finetuned-mnli

# # We need to fix the tokenizer issues locally to get this to work
# # Relative to the model_merging directory
# RTE_MODEL=../data/gpt2-finetuned-rte-fixed
# MNLI_MODEL=../data/gpt2-finetuned-mnli-fixed

# # Isometric merge.
# python3 ./scripts/merge_and_evaluate.py  \
#     --models=$RTE_MODEL,$MNLI_MODEL \
#     --glue_task=$EVAL_TASK \
#     --n_examples=100

# Isometric merge (no Fishers) of the locally fixed RTE and MNLI checkpoints,
# evaluated on RTE. Paths are relative to the model_merging directory.
# NOTE(review): the recorded output of this cell ends in an
# InvalidArgumentError about stacking tensors of shape [768,2] and [768,3].
# Presumably these are the two models' classification heads (different label
# counts) - confirm before relying on this run.
RTE_MODEL = "../data/gpt2-finetuned-rte-fixed"
MNLI_MODEL = "../data/gpt2-finetuned-mnli-fixed"
EVAL_TASK = "rte"

merge_and_evaluate(
    models=[RTE_MODEL, MNLI_MODEL],
    fishers=None,
    glue_task=EVAL_TASK,
    split="validation",
    n_examples=4096,
    batch_size=32,
    sequence_length=128,
    fisher_floor=1e-6,
    favor_target_model=True,
    normalize_fishers=True,
)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFGPT2ForSequenceClassification: ['transformer.h.11.attn.masked_bias', 'transformer.h.8.attn.masked_bias', 'transformer.h.5.attn.masked_bias', 'transformer.h.6.attn.masked_bias', 'transformer.h.2.attn.masked_bias', 'transformer.h.0.attn.masked_bias', 'transformer.h.4.attn.masked_bias', 'transformer.h.1.attn.masked_bias', 'transformer.h.9.attn.masked_bias', 'transformer.h.10.attn.masked_bias', 'transformer.h.3.attn.masked_bias', 'transformer.h.7.attn.masked_bias']
- This IS expected if you are initializing TFGPT2ForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFGPT2ForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a B

InvalidArgumentError: {{function_node __wrapped__Pack_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Shapes of all inputs must match: values[0].shape = [768,2] != values[1].shape = [768,3] [Op:Pack] name: packed