In [1]:
import pickle
import os
import numpy as np
import matplotlib.pyplot as plt

from utils.funs import count_outliers
from utils.symmetry_scores import get_scores_full

from transformers import RobertaModel, AutoModelForMaskedLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pickle file caching the query/key symmetry scores of the RoBERTa family.
# NOTE(review): `dir` shadows the builtin of the same name; it is kept because
# the save cell further down reads this exact variable.
dir = '../../../../_data/fig-symmetry-language-models/full-models/ROBERTA-query-key.pkl'

# Start from an empty result table unless a previous run left a cache behind.
if not os.path.isfile(dir):
    models = {}
else:
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load caches produced by this project.
    with open(dir, 'rb') as handle:
        models = pickle.load(handle)

In [3]:
# Score the query/key weight matrices of every RoBERTa checkpoint and store
# the result in `models` as [l, d, h, dh, score_List], where
#   l  = number of encoder layers
#   d  = hidden (embedding) size
#   dh = per-head dimension
#   h  = number of attention heads (d // dh)

# Per-head dimension; identical for all checkpoints below.
dh = 64

# Attribute-path templates handed to get_scores_full (pieces around the layer
# index).  RobertaModel exposes the encoder directly, whereas the masked-LM
# wrapper nests it under `.roberta`.
encoder_path = ["encoder.layer[", "].attention.self.query.weight", "].attention.self.key.weight"]
wrapped_path = ["roberta.encoder.layer[", "].attention.self.query.weight", "].attention.self.key.weight"]

# (key in `models`, loader class, checkpoint id, path template, l, d)
checkpoints = [
    # ROBERTA base (l = 12, d = 768, h = 12 ; 125M parameters)
    ('ROBERTA-base', RobertaModel, 'roberta-base', encoder_path, 12, 768),
    # ROBERTA large (l = 24, d = 1024, h = 16 ; 355M parameters)
    # Fix: l was 23, which recorded the wrong depth and presumably left the
    # last encoder layer unscored.
    ('ROBERTA-large', RobertaModel, 'roberta-large', encoder_path, 24, 1024),
    # DistillROBERTA base (l = 6, d = 768, h = 12 ; 82.2M parameters)
    ('distill-ROBERTA', AutoModelForMaskedLM, 'distilbert/distilroberta-base', wrapped_path, 6, 768),
]

for name, loader, checkpoint, path, l, d in checkpoints:
    h = d // dh
    model = loader.from_pretrained(checkpoint)

    # Guard against hand-typed hyper-parameters drifting from the checkpoint.
    cfg = model.config
    assert (cfg.num_hidden_layers, cfg.hidden_size, cfg.num_attention_heads) == (l, d, h), (
        f"{checkpoint}: expected (l, d, h) = {(l, d, h)}, checkpoint config has "
        f"{(cfg.num_hidden_layers, cfg.hidden_size, cfg.num_attention_heads)}"
    )

    score_List = get_scores_full(d, l, h, dh, model, path)
    models[name] = [l, d, h, dh, score_List]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model

In [4]:
# Save the updated score table back to the cache file.
# The load cell tolerates a missing cache, so on a first run the target
# directory may not exist yet; create it before opening the file for writing.
os.makedirs(os.path.dirname(dir) or '.', exist_ok=True)
with open(dir, 'wb') as file:
    pickle.dump(models, file)

In [None]:
# Plot helpers from the project's visualization module.
from utils.visualization import (
    symmetry_score_boxplot,
    symmetry_score_scatter,
    symmetry_score_outliers,
)

# Render each view of the symmetry scores stored in `models`.
# The calls stay as separate statements, in this order, so the cell's
# final-expression display is unchanged.
symmetry_score_boxplot(models)
symmetry_score_scatter(models)
symmetry_score_outliers(models)