*   This notebook contains the code used to generate the benchmark tables in the Numerical results section.

*   Connect the notebook to a GPU runtime (e.g. T4 or A100) before running the code.

# Environment settings

In [None]:
import sys

from google.colab import drive

# Mount Google Drive; every path used by this notebook lives under /content/drive.
drive.mount('/content/drive')

# Project root on Drive. Adding it to sys.path is what lets the project-local
# modules be imported further down (presumably `nn_tools` lives here - confirm).
PROJECT_DIR = '/content/drive/MyDrive/MA2/Master Thesis/project'

# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

Mounted at /content/drive


In [None]:
!pip install transformers
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency re

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle

import torch
from nn_tools.nn_compression import (Model, knapsack_greedy_compression, fool_svd_compression, count_parameters, plot_rank_ratio_heatmap)
from nn_tools import algebraic as alg
from nn_tools import fine_tuning as ft

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Number of target classes for each GLUE classification dataset, keyed by the
# dataset name used throughout this notebook.
classification_dict = dict(
    cola=2,
    sst2=2,
    mrpc=2,
    qqp=2,
    mnli=3,
    qnli=2,
    rte=2,
    wnli=2,
)

# Base benchmark

Evaluates the fine-tuned models (BERT, GPT-2 and RoBERTa) on the validation split of the selected GLUE tasks.

In [None]:
# Models and GLUE tasks covered by the base benchmark.
name_ = ['bert', 'gpt2', 'roberta']
dataset_name_ = ['cola', 'mrpc', 'rte', 'qnli', 'wnli']

task = 'classification'
split = "validation"

# One record per (model, dataset) pair, appended in loop order.
results = []

for name in name_:
    for dataset_name in dataset_name_:
        print(f'\n\n name : {name}, dataset_name : {dataset_name}')

        weight_path = f'/content/drive/MyDrive/MA2/Master Thesis/project/models/{name}/{name}_{dataset_name}.pth'
        # Not read again in this cell; the lookup raises KeyError for a dataset
        # that is missing from classification_dict.
        n_class = classification_dict[dataset_name]

        # Build the model wrapper from the weights at weight_path and tokenize
        # the split that is evaluated below.
        my_Model = Model(name, task, weight_path)
        my_Model.tokenize_dataset(dataset_name, split, train_fraction=1, test_fraction=1)

        base_acc = ft.evaluate(my_Model, dataset_name, split=split)

        # Keys are the column names of the saved benchmark records.
        current_result = {
            'model': name,
            'dataset': dataset_name,
            'base accuracy': base_acc,
            'benchmark': ft.evaluate_glue_task(
                my_Model.model,
                my_Model.tokenizer,
                dataset_name,
                batch_size=32,
                device=my_Model.device,
            ),
        }

        results.append(current_result)



 name : bert, dataset_name : cola


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/251k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/37.6k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/37.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.8245445829338447


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : bert, dataset_name : mrpc


train-00000-of-00001.parquet:   0%|          | 0.00/649k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.8480392156862745


Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : bert, dataset_name : rte


train-00000-of-00001.parquet:   0%|          | 0.00/584k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/69.0k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/621k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2490 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/277 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.6389891696750902


Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : bert, dataset_name : qnli


train-00000-of-00001.parquet:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/872k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/877k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/104743 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5463 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5463 [00:00<?, ? examples/s]

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.8663737872963573


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : bert, dataset_name : wnli


train-00000-of-00001.parquet:   0%|          | 0.00/38.8k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/13.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/635 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/71 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/146 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409


Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : cola


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.7785234899328859


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : mrpc


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.803921568627451


Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : rte


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.631768953068592


Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : qnli


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.8848617975471352


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : wnli


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.3380281690140845


Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : roberta, dataset_name : cola


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.8312559923298178


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : roberta, dataset_name : mrpc


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.8774509803921569


Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : roberta, dataset_name : rte


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.4729241877256318


Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : roberta, dataset_name : qnli


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.8890719384953323


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : roberta, dataset_name : wnli


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

In [None]:
# Show the collected records, then store them as a pickle on the mounted Drive.
print(results)

base_benchmark_path = "/content/drive/MyDrive/MA2/Master Thesis/project/results/benchmark/base_benchmark.pkl"
with open(base_benchmark_path, 'wb') as out_file:
    pickle.dump(results, out_file)

[{'model': 'bert', 'dataset': 'cola', 'base accuracy': 0.8245445829338447, 'benchmark': {'matthews': np.float64(0.5711485966856213)}}, {'model': 'bert', 'dataset': 'mrpc', 'base accuracy': 0.8480392156862745, 'benchmark': {'accuracy': 0.8406862745098039, 'f1': 0.8888888888888888}}, {'model': 'bert', 'dataset': 'rte', 'base accuracy': 0.6389891696750902, 'benchmark': {'accuracy': 0.631768953068592}}, {'model': 'bert', 'dataset': 'qnli', 'base accuracy': 0.8663737872963573, 'benchmark': {'accuracy': 0.8960278235401794}}, {'model': 'bert', 'dataset': 'wnli', 'base accuracy': 0.5633802816901409, 'benchmark': {'accuracy': 0.2676056338028169}}, {'model': 'gpt2', 'dataset': 'cola', 'base accuracy': 0.7785234899328859, 'benchmark': {'matthews': np.float64(0.43825163224076547)}}, {'model': 'gpt2', 'dataset': 'mrpc', 'base accuracy': 0.803921568627451, 'benchmark': {'accuracy': 0.803921568627451, 'f1': 0.868421052631579}}, {'model': 'gpt2', 'dataset': 'rte', 'base accuracy': 0.631768953068592, '

# Fool benchmark

Compresses the fine-tuned models with SVD using a uniform truncation rank ratio, then evaluates each compressed model before and after fine-tuning.

In [None]:
# Models, GLUE tasks and truncation rank ratios covered by the fool benchmark.
name_ = ['bert', 'gpt2', 'roberta']
dataset_name_ = ['cola', 'mrpc', 'rte', 'qnli', 'wnli']
rank_ratio_ = [0.6, 0.5, 0.1]

task = 'classification'
split = "validation"

# One record per (model, dataset, rank ratio) triple, appended in loop order.
results = []

for name in name_:
    for dataset_name in dataset_name_:
        for rank_ratio in rank_ratio_:
            print(f'\n\n name : {name}, dataset_name : {dataset_name}, rank ratio : {rank_ratio}')

            weight_path = f'/content/drive/MyDrive/MA2/Master Thesis/project/models/{name}/{name}_{dataset_name}.pth'
            # Not read again in this cell; the lookup raises KeyError for a
            # dataset that is missing from classification_dict.
            n_class = classification_dict[dataset_name]

            # A fresh wrapper is built for every rank ratio, so each compression
            # starts from the weights stored at weight_path.
            my_Model = Model(name, task, weight_path)
            my_Model.tokenize_dataset(dataset_name, split, train_fraction=1, test_fraction=1)

            # Compress, then record the parameter count relative to the
            # uncompressed model.
            best_Model = fool_svd_compression(my_Model, rank_ratio=rank_ratio)
            res_param = count_parameters(best_Model.model) / count_parameters(my_Model.model)
            pre_acc = ft.evaluate(best_Model, dataset_name, split=split)
            print(f'resulting params : {res_param}')

            # The "pre" GLUE metrics are measured BEFORE fine-tuning, next to
            # pre_acc. Previously they were computed after ft.fine_tune had been
            # called on best_Model; if fine_tune trains its argument in place
            # (not verifiable from this notebook - confirm), they would have
            # described the tuned model instead of the compressed one.
            pre_benchmark = ft.evaluate_glue_task(
                best_Model.model,
                best_Model.tokenizer,
                dataset_name,
                batch_size=32,
                device=best_Model.device,
            )

            tuned_Model = ft.fine_tune(best_Model, dataset_name, train_fraction=1, test_fraction=1, n_epochs=3, batch_size=16)
            post_acc = ft.evaluate(tuned_Model, dataset_name, split=split)
            post_benchmark = ft.evaluate_glue_task(
                tuned_Model.model,
                tuned_Model.tokenizer,
                dataset_name,
                batch_size=32,
                device=tuned_Model.device,
            )

            # Same keys, in the same order, as the records produced before.
            current_result = {
                'model': name,
                'dataset': dataset_name,
                'rank ratio': rank_ratio,
                'resulting parameters': res_param,
                'pre accuracy': pre_acc,
                'post accuracy': post_acc,
                'pre benchmark': pre_benchmark,
                'post benchmark': post_benchmark,
            }

            results.append(current_result)



 name : bert, dataset_name : cola, rank ratio : 0.6
computing SVD of the model's layers... 



eval accuracy: 0.4074784276126558
resulting params : 0.9212104646224393


Step,Training Loss
500,0.3399
1000,0.2308
1500,0.1458


eval accuracy: 0.8216682646212847


 name : bert, dataset_name : cola, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.32694151486097794
resulting params : 0.8060565283013891


Step,Training Loss
500,0.3951
1000,0.257
1500,0.1556


eval accuracy: 0.8111217641418984


 name : bert, dataset_name : cola, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.3087248322147651
resulting params : 0.3393800495266066


Step,Training Loss
500,0.618
1000,0.6137
1500,0.6087


eval accuracy: 0.6912751677852349


 name : bert, dataset_name : mrpc, rank ratio : 0.6
computing SVD of the model's layers... 



eval accuracy: 0.7573529411764706
resulting params : 0.9212104646224393


Step,Training Loss
500,0.3865


eval accuracy: 0.8259803921568627


 name : bert, dataset_name : mrpc, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.7328431372549019
resulting params : 0.8060565283013891


Step,Training Loss
500,0.5789


eval accuracy: 0.7377450980392157


 name : bert, dataset_name : mrpc, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.32107843137254904
resulting params : 0.3393800495266066


Step,Training Loss
500,0.604


eval accuracy: 0.7083333333333334


 name : bert, dataset_name : rte, rank ratio : 0.6
computing SVD of the model's layers... 



eval accuracy: 0.6028880866425993
resulting params : 0.9212104646224393


Step,Training Loss


eval accuracy: 0.5884476534296029


 name : bert, dataset_name : rte, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.5631768953068592
resulting params : 0.8060565283013891


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : bert, dataset_name : rte, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.5270758122743683
resulting params : 0.3393800495266066


Step,Training Loss


eval accuracy: 0.5415162454873647


 name : bert, dataset_name : qnli, rank ratio : 0.6
computing SVD of the model's layers... 



eval accuracy: 0.7664287021782903
resulting params : 0.9212104646224393


Step,Training Loss
500,0.3142
1000,0.2871
1500,0.3322
2000,0.3533
2500,0.6607
3000,0.7004
3500,0.7018
4000,0.6993
4500,0.7021
5000,0.702


eval accuracy: 0.4946000366099213


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : bert, dataset_name : qnli, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.71499176276771
resulting params : 0.8060565283013891


Step,Training Loss
500,0.3311
1000,0.3033
1500,0.3204
2000,0.3213
2500,0.2892
3000,0.3188
3500,0.3245
4000,0.3056
4500,0.3023
5000,0.3195


eval accuracy: 0.8696686802123376


 name : bert, dataset_name : qnli, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.5132710964671426
resulting params : 0.3393800495266066


Step,Training Loss
500,0.6863
1000,0.6687
1500,0.6636
2000,0.6591
2500,0.6533
3000,0.6491
3500,0.6448
4000,0.6296
4500,0.6158
5000,0.5939


eval accuracy: 0.7755811825004576


 name : bert, dataset_name : wnli, rank ratio : 0.6


train-00000-of-00001.parquet:   0%|          | 0.00/38.8k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/13.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/635 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/71 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/146 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409
resulting params : 0.9212104646224393


Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Step,Training Loss


eval accuracy: 0.4084507042253521


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : bert, dataset_name : wnli, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.5352112676056338
resulting params : 0.8060565283013891


Step,Training Loss


eval accuracy: 0.4788732394366197


 name : bert, dataset_name : wnli, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.5633802816901409
resulting params : 0.3393800495266066


Step,Training Loss


eval accuracy: 0.5352112676056338


 name : gpt2, dataset_name : cola, rank ratio : 0.6


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.31064237775647174
resulting params : 0.8625473666020712


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Step,Training Loss
500,0.6249
1000,0.6116
1500,0.5969


eval accuracy: 0.6845637583892618


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : cola, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.3624161073825503
resulting params : 0.7724921929965316


Step,Training Loss
500,0.6263
1000,0.6142
1500,0.5895


eval accuracy: 0.6855225311601151


 name : gpt2, dataset_name : cola, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.3231064237775647
resulting params : 0.40753175259513436


Step,Training Loss
500,0.6171
1000,0.6129
1500,0.6078


eval accuracy: 0.6912751677852349


 name : gpt2, dataset_name : mrpc, rank ratio : 0.6


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.34558823529411764
resulting params : 0.8625473666020712


Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Step,Training Loss
500,0.6218


eval accuracy: 0.7352941176470589


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : mrpc, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.33578431372549017
resulting params : 0.7724921929965316


Step,Training Loss
500,0.6522


eval accuracy: 0.7034313725490197


 name : gpt2, dataset_name : mrpc, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.3161764705882353
resulting params : 0.40753175259513436


Step,Training Loss
500,0.6385


eval accuracy: 0.6813725490196079


 name : gpt2, dataset_name : rte, rank ratio : 0.6


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

computing SVD of the model's layers... 



Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.49458483754512633
resulting params : 0.8625473666020712


Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss


eval accuracy: 0.5740072202166066


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : rte, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.4729241877256318
resulting params : 0.7724921929965316


Step,Training Loss


eval accuracy: 0.5595667870036101


 name : gpt2, dataset_name : rte, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.45126353790613716
resulting params : 0.40753175259513436


Step,Training Loss


eval accuracy: 0.4296028880866426


 name : gpt2, dataset_name : qnli, rank ratio : 0.6


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

computing SVD of the model's layers... 



Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.7929708951125755
resulting params : 0.8625473666020712


Map:   0%|          | 0/104743 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss
500,0.5131
1000,0.4711
1500,0.4612
2000,0.4444
2500,0.4311
3000,0.4291
3500,0.4276
4000,0.4127
4500,0.4171
5000,0.4012


eval accuracy: 0.8617975471352737


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : qnli, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.6080907926047959
resulting params : 0.7724921929965316


Step,Training Loss
500,0.5383
1000,0.5005
1500,0.4787
2000,0.4701
2500,0.4505
3000,0.4476
3500,0.4499
4000,0.4398
4500,0.4334
5000,0.4169


eval accuracy: 0.8608822991030569


 name : gpt2, dataset_name : qnli, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.47300018304960645
resulting params : 0.40753175259513436


Step,Training Loss
500,0.6992
1000,0.6937
1500,0.6801
2000,0.6699
2500,0.6667
3000,0.6612
3500,0.6626
4000,0.6153
4500,0.5766
5000,0.5549


eval accuracy: 0.8052352187442797


 name : gpt2, dataset_name : wnli, rank ratio : 0.6


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409
resulting params : 0.8625473666020712


Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Step,Training Loss


eval accuracy: 0.43661971830985913


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : wnli, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.5070422535211268
resulting params : 0.7724921929965316


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : gpt2, dataset_name : wnli, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.5492957746478874
resulting params : 0.40753175259513436


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : cola, rank ratio : 0.6


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.3537871524448706
resulting params : 0.9307952519098508


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Step,Training Loss
500,0.6155
1000,0.6138
1500,0.6089


eval accuracy: 0.6912751677852349


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : roberta, dataset_name : cola, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.3835091083413231
resulting params : 0.8296498508550174


Step,Training Loss
500,0.6153
1000,0.614
1500,0.6087


eval accuracy: 0.6912751677852349


 name : roberta, dataset_name : cola, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.6903163950143816
resulting params : 0.41974480447490303


Step,Training Loss
500,0.6117
1000,0.6121
1500,0.6089


eval accuracy: 0.6912751677852349


 name : roberta, dataset_name : mrpc, rank ratio : 0.6


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.4852941176470588
resulting params : 0.9307952519098508


Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Step,Training Loss
500,0.6379


eval accuracy: 0.6838235294117647


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : roberta, dataset_name : mrpc, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.6102941176470589
resulting params : 0.8296498508550174


Step,Training Loss
500,0.6379


eval accuracy: 0.6838235294117647


 name : roberta, dataset_name : mrpc, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.6838235294117647
resulting params : 0.41974480447490303


Step,Training Loss
500,0.6353


eval accuracy: 0.6838235294117647


 name : roberta, dataset_name : rte, rank ratio : 0.6


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

computing SVD of the model's layers... 



Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.4729241877256318
resulting params : 0.9307952519098508


Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss


eval accuracy: 0.4729241877256318


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : roberta, dataset_name : rte, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.4729241877256318
resulting params : 0.8296498508550174


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.4729241877256318
resulting params : 0.41974480447490303


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : qnli, rank ratio : 0.6


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

computing SVD of the model's layers... 



Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.6677649643053267
resulting params : 0.9307952519098508


Map:   0%|          | 0/104743 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss
500,0.698
1000,0.6953
1500,0.6949
2000,0.6947
2500,0.6942
3000,0.695
3500,0.6944
4000,0.6938
4500,0.693
5000,0.6939


eval accuracy: 0.4946000366099213


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : roberta, dataset_name : qnli, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.5861248398315944
resulting params : 0.8296498508550174


Step,Training Loss
500,0.6984
1000,0.6952
1500,0.6948
2000,0.6947
2500,0.6941
3000,0.6949
3500,0.6943
4000,0.6938
4500,0.693
5000,0.6938


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : qnli, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.4927695405454878
resulting params : 0.41974480447490303


Step,Training Loss
500,0.7014
1000,0.695
1500,0.6949
2000,0.6949
2500,0.6947
3000,0.6954
3500,0.6943
4000,0.6941
4500,0.6936
5000,0.6942


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : wnli, rank ratio : 0.6


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

computing SVD of the model's layers... 



Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409
resulting params : 0.9307952519098508


Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Step,Training Loss


eval accuracy: 0.5633802816901409


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : roberta, dataset_name : wnli, rank ratio : 0.5
computing SVD of the model's layers... 



eval accuracy: 0.5774647887323944
resulting params : 0.8296498508550174


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : roberta, dataset_name : wnli, rank ratio : 0.1
computing SVD of the model's layers... 



eval accuracy: 0.43661971830985913
resulting params : 0.41974480447490303


Step,Training Loss


eval accuracy: 0.5633802816901409


In [None]:
# Echo the collected benchmark records so they stay visible in the cell output,
# then serialize the very same list to Drive.
print(results)

fool_results_path = "/content/drive/MyDrive/MA2/Master Thesis/project/results/benchmark/fool_compression.pkl"
with open(fool_results_path, 'wb') as pkl_file:
    pickle.dump(results, pkl_file)

[{'model': 'bert', 'dataset': 'cola', 'rank ratio': 0.6, 'resulting parameters': 0.9212104646224393, 'pre accuracy': 0.4074784276126558, 'post accuracy': 0.8216682646212847, 'pre benchmark': {'matthews': np.float64(0.5606238208505129)}, 'post benchmark': {'matthews': np.float64(0.5606238208505129)}}, {'model': 'bert', 'dataset': 'cola', 'rank ratio': 0.5, 'resulting parameters': 0.8060565283013891, 'pre accuracy': 0.32694151486097794, 'post accuracy': 0.8111217641418984, 'pre benchmark': {'matthews': np.float64(0.5332198659134496)}, 'post benchmark': {'matthews': np.float64(0.5332198659134496)}}, {'model': 'bert', 'dataset': 'cola', 'rank ratio': 0.1, 'resulting parameters': 0.3393800495266066, 'pre accuracy': 0.3087248322147651, 'post accuracy': 0.6912751677852349, 'pre benchmark': {'matthews': 0.0}, 'post benchmark': {'matthews': 0.0}}, {'model': 'bert', 'dataset': 'mrpc', 'rank ratio': 0.6, 'resulting parameters': 0.9212104646224393, 'pre accuracy': 0.7573529411764706, 'post accurac

# Compression without post fine-tuning / Task-specific proxy

This section compresses the fine-tuned models using a task-specific proxy, without any fine-tuning after compression.

In [None]:
# Benchmark grid: each (model, GLUE dataset, tau) combination is compressed and
# evaluated exactly once by the nested loops below.
name_ = ['bert', 'gpt2', 'roberta']
dataset_name_ = ['cola', 'mrpc', 'rte', 'qnli', 'wnli']
tau_ = [0.25, 0.5]

# Settings shared by every run: `task` is given to Model, `split` to the
# tokenization/evaluation calls, and the remaining ones are forwarded to
# knapsack_greedy_compression (alpha and method are also stored in each record).
task, split = 'classification', "validation"
alpha, method = 0.1, 'afm'
error_type, lr_metric = 'distillation', 'effective_rank'
proxy_type, complexity_metric = 'dataset', 'max_param'

results = []  # one record (dict) per (model, dataset, tau) run

for name in name_:
  for dataset_name in dataset_name_:
    # The checkpoint path only depends on the model/dataset pair, not on tau.
    weight_path = f'/content/drive/MyDrive/MA2/Master Thesis/project/models/{name}/{name}_{dataset_name}.pth'

    for tau in tau_:
      print(f'\n\n name : {name}, dataset_name : {dataset_name}, tau : {tau}')

      # Raises KeyError for a dataset missing from classification_dict; the
      # looked-up value itself is not used anywhere else in this cell.
      n_class = classification_dict[dataset_name]

      # A fresh model is loaded from the checkpoint for every tau value.
      my_Model = Model(name, task, weight_path)
      my_Model.tokenize_dataset(dataset_name, split, train_fraction=1, test_fraction=1)

      # Accuracy of the uncompressed model, used as the reference below.
      base_acc = ft.evaluate(my_Model, dataset_name, split=split)

      # tau is handed to the compression routine as its `output_threshold`.
      # NOTE(review): batch/seq_len/hidden_size/mu/sigma presumably only matter
      # for a non-dataset proxy — confirm against nn_tools.nn_compression.
      compression_kwargs = dict(
          min_rank_ratios=np.linspace(0.1, 0.6, 6), max_rank_ratio=0.7, rank_ratio_step=0.1,
          output_threshold=tau,
          proxy_type=proxy_type, dataset_name=dataset_name,
          batch=16, seq_len=10, hidden_size=768, mu=0, sigma=1,
          complexity_metric=complexity_metric, lr_metric=lr_metric,
          error_type=error_type, method=method,
          base_accuracy=base_acc, alpha=alpha,
      )
      best_Model, best_rank_ratio_dict, res_param = knapsack_greedy_compression(my_Model, **compression_kwargs)

      # Accuracy of the compressed model on the same split (no post fine-tuning).
      acc = ft.evaluate(best_Model, dataset_name, split=split)

      # Assemble the record in one go; key order matches the original cell.
      current_result = {
          'model': name,
          'dataset': dataset_name,
          'tau': tau,
          'alpha': alpha,
          'method': method,
          'resulting parameters': res_param,
          'relative accuracy': acc/base_acc,
          'benchmark': ft.evaluate_glue_task(best_Model.model, best_Model.tokenizer, dataset_name, batch_size=32, device=best_Model.device),
      }
      results.append(current_result)



 name : bert, dataset_name : cola, tau : 0.25


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/251k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/37.6k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/37.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.8245445829338447


eval accuracy: 0.800575263662512


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : bert, dataset_name : cola, tau : 0.5


eval accuracy: 0.8245445829338447


eval accuracy: 0.7334611697027804


 name : bert, dataset_name : mrpc, tau : 0.25


train-00000-of-00001.parquet:   0%|          | 0.00/649k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.8480392156862745


eval accuracy: 0.6446078431372549


Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : bert, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8480392156862745


eval accuracy: 0.4534313725490196


 name : bert, dataset_name : rte, tau : 0.25


train-00000-of-00001.parquet:   0%|          | 0.00/584k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/69.0k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/621k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2490 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/277 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.6389891696750902


eval accuracy: 0.628158844765343


Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : bert, dataset_name : rte, tau : 0.5


eval accuracy: 0.6389891696750902


eval accuracy: 0.5342960288808665


 name : bert, dataset_name : qnli, tau : 0.25


train-00000-of-00001.parquet:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/872k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/877k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/104743 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5463 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5463 [00:00<?, ? examples/s]

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.8663737872963573


eval accuracy: 0.8272011715174813


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : bert, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8663737872963573


eval accuracy: 0.8453230825553725


 name : bert, dataset_name : wnli, tau : 0.25


train-00000-of-00001.parquet:   0%|          | 0.00/38.8k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/13.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/635 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/71 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/146 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409


eval accuracy: 0.5352112676056338


Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : bert, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


eval accuracy: 0.4788732394366197


 name : gpt2, dataset_name : cola, tau : 0.25


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.7785234899328859


eval accuracy: 0.7593480345158198


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : cola, tau : 0.5


eval accuracy: 0.7785234899328859


eval accuracy: 0.716203259827421


 name : gpt2, dataset_name : mrpc, tau : 0.25


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.803921568627451


eval accuracy: 0.803921568627451


Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.803921568627451


eval accuracy: 0.7696078431372549


 name : gpt2, dataset_name : rte, tau : 0.25


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.631768953068592


eval accuracy: 0.6064981949458483


Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : rte, tau : 0.5


eval accuracy: 0.631768953068592


eval accuracy: 0.5812274368231047


 name : gpt2, dataset_name : qnli, tau : 0.25


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.8848617975471352


eval accuracy: 0.8755262676185246


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8848617975471352


eval accuracy: 0.8279333699432546


 name : gpt2, dataset_name : wnli, tau : 0.25


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.3380281690140845


eval accuracy: 0.43661971830985913


Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : gpt2, dataset_name : wnli, tau : 0.5


eval accuracy: 0.3380281690140845


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : cola, tau : 0.25


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

eval accuracy: 0.8312559923298178


eval accuracy: 0.8207094918504314


Map:   0%|          | 0/1043 [00:00<?, ? examples/s]



 name : roberta, dataset_name : cola, tau : 0.5


eval accuracy: 0.8312559923298178


eval accuracy: 0.8130393096836049


 name : roberta, dataset_name : mrpc, tau : 0.25


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

eval accuracy: 0.8774509803921569


eval accuracy: 0.8700980392156863


Map:   0%|          | 0/408 [00:00<?, ? examples/s]



 name : roberta, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8774509803921569


eval accuracy: 0.7696078431372549


 name : roberta, dataset_name : rte, tau : 0.25


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.4729241877256318


eval accuracy: 0.4729241877256318


Map:   0%|          | 0/277 [00:00<?, ? examples/s]



 name : roberta, dataset_name : rte, tau : 0.5


eval accuracy: 0.4729241877256318


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : qnli, tau : 0.25


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

eval accuracy: 0.8890719384953323


eval accuracy: 0.8665568369028006


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]



 name : roberta, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8890719384953323


eval accuracy: 0.7964488376349991


 name : roberta, dataset_name : wnli, tau : 0.25


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

eval accuracy: 0.5633802816901409


eval accuracy: 0.5633802816901409


Map:   0%|          | 0/71 [00:00<?, ? examples/s]



 name : roberta, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


eval accuracy: 0.5492957746478874


In [None]:
# Echo the collected benchmark records in the cell output, then pickle them
# to the results folder on the mounted Drive.
print(results)
results_path = "/content/drive/MyDrive/MA2/Master Thesis/project/results/benchmark/no_post_fine_tuning__task_specific.pkl"
with open(results_path, 'wb') as results_file:
    pickle.dump(results, results_file)

[{'model': 'bert', 'dataset': 'cola', 'tau': 0.25, 'alpha': 0.1, 'method': 'afm', 'resulting parameters': 0.9245396427587655, 'relative accuracy': 0.9709302325581395, 'benchmark': {'matthews': np.float64(0.5484845164994472)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 0.5, 'alpha': 0.1, 'method': 'afm', 'resulting parameters': 0.8705205167472391, 'relative accuracy': 0.8895348837209301, 'benchmark': {'matthews': np.float64(0.4681762291184293)}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.25, 'alpha': 0.1, 'method': 'afm', 'resulting parameters': 0.9244863197906816, 'relative accuracy': 0.7601156069364162, 'benchmark': {'accuracy': 0.8259803921568627, 'f1': 0.8834154351395731}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.5, 'alpha': 0.1, 'method': 'afm', 'resulting parameters': 0.8867021194683289, 'relative accuracy': 0.5346820809248555, 'benchmark': {'accuracy': 0.7965686274509803, 'f1': 0.865040650406504}}, {'model': 'bert', 'dataset': 'rte', 'tau': 0.25, 'alpha': 0.1, 'method

# Compression without post fine-tuning / Random proxy

Compress the fine-tuned models using a random proxy input, without any fine-tuning after compression.

In [None]:
# Benchmark sweep: compression with method='svd' and proxy_type='random',
# evaluated directly after compression (no fine-tuning step in this cell).

# Sweep axes: every (model, dataset, tau) combination is run exactly once.
name_ = ['bert', 'gpt2', 'roberta']
dataset_name_ = ['cola', 'mrpc', 'rte', 'qnli', 'wnli']
tau_ = [0.1, 0.25, 0.5]

# Settings held fixed across the whole sweep.
task = 'classification'
split = "validation"
alpha = None
method = 'svd'
error_type = 'hidden_output'
lr_metric = 'effective_rank'
proxy_type = 'random'
complexity_metric = 'n_param'

# One dict per run, appended in sweep order.
results = []

for name in name_:
  for dataset_name in dataset_name_:
    for tau in tau_:
      print(f'\n\n name : {name}, dataset_name : {dataset_name}, tau : {tau}')

      # Fine-tuned checkpoint for this (model, dataset) pair.
      weight_path = f'/content/drive/MyDrive/MA2/Master Thesis/project/models/{name}/{name}_{dataset_name}.pth'
      # Looked up for every run; not referenced again inside this cell.
      n_class = classification_dict[dataset_name]

      # Reload the model for every run and prepare the evaluation split.
      my_Model = Model(name, task, weight_path)
      my_Model.tokenize_dataset(dataset_name, split, train_fraction=1, test_fraction=1)

      # Reference score of the loaded model, used as the denominator below.
      base_acc = ft.evaluate(my_Model, dataset_name, split=split)

      # tau is passed to the compression routine as its output threshold.
      best_Model, best_rank_ratio_dict, res_param = knapsack_greedy_compression(
          my_Model,
          min_rank_ratios=np.linspace(0.1, 0.6, 6),
          max_rank_ratio=0.7,
          rank_ratio_step=0.1,
          output_threshold=tau,
          proxy_type=proxy_type,
          dataset_name=dataset_name,
          batch=16,
          seq_len=10,
          hidden_size=768,
          mu=0,
          sigma=1,
          complexity_metric=complexity_metric,
          lr_metric=lr_metric,
          error_type=error_type,
          method=method,
          base_accuracy=base_acc,
          alpha=alpha,
      )

      # Score of the compressed model on the same split.
      acc = ft.evaluate(best_Model, dataset_name, split=split)

      # Record for this run; keys are the ones stored in the pickled results.
      current_result = {
          'model': name,
          'dataset': dataset_name,
          'tau': tau,
          'alpha': alpha,
          'method': method,
          'resulting parameters': res_param,
          'relative accuracy': acc/base_acc,
          'benchmark': ft.evaluate_glue_task(best_Model.model, best_Model.tokenizer, dataset_name, batch_size=32, device=best_Model.device),
      }

      results.append(current_result)



 name : bert, dataset_name : cola, tau : 0.1


eval accuracy: 0.8245445829338447


eval accuracy: 0.8245445829338447


 name : bert, dataset_name : cola, tau : 0.25


eval accuracy: 0.8245445829338447


eval accuracy: 0.825503355704698


 name : bert, dataset_name : cola, tau : 0.5


eval accuracy: 0.8245445829338447


eval accuracy: 0.8283796740172579


 name : bert, dataset_name : mrpc, tau : 0.1


eval accuracy: 0.8480392156862745


eval accuracy: 0.8480392156862745


 name : bert, dataset_name : mrpc, tau : 0.25


eval accuracy: 0.8480392156862745


eval accuracy: 0.8014705882352942


 name : bert, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8480392156862745


eval accuracy: 0.5343137254901961


 name : bert, dataset_name : rte, tau : 0.1


eval accuracy: 0.6389891696750902


eval accuracy: 0.6389891696750902


 name : bert, dataset_name : rte, tau : 0.25


eval accuracy: 0.6389891696750902


eval accuracy: 0.6425992779783394


 name : bert, dataset_name : rte, tau : 0.5


eval accuracy: 0.6389891696750902


eval accuracy: 0.6389891696750902


 name : bert, dataset_name : qnli, tau : 0.1


eval accuracy: 0.8663737872963573


eval accuracy: 0.8660076880834706


 name : bert, dataset_name : qnli, tau : 0.25


eval accuracy: 0.8663737872963573


eval accuracy: 0.865641588870584


 name : bert, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8663737872963573


eval accuracy: 0.8628958447739338


 name : bert, dataset_name : wnli, tau : 0.1


eval accuracy: 0.5633802816901409


eval accuracy: 0.5633802816901409


 name : bert, dataset_name : wnli, tau : 0.25


eval accuracy: 0.5633802816901409


eval accuracy: 0.5633802816901409


 name : bert, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


eval accuracy: 0.4507042253521127


 name : gpt2, dataset_name : cola, tau : 0.1


eval accuracy: 0.7785234899328859


eval accuracy: 0.7727708533077661


 name : gpt2, dataset_name : cola, tau : 0.25


eval accuracy: 0.7785234899328859


eval accuracy: 0.3710450623202301


 name : gpt2, dataset_name : cola, tau : 0.5


eval accuracy: 0.7785234899328859


eval accuracy: 0.3087248322147651


 name : gpt2, dataset_name : mrpc, tau : 0.1


eval accuracy: 0.803921568627451


eval accuracy: 0.7818627450980392


 name : gpt2, dataset_name : mrpc, tau : 0.25


eval accuracy: 0.803921568627451


eval accuracy: 0.696078431372549


 name : gpt2, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.803921568627451


eval accuracy: 0.3872549019607843


 name : gpt2, dataset_name : rte, tau : 0.1


eval accuracy: 0.631768953068592


eval accuracy: 0.5740072202166066


 name : gpt2, dataset_name : rte, tau : 0.25


eval accuracy: 0.631768953068592


eval accuracy: 0.5740072202166066


 name : gpt2, dataset_name : rte, tau : 0.5


eval accuracy: 0.631768953068592


eval accuracy: 0.516245487364621


 name : gpt2, dataset_name : qnli, tau : 0.1


eval accuracy: 0.8848617975471352


eval accuracy: 0.8755262676185246


 name : gpt2, dataset_name : qnli, tau : 0.25


eval accuracy: 0.8848617975471352


eval accuracy: 0.8769906644700713


 name : gpt2, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8848617975471352


eval accuracy: 0.565806333516383


 name : gpt2, dataset_name : wnli, tau : 0.1


eval accuracy: 0.3380281690140845


eval accuracy: 0.5492957746478874


 name : gpt2, dataset_name : wnli, tau : 0.25


eval accuracy: 0.3380281690140845


eval accuracy: 0.5633802816901409


 name : gpt2, dataset_name : wnli, tau : 0.5


eval accuracy: 0.3380281690140845


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : cola, tau : 0.1


eval accuracy: 0.8312559923298178


eval accuracy: 0.8312559923298178


 name : roberta, dataset_name : cola, tau : 0.25


eval accuracy: 0.8312559923298178


eval accuracy: 0.8312559923298178


 name : roberta, dataset_name : cola, tau : 0.5


eval accuracy: 0.8312559923298178


eval accuracy: 0.8302972195589645


 name : roberta, dataset_name : mrpc, tau : 0.1


eval accuracy: 0.8774509803921569


eval accuracy: 0.8774509803921569


 name : roberta, dataset_name : mrpc, tau : 0.25


eval accuracy: 0.8774509803921569


eval accuracy: 0.8774509803921569


 name : roberta, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8774509803921569


eval accuracy: 0.803921568627451


 name : roberta, dataset_name : rte, tau : 0.1


eval accuracy: 0.4729241877256318


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 0.25


eval accuracy: 0.4729241877256318


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 0.5


eval accuracy: 0.4729241877256318


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : qnli, tau : 0.1


eval accuracy: 0.8890719384953323


eval accuracy: 0.8892549881017756


 name : roberta, dataset_name : qnli, tau : 0.25


eval accuracy: 0.8890719384953323


eval accuracy: 0.8896210873146623


 name : roberta, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8890719384953323


eval accuracy: 0.6307889438037708


 name : roberta, dataset_name : wnli, tau : 0.1


eval accuracy: 0.5633802816901409


eval accuracy: 0.5352112676056338


 name : roberta, dataset_name : wnli, tau : 0.25


eval accuracy: 0.5633802816901409


eval accuracy: 0.4225352112676056


 name : roberta, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


eval accuracy: 0.43661971830985913


In [None]:
# Echo the collected benchmark records in the cell output, then pickle them
# to the results folder on the mounted Drive.
print(results)
results_path = "/content/drive/MyDrive/MA2/Master Thesis/project/results/benchmark/no_post_fine_tuning__random.pkl"
with open(results_path, 'wb') as results_file:
    pickle.dump(results, results_file)

[{'model': 'bert', 'dataset': 'cola', 'tau': 0.1, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.9972852964573437, 'relative accuracy': 1.0, 'benchmark': {'matthews': np.float64(0.5711485966856213)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 0.25, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.9865176738785905, 'relative accuracy': 1.0011627906976743, 'benchmark': {'matthews': np.float64(0.573727765379499)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 0.5, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.9595389921600989, 'relative accuracy': 1.0046511627906975, 'benchmark': {'matthews': np.float64(0.5814477644432363)}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.1, 'alpha': None, 'method': 'svd', 'resulting parameters': 1, 'relative accuracy': 1.0, 'benchmark': {'accuracy': 0.8406862745098039, 'f1': 0.8888888888888888}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.25, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.9593636237141908, 'r

# Compression with post fine-tuning / Task-specific proxy

Compress the fine-tuned models using a task-specific proxy, then fine-tune each compressed model for 3 epochs.

In [None]:
# Benchmark sweep: compression with method='afm' and proxy_type='dataset',
# followed by 3 epochs of fine-tuning before the final evaluation.

# Sweep axes: every (model, dataset, tau) combination is run exactly once.
name_ = ['bert', 'gpt2', 'roberta']
dataset_name_ = ['cola', 'mrpc', 'rte', 'qnli', 'wnli']
tau_ = [0.5, 0.75, 1.0, 1.25]

# Settings held fixed across the whole sweep.
task = 'classification'
split = "validation"
alpha = None
method = 'afm'
error_type = 'hidden_output'
lr_metric = 'effective_rank'
proxy_type = 'dataset'
complexity_metric = 'n_param'
min_rank_ratios = [0.05, 0.1, 0.2, 0.3, 0.4]

# One dict per run, appended in sweep order.
results = []

for name in name_:
  for dataset_name in dataset_name_:
    for tau in tau_:
      print(f'\n\n name : {name}, dataset_name : {dataset_name}, tau : {tau}')

      # Fine-tuned checkpoint for this (model, dataset) pair.
      weight_path = f'/content/drive/MyDrive/MA2/Master Thesis/project/models/{name}/{name}_{dataset_name}.pth'
      # Looked up for every run; not referenced again inside this cell.
      n_class = classification_dict[dataset_name]

      # Reload the model for every run and prepare the evaluation split.
      my_Model = Model(name, task, weight_path)
      my_Model.tokenize_dataset(dataset_name, split, train_fraction=1, test_fraction=1)

      # Reference score of the loaded model, used as the denominator below.
      base_acc = ft.evaluate(my_Model, dataset_name, split=split)

      # tau is passed to the compression routine as its output threshold.
      best_Model, best_rank_ratio_dict, res_param = knapsack_greedy_compression(
          my_Model,
          min_rank_ratios=min_rank_ratios,
          max_rank_ratio=0.7,
          rank_ratio_step=0.1,
          output_threshold=tau,
          proxy_type=proxy_type,
          dataset_name=dataset_name,
          batch=16,
          seq_len=10,
          hidden_size=768,
          mu=0,
          sigma=1,
          complexity_metric=complexity_metric,
          lr_metric=lr_metric,
          error_type=error_type,
          method=method,
          base_accuracy=base_acc,
          alpha=alpha,
      )

      # Fine-tune the compressed model, then score the tuned model on the same split.
      tuned_Model = ft.fine_tune(best_Model, dataset_name, train_fraction=1, test_fraction=1, n_epochs=3, batch_size=16)
      acc = ft.evaluate(tuned_Model, dataset_name, split=split)

      # Record for this run; keys are the ones stored in the pickled results.
      current_result = {
          'model': name,
          'dataset': dataset_name,
          'tau': tau,
          'alpha': alpha,
          'method': method,
          'resulting parameters': res_param,
          'relative accuracy': acc/base_acc,
          'benchmark': ft.evaluate_glue_task(tuned_Model.model, tuned_Model.tokenizer, dataset_name, batch_size=32, device=tuned_Model.device),
      }

      results.append(current_result)



 name : bert, dataset_name : cola, tau : 0.5


eval accuracy: 0.8245445829338447


Step,Training Loss
500,0.3016
1000,0.2066
1500,0.1237


eval accuracy: 0.8149568552253116


 name : bert, dataset_name : cola, tau : 0.75


eval accuracy: 0.8245445829338447


Step,Training Loss
500,0.3612
1000,0.2398
1500,0.1367


eval accuracy: 0.8024928092042186


 name : bert, dataset_name : cola, tau : 1.0


eval accuracy: 0.8245445829338447


Step,Training Loss
500,0.4114
1000,0.26
1500,0.1574


eval accuracy: 0.8120805369127517


 name : bert, dataset_name : cola, tau : 1.25


eval accuracy: 0.8245445829338447


Step,Training Loss
500,0.482
1000,0.2997
1500,0.1899


eval accuracy: 0.7938638542665388


 name : bert, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8480392156862745


Step,Training Loss
500,0.2476


eval accuracy: 0.8357843137254902


 name : bert, dataset_name : mrpc, tau : 0.75


eval accuracy: 0.8480392156862745


Step,Training Loss
500,0.4306


eval accuracy: 0.8137254901960784


 name : bert, dataset_name : mrpc, tau : 1.0


eval accuracy: 0.8480392156862745


Step,Training Loss
500,0.649


eval accuracy: 0.6838235294117647


 name : bert, dataset_name : mrpc, tau : 1.25


eval accuracy: 0.8480392156862745


Step,Training Loss
500,0.6023


eval accuracy: 0.7132352941176471


 name : bert, dataset_name : rte, tau : 0.5


eval accuracy: 0.6389891696750902


Step,Training Loss


eval accuracy: 0.631768953068592


 name : bert, dataset_name : rte, tau : 0.75


eval accuracy: 0.6389891696750902


Step,Training Loss


eval accuracy: 0.5884476534296029


 name : bert, dataset_name : rte, tau : 1.0


eval accuracy: 0.6389891696750902


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : bert, dataset_name : rte, tau : 1.25


eval accuracy: 0.6389891696750902


Step,Training Loss


eval accuracy: 0.5776173285198556


 name : bert, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8663737872963573


Step,Training Loss
500,0.2598
1000,0.2452
1500,0.2614
2000,0.2596
2500,0.2423
3000,0.248
3500,0.2528
4000,0.2536
4500,0.2507
5000,0.24


eval accuracy: 0.8813838550247117


 name : bert, dataset_name : qnli, tau : 0.75


eval accuracy: 0.8663737872963573


Step,Training Loss
500,0.4058
1000,0.3279
1500,0.3299
2000,0.3229
2500,0.3089
3000,0.3147
3500,0.32
4000,0.3211
4500,0.3117
5000,0.3147


eval accuracy: 0.880468606992495


 name : bert, dataset_name : qnli, tau : 1.0


eval accuracy: 0.8663737872963573


Step,Training Loss
500,0.4774
1000,0.3854
1500,0.3832
2000,0.3798
2500,0.3547
3000,0.3508
3500,0.3616
4000,0.3551
4500,0.3534
5000,0.3412


eval accuracy: 0.8575874061870767


 name : bert, dataset_name : qnli, tau : 1.25


eval accuracy: 0.8663737872963573


Step,Training Loss
500,0.6957
1000,0.6763
1500,0.6713
2000,0.6714
2500,0.6696
3000,0.6659
3500,0.6664
4000,0.662
4500,0.67
5000,0.6662


eval accuracy: 0.6198059674171701


 name : bert, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : bert, dataset_name : wnli, tau : 0.75


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5915492957746479


 name : bert, dataset_name : wnli, tau : 1.0


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5915492957746479


 name : bert, dataset_name : wnli, tau : 1.25


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5915492957746479


 name : gpt2, dataset_name : cola, tau : 0.5


eval accuracy: 0.7785234899328859


Step,Training Loss
500,0.5963
1000,0.518
1500,0.42


eval accuracy: 0.7315436241610739


 name : gpt2, dataset_name : cola, tau : 0.75


eval accuracy: 0.7785234899328859


Step,Training Loss
500,0.625
1000,0.6161
1500,0.6065


eval accuracy: 0.6912751677852349


 name : gpt2, dataset_name : cola, tau : 1.0


eval accuracy: 0.7785234899328859


Step,Training Loss
500,0.6234
1000,0.6179
1500,0.6081


eval accuracy: 0.6912751677852349


 name : gpt2, dataset_name : cola, tau : 1.25


eval accuracy: 0.7785234899328859


Step,Training Loss
500,0.6194
1000,0.6152
1500,0.6079


eval accuracy: 0.6912751677852349


 name : gpt2, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.803921568627451


Step,Training Loss
500,0.3619


eval accuracy: 0.8308823529411765


 name : gpt2, dataset_name : mrpc, tau : 0.75


eval accuracy: 0.803921568627451


Step,Training Loss
500,0.4363


eval accuracy: 0.7990196078431373


 name : gpt2, dataset_name : mrpc, tau : 1.0


eval accuracy: 0.803921568627451


Step,Training Loss
500,0.6434


eval accuracy: 0.6911764705882353


 name : gpt2, dataset_name : mrpc, tau : 1.25


eval accuracy: 0.803921568627451


Step,Training Loss
500,0.6332


eval accuracy: 0.7083333333333334


 name : gpt2, dataset_name : rte, tau : 0.5


eval accuracy: 0.631768953068592


Step,Training Loss


eval accuracy: 0.5306859205776173


 name : gpt2, dataset_name : rte, tau : 0.75


eval accuracy: 0.631768953068592


Step,Training Loss


eval accuracy: 0.5270758122743683


 name : gpt2, dataset_name : rte, tau : 1.0


eval accuracy: 0.631768953068592


Step,Training Loss


eval accuracy: 0.48375451263537905


 name : gpt2, dataset_name : rte, tau : 1.25


eval accuracy: 0.631768953068592


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : gpt2, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8848617975471352


Step,Training Loss
500,0.3775
1000,0.3659
1500,0.354
2000,0.3428
2500,0.3313
3000,0.3325
3500,0.333
4000,0.3327
4500,0.3265
5000,0.3154


eval accuracy: 0.8784550613216182


 name : gpt2, dataset_name : qnli, tau : 0.75


eval accuracy: 0.8848617975471352


Step,Training Loss
500,0.4377
1000,0.4144
1500,0.3985
2000,0.386
2500,0.3722
3000,0.3768
3500,0.3736
4000,0.3677
4500,0.362
5000,0.3493


eval accuracy: 0.8758923668314114


 name : gpt2, dataset_name : qnli, tau : 1.0


eval accuracy: 0.8848617975471352


Step,Training Loss
500,0.7128
1000,0.6839
1500,0.6255
2000,0.5844
2500,0.5741
3000,0.55
3500,0.5438
4000,0.5379
4500,0.5246
5000,0.5227


eval accuracy: 0.8140215998535603


 name : gpt2, dataset_name : qnli, tau : 1.25


eval accuracy: 0.8848617975471352


Step,Training Loss
500,0.7043
1000,0.6609
1500,0.5905
2000,0.5689
2500,0.5506
3000,0.5351
3500,0.5281
4000,0.5267
4500,0.5167
5000,0.5133


eval accuracy: 0.8167673439502106


 name : gpt2, dataset_name : wnli, tau : 0.5


eval accuracy: 0.3380281690140845


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : gpt2, dataset_name : wnli, tau : 0.75


eval accuracy: 0.3380281690140845


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : gpt2, dataset_name : wnli, tau : 1.0


eval accuracy: 0.3380281690140845


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : gpt2, dataset_name : wnli, tau : 1.25


eval accuracy: 0.3380281690140845


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : roberta, dataset_name : cola, tau : 0.5


eval accuracy: 0.8312559923298178


Step,Training Loss
500,0.3368
1000,0.2409
1500,0.1608


eval accuracy: 0.8053691275167785


 name : roberta, dataset_name : cola, tau : 0.75


eval accuracy: 0.8312559923298178


Step,Training Loss
500,0.4976
1000,0.3497
1500,0.244


eval accuracy: 0.7909875359539789


 name : roberta, dataset_name : cola, tau : 1.0


eval accuracy: 0.8312559923298178


Step,Training Loss
500,0.6097
1000,0.6129
1500,0.6087


eval accuracy: 0.6912751677852349


 name : roberta, dataset_name : cola, tau : 1.25


eval accuracy: 0.8312559923298178


Step,Training Loss
500,0.6118
1000,0.6131
1500,0.6084


eval accuracy: 0.6912751677852349


 name : roberta, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8774509803921569


Step,Training Loss
500,0.4196


eval accuracy: 0.8602941176470589


 name : roberta, dataset_name : mrpc, tau : 0.75


eval accuracy: 0.8774509803921569


Step,Training Loss
500,0.506


eval accuracy: 0.8259803921568627


 name : roberta, dataset_name : mrpc, tau : 1.0


eval accuracy: 0.8774509803921569


Step,Training Loss
500,0.5858


eval accuracy: 0.7475490196078431


 name : roberta, dataset_name : mrpc, tau : 1.25


eval accuracy: 0.8774509803921569


Step,Training Loss
500,0.6271


eval accuracy: 0.6887254901960784


 name : roberta, dataset_name : rte, tau : 0.5


eval accuracy: 0.4729241877256318


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 0.75


eval accuracy: 0.4729241877256318


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 1.0


eval accuracy: 0.4729241877256318


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 1.25


eval accuracy: 0.4729241877256318


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8890719384953323


Step,Training Loss
500,0.5839
1000,0.6969
1500,0.6952
2000,0.6952
2500,0.6952
3000,0.6959
3500,0.6948
4000,0.6945
4500,0.6937
5000,0.6942


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : qnli, tau : 0.75


eval accuracy: 0.8890719384953323


Step,Training Loss
500,0.638
1000,0.6497
1500,0.6959
2000,0.695
2500,0.6945
3000,0.695
3500,0.6945
4000,0.694
4500,0.6931
5000,0.6937


Step,Training Loss
500,0.638
1000,0.6497
1500,0.6959
2000,0.695
2500,0.6945
3000,0.695
3500,0.6945
4000,0.694
4500,0.6931
5000,0.6937


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : qnli, tau : 1.0


eval accuracy: 0.8890719384953323


Step,Training Loss
500,0.6967
1000,0.6953
1500,0.6947
2000,0.6946
2500,0.6943
3000,0.6948
3500,0.6944
4000,0.6939
4500,0.6932
5000,0.694


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : qnli, tau : 1.25


eval accuracy: 0.8890719384953323


Step,Training Loss
500,0.6967
1000,0.6953
1500,0.6947
2000,0.6946
2500,0.6943
3000,0.6948
3500,0.6944
4000,0.6939
4500,0.6932
5000,0.694


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : wnli, tau : 0.75


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : wnli, tau : 1.0


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : wnli, tau : 1.25


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


In [None]:
# Echo the collected benchmark records, then persist them to Drive as a pickle
# so the tables can be rebuilt without re-running the experiments.
print(results)

results_path = "/content/drive/MyDrive/MA2/Master Thesis/project/results/benchmark/post_fine_tuning__task_specific.pkl"
with open(results_path, 'wb') as results_file:
    pickle.dump(results, results_file)

[{'model': 'bert', 'dataset': 'cola', 'tau': 0.5, 'alpha': None, 'method': 'afm', 'resulting parameters': 0.8841059905696714, 'relative accuracy': 0.9883720930232558, 'benchmark': {'matthews': np.float64(0.5445498216235399)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 0.75, 'alpha': None, 'method': 'afm', 'resulting parameters': 0.865144423496237, 'relative accuracy': 0.9732558139534884, 'benchmark': {'matthews': np.float64(0.5168494611799795)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 1.0, 'alpha': None, 'method': 'afm', 'resulting parameters': 0.8463582705375768, 'relative accuracy': 0.9848837209302326, 'benchmark': {'matthews': np.float64(0.5391508045334119)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 1.25, 'alpha': None, 'method': 'afm', 'resulting parameters': 0.8192743312164474, 'relative accuracy': 0.9627906976744186, 'benchmark': {'matthews': np.float64(0.49038894064526567)}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.5, 'alpha': None, 'method': 'afm', 'resulting p

# Compression with post fine-tuning / Random proxy

Compression of fine-tuned models using a random proxy, followed by post fine-tuning for 3 epochs.

In [None]:
# Experiment grid: every (model, GLUE task, tau) combination is run once.
name_ = ['bert', 'gpt2', 'roberta']
dataset_name_ = ['cola', 'mrpc', 'rte', 'qnli', 'wnli']
tau_ = [0.5, 0.75, 1.0]  # passed to the compression routine as `output_threshold`

# Settings shared by every run of this section.
task = 'classification'
split = "validation"              # split used for both baseline and post-tuning accuracy
alpha = None
method = 'svd'
error_type = 'hidden_output'
lr_metric = 'effective_rank'
proxy_type = 'random'             # this section uses a random proxy (see section header)
complexity_metric = 'n_param'

# One record (dict) per (model, dataset, tau) run, in loop order.
results = []

for name in name_ :
  for dataset_name in dataset_name_ :
    for tau in tau_ :
      print(f'\n\n name : {name}, dataset_name : {dataset_name}, tau : {tau}')

      # Reload the fine-tuned weights for every run so each compression starts
      # from a freshly constructed Model.
      weight_path = f'/content/drive/MyDrive/MA2/Master Thesis/project/models/{name}/{name}_{dataset_name}.pth'
      my_Model = Model(name, task, weight_path)
      my_Model.tokenize_dataset(dataset_name, split, train_fraction=1, test_fraction=1)

      # Accuracy of the uncompressed model; denominator of 'relative accuracy'.
      base_acc = ft.evaluate(my_Model, dataset_name, split=split)

      # NOTE(review): batch / seq_len / hidden_size / mu / sigma presumably
      # describe the random proxy input — confirm against
      # knapsack_greedy_compression.
      best_Model, best_rank_ratio_dict, res_param = knapsack_greedy_compression(
          my_Model,
          min_rank_ratios=np.linspace(0.1, 0.6, 6),
          max_rank_ratio=0.7,
          rank_ratio_step=0.1,
          output_threshold=tau,
          proxy_type=proxy_type,
          dataset_name=dataset_name,
          batch=16,
          seq_len=10,
          hidden_size=768,
          mu=0,
          sigma=1,
          complexity_metric=complexity_metric,
          lr_metric=lr_metric,
          error_type=error_type,
          method=method,
          base_accuracy=base_acc,
          alpha=alpha,
      )

      # Post fine-tuning of the compressed model for 3 epochs on the full data,
      # then re-evaluation on the same split as the baseline.
      tuned_Model = ft.fine_tune(best_Model, dataset_name, train_fraction=1, test_fraction=1, n_epochs=3, batch_size=16)
      acc = ft.evaluate(tuned_Model, dataset_name, split=split)

      # Build the record in one literal; the GLUE metric is computed last, after
      # the accuracy ratio, matching the order the fields are listed in.
      current_result = {
          'model': name,
          'dataset': dataset_name,
          'tau': tau,
          'alpha': alpha,
          'method': method,
          'resulting parameters': res_param,
          'relative accuracy': acc/base_acc,
          'benchmark': ft.evaluate_glue_task(tuned_Model.model, tuned_Model.tokenizer, dataset_name, batch_size=32, device=tuned_Model.device),
      }

      results.append(current_result)



 name : bert, dataset_name : cola, tau : 0.5


eval accuracy: 0.8245445829338447


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Step,Training Loss
500,0.2136
1000,0.1471
1500,0.1013


eval accuracy: 0.8235858101629914


 name : bert, dataset_name : cola, tau : 0.75


eval accuracy: 0.8245445829338447


Step,Training Loss
500,0.2873
1000,0.1982
1500,0.1265


eval accuracy: 0.8149568552253116


 name : bert, dataset_name : cola, tau : 1.0


eval accuracy: 0.8245445829338447


Step,Training Loss
500,0.4977
1000,0.3015
1500,0.1748


eval accuracy: 0.7871524448705657


 name : bert, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8480392156862745


Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Step,Training Loss
500,0.6329


eval accuracy: 0.7647058823529411


 name : bert, dataset_name : mrpc, tau : 0.75


eval accuracy: 0.8480392156862745


Step,Training Loss
500,0.6405


eval accuracy: 0.6813725490196079


 name : bert, dataset_name : mrpc, tau : 1.0


eval accuracy: 0.8480392156862745


Step,Training Loss
500,0.6085


eval accuracy: 0.6838235294117647


 name : bert, dataset_name : rte, tau : 0.5


eval accuracy: 0.6389891696750902


Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss


eval accuracy: 0.6101083032490975


 name : bert, dataset_name : rte, tau : 0.75


eval accuracy: 0.6389891696750902


Step,Training Loss


eval accuracy: 0.5812274368231047


 name : bert, dataset_name : rte, tau : 1.0


eval accuracy: 0.6389891696750902


Step,Training Loss


eval accuracy: 0.5848375451263538


 name : bert, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8663737872963573


Map:   0%|          | 0/104743 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss
500,0.1977
1000,0.1941
1500,0.1989
2000,0.2065
2500,0.1885
3000,0.2097
3500,0.2071
4000,0.2109
4500,0.2002
5000,0.2186


eval accuracy: 0.8888888888888888


 name : bert, dataset_name : qnli, tau : 0.75


eval accuracy: 0.8663737872963573


Step,Training Loss
500,0.2067
1000,0.1982
1500,0.2032
2000,0.2109
2500,0.1964
3000,0.2099
3500,0.2092
4000,0.2199
4500,0.2193
5000,0.2116


eval accuracy: 0.886326194398682


 name : bert, dataset_name : qnli, tau : 1.0


eval accuracy: 0.8663737872963573


Step,Training Loss
500,0.3322
1000,0.2802
1500,0.2919
2000,0.2864
2500,0.2669
3000,0.2808
3500,0.2869
4000,0.291
4500,0.2719
5000,0.277


eval accuracy: 0.8782720117151748


 name : bert, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Step,Training Loss


eval accuracy: 0.5633802816901409


 name : bert, dataset_name : wnli, tau : 0.75


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : bert, dataset_name : wnli, tau : 1.0


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.43661971830985913


 name : gpt2, dataset_name : cola, tau : 0.5


eval accuracy: 0.7785234899328859


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Step,Training Loss
500,0.6306
1000,0.6148
1500,0.5997


eval accuracy: 0.6903163950143816


 name : gpt2, dataset_name : cola, tau : 0.75


eval accuracy: 0.7785234899328859


Step,Training Loss
500,0.6199
1000,0.6125
1500,0.6079


eval accuracy: 0.6912751677852349


 name : gpt2, dataset_name : cola, tau : 1.0


eval accuracy: 0.7785234899328859


Step,Training Loss
500,0.6171
1000,0.6129
1500,0.6078


eval accuracy: 0.6912751677852349


 name : gpt2, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.803921568627451


Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Step,Training Loss
500,0.638


eval accuracy: 0.7205882352941176


 name : gpt2, dataset_name : mrpc, tau : 0.75


eval accuracy: 0.803921568627451


Step,Training Loss
500,0.6427


eval accuracy: 0.6887254901960784


 name : gpt2, dataset_name : mrpc, tau : 1.0


eval accuracy: 0.803921568627451


Step,Training Loss
500,0.6385


eval accuracy: 0.6813725490196079


 name : gpt2, dataset_name : rte, tau : 0.5


eval accuracy: 0.631768953068592


Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss


eval accuracy: 0.5306859205776173


 name : gpt2, dataset_name : rte, tau : 0.75


eval accuracy: 0.631768953068592


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : gpt2, dataset_name : rte, tau : 1.0


eval accuracy: 0.631768953068592


Step,Training Loss


eval accuracy: 0.4296028880866426


 name : gpt2, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8848617975471352


Map:   0%|          | 0/104743 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss
500,0.4267
1000,0.3913
1500,0.3786
2000,0.3679
2500,0.3556
3000,0.3587
3500,0.3609
4000,0.3584
4500,0.3545
5000,0.3401


eval accuracy: 0.8700347794252242


 name : gpt2, dataset_name : qnli, tau : 0.75


eval accuracy: 0.8848617975471352


Step,Training Loss
500,0.7033
1000,0.6782
1500,0.662
2000,0.6002
2500,0.5737
3000,0.5555
3500,0.5524
4000,0.5435
4500,0.5315
5000,0.5235


eval accuracy: 0.8094453596924767


 name : gpt2, dataset_name : qnli, tau : 1.0


eval accuracy: 0.8848617975471352


Step,Training Loss
500,0.6992
1000,0.6937
1500,0.6801
2000,0.6699
2500,0.6667
3000,0.6612
3500,0.6626
4000,0.6153
4500,0.5766
5000,0.5549


eval accuracy: 0.8052352187442797


 name : gpt2, dataset_name : wnli, tau : 0.5


eval accuracy: 0.3380281690140845


Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Step,Training Loss


eval accuracy: 0.43661971830985913


 name : gpt2, dataset_name : wnli, tau : 0.75


eval accuracy: 0.3380281690140845


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : gpt2, dataset_name : wnli, tau : 1.0


eval accuracy: 0.3380281690140845


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : cola, tau : 0.5


eval accuracy: 0.8312559923298178


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Step,Training Loss
500,0.2541
1000,0.1878
1500,0.1222


eval accuracy: 0.8302972195589645


 name : roberta, dataset_name : cola, tau : 0.75


eval accuracy: 0.8312559923298178


Step,Training Loss
500,0.6182
1000,0.6141
1500,0.6087


eval accuracy: 0.6912751677852349


 name : roberta, dataset_name : cola, tau : 1.0


eval accuracy: 0.8312559923298178


Step,Training Loss
500,0.6117
1000,0.6121
1500,0.6089


eval accuracy: 0.6912751677852349


 name : roberta, dataset_name : mrpc, tau : 0.5


eval accuracy: 0.8774509803921569


Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Step,Training Loss
500,0.3688


eval accuracy: 0.8774509803921569


 name : roberta, dataset_name : mrpc, tau : 0.75


eval accuracy: 0.8774509803921569


Step,Training Loss
500,0.6284


eval accuracy: 0.7107843137254902


 name : roberta, dataset_name : mrpc, tau : 1.0


eval accuracy: 0.8774509803921569


Step,Training Loss
500,0.6362


eval accuracy: 0.6838235294117647


 name : roberta, dataset_name : rte, tau : 0.5


eval accuracy: 0.4729241877256318


Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 0.75


eval accuracy: 0.4729241877256318


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : rte, tau : 1.0


eval accuracy: 0.4729241877256318


Step,Training Loss


eval accuracy: 0.4729241877256318


 name : roberta, dataset_name : qnli, tau : 0.5


eval accuracy: 0.8890719384953323


Map:   0%|          | 0/104743 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Step,Training Loss
500,0.6877
1000,0.6955
1500,0.6952
2000,0.6948
2500,0.6948
3000,0.695
3500,0.6944
4000,0.6938
4500,0.6931
5000,0.6938


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : qnli, tau : 0.75


eval accuracy: 0.8890719384953323


Step,Training Loss
500,0.697
1000,0.6951
1500,0.695
2000,0.6949
2500,0.6947
3000,0.6954
3500,0.6944
4000,0.6941
4500,0.6936
5000,0.6942


Step,Training Loss
500,0.697
1000,0.6951
1500,0.695
2000,0.6949
2500,0.6947
3000,0.6954
3500,0.6944
4000,0.6941
4500,0.6936
5000,0.6942


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : qnli, tau : 1.0


eval accuracy: 0.8890719384953323


Step,Training Loss
500,0.7014
1000,0.695
1500,0.6949
2000,0.6949
2500,0.6947
3000,0.6954
3500,0.6943
4000,0.6941
4500,0.6936
5000,0.6942


eval accuracy: 0.4946000366099213


 name : roberta, dataset_name : wnli, tau : 0.5


eval accuracy: 0.5633802816901409


Map:   0%|          | 0/635 [00:00<?, ? examples/s]

Map:   0%|          | 0/71 [00:00<?, ? examples/s]

Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : wnli, tau : 0.75


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


 name : roberta, dataset_name : wnli, tau : 1.0


eval accuracy: 0.5633802816901409


Step,Training Loss


eval accuracy: 0.5633802816901409


In [None]:
import os

# Show the benchmark records accumulated in `results` by the preceding loop,
# then persist them to Google Drive as a pickle file.
print(results)

benchmark_pkl_path = "/content/drive/MyDrive/MA2/Master Thesis/project/results/benchmark/post_fine_tuning__random.pkl"

# Create the destination folder first: open(..., 'wb') raises FileNotFoundError
# when the parent directory is missing, which would abort the save after the
# whole benchmark run has already completed.
os.makedirs(os.path.dirname(benchmark_pkl_path), exist_ok=True)

with open(benchmark_pkl_path, 'wb') as f:
    pickle.dump(results, f)

[{'model': 'bert', 'dataset': 'cola', 'tau': 0.5, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.9595389921600989, 'relative accuracy': 0.9988372093023256, 'benchmark': {'matthews': np.float64(0.5657894359264041)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 0.75, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.8975637833762002, 'relative accuracy': 0.9883720930232558, 'benchmark': {'matthews': np.float64(0.5448598482839426)}}, {'model': 'bert', 'dataset': 'cola', 'tau': 1.0, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.6909657246208657, 'relative accuracy': 0.9546511627906976, 'benchmark': {'matthews': np.float64(0.4734123999422081)}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.5, 'alpha': None, 'method': 'svd', 'resulting parameters': 0.8328458120983001, 'relative accuracy': 0.9017341040462428, 'benchmark': {'accuracy': 0.7549019607843137, 'f1': 0.8422712933753943}}, {'model': 'bert', 'dataset': 'mrpc', 'tau': 0.75, 'alpha': None, 'method': 'svd'