In [1]:
from sentence_transformers import SentenceTransformer
from mteb import MTEB
from rmt_laser import ModelModifier

In [2]:
import warnings
# ignore the warning of huggingface datasets
warnings.filterwarnings("ignore")

In [3]:
# Baseline sentence-embedding model; it is scored in the next cell before any
# layer modification is applied.
model = SentenceTransformer(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2")

In [4]:
# Baseline: score the unmodified model on the English test split of the
# AmazonCounterfactualClassification task and display the metrics dict.
evaluation = MTEB(
    tasks=["AmazonCounterfactualClassification"],
    task_langs=["en"],
)
results = evaluation.run(
    model,
    output_folder="results",
    eval_splits=["test"],
    overwrite_results=True,
)
results

{'AmazonCounterfactualClassification': {'mteb_version': '1.1.1',
  'dataset_revision': 'e8379541af4e31359cca9fbcf4b00f2671dba205',
  'mteb_dataset_name': 'AmazonCounterfactualClassification',
  'test': {'en': {'accuracy': 0.6358208955223881,
    'f1': 0.575684526690965,
    'ap': 0.26851178506856466,
    'accuracy_stderr': 0.038817449507632086,
    'f1_stderr': 0.0314147489193903,
    'ap_stderr': 0.021065524721780828,
    'main_score': 0.6358208955223881},
   'evaluation_time': 4.41}}}

Default run with only 5 modifications (`max_mod=5`)

In [5]:
# Encoder layer selectors, last layer first: ['.5.', '.4.', ..., '.0.'].
# Each index is wrapped in dots -- presumably so it is matched as a whole
# component of the module path (e.g. `encoder.layer.5.attention...`);
# confirm against rmt_laser.
layers = [f".{layer_idx}." for layer_idx in reversed(range(6))]
print(layers)

# Positional arguments as passed here: model checkpoint, MTEB task name,
# then "train" and "accuracy" -- presumably the split and the metric used to
# score each candidate modification; confirm against ModelModifier.
modifier = ModelModifier(
    "sentence-transformers/all-MiniLM-L6-v2",
    "AmazonCounterfactualClassification",
    "train",
    "accuracy",
)

# Search over every (layer, sub-module type) pair, stopping at 5 accepted
# modifications (the logged run ends at "Total modifications is 5").
# NOTE(review): in the logged output the 'output.dense' step for layer 5
# reports "Reconstructing layer: ...layer.5.attention.output.dense", i.e. it
# looks like 'output.dense' is matched as a substring and hits
# 'attention.output.dense' instead of the feed-forward output projection.
# Confirm in rmt_laser before relying on that entry.
loop_check, min_loss = modifier.search_optimal_layer_modification(
    layer_types=[
        "attention.self.query",
        "attention.self.key",
        "attention.self.value",
        "attention.output.dense",
        "intermediate.dense",
        "output.dense",
    ],
    layer_numbers=layers,
    max_mod=5,
)

['.5.', '.4.', '.3.', '.2.', '.1.', '.0.']
sentence-transformers/all-MiniLM-L6-v2


The initial performance of the model is 0.638302638128422
Reconstructing layer: auto_model.encoder.layer.5.attention.self.query
Reduced from torch.Size([384]) to 72


Restored original weights for layer: auto_model.encoder.layer.5.attention.self.query
Reconstructing layer: auto_model.encoder.layer.5.attention.self.key
Reduced from torch.Size([384]) to 73


**************************************************
Improved performance: 0.6463165754106519 for layer attention.self.key .5.. Total modifications is 1
**************************************************
Reconstructing layer: auto_model.encoder.layer.5.attention.self.value
Reduced from torch.Size([384]) to 75


**************************************************
Improved performance: 0.6508461921353907 for layer attention.self.value .5.. Total modifications is 2
**************************************************
Reconstructing layer: auto_model.encoder.layer.5.attention.output.dense
Reduced from torch.Size([384]) to 104


**************************************************
Improved performance: 0.652190144350423 for layer attention.output.dense .5.. Total modifications is 3
**************************************************
Reconstructing layer: auto_model.encoder.layer.5.intermediate.dense
Reduced from torch.Size([384]) to 295


Restored original weights for layer: auto_model.encoder.layer.5.intermediate.dense
Reconstructing layer: auto_model.encoder.layer.5.attention.output.dense
Reduced from torch.Size([384]) to 13


**************************************************
Improved performance: 0.6532603285216526 for layer output.dense .5.. Total modifications is 4
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.attention.self.query
Reduced from torch.Size([384]) to 68


**************************************************
Improved performance: 0.6606520657043304 for layer attention.self.query .4.. Total modifications is 5
**************************************************


In [6]:
# Score the model held by `modifier` (after the max_mod=5 search) on the same
# task and split as the baseline, so the two results are directly comparable.
evaluation = MTEB(
    tasks=["AmazonCounterfactualClassification"],
    task_langs=["en"],
)
results = evaluation.run(
    modifier.model,
    output_folder="results",
    eval_splits=["test"],
    overwrite_results=True,
)

In [7]:
# Display the metrics dict returned by the evaluation.run call in the previous cell.
results

{'AmazonCounterfactualClassification': {'mteb_version': '1.1.1',
  'dataset_revision': 'e8379541af4e31359cca9fbcf4b00f2671dba205',
  'mteb_dataset_name': 'AmazonCounterfactualClassification',
  'test': {'en': {'accuracy': 0.6453731343283582,
    'f1': 0.581659644697337,
    'ap': 0.27151949056389996,
    'accuracy_stderr': 0.04884589174706333,
    'f1_stderr': 0.03677148957425979,
    'ap_stderr': 0.0211363201281932,
    'main_score': 0.6453731343283582},
   'evaluation_time': 1.99}}}

Let's go brrrrum: same search, but with no cap on the number of modifications (`max_mod=-1`)

In [8]:
# Encoder layer selectors, last layer first: ['.5.', '.4.', ..., '.0.'].
# Each index is wrapped in dots -- presumably so it is matched as a whole
# component of the module path (e.g. `encoder.layer.5.attention...`);
# confirm against rmt_laser.
layers = [f".{layer_idx}." for layer_idx in reversed(range(6))]
print(layers)

# Fresh ModelModifier, so this search does not build on the modifications
# made by an earlier run. Positional arguments as passed here: model
# checkpoint, MTEB task name, then "train" and "accuracy" -- presumably the
# split and the metric used to score each candidate; confirm against
# ModelModifier.
modifier = ModelModifier(
    "sentence-transformers/all-MiniLM-L6-v2",
    "AmazonCounterfactualClassification",
    "train",
    "accuracy",
)

# max_mod=-1: the logged run visits every layer and ends with 15 accepted
# modifications, so -1 presumably means "no cap" -- confirm in rmt_laser.
# NOTE(review): for each layer the 'output.dense' step logs
# "Reconstructing layer: ...attention.output.dense" and, when rejected,
# "No original weights saved for layer: ...output.dense". It looks like
# 'output.dense' is matched as a substring of 'attention.output.dense', so
# the feed-forward output projection may never actually be searched.
loop_check, min_loss = modifier.search_optimal_layer_modification(
    layer_types=[
        "attention.self.query",
        "attention.self.key",
        "attention.self.value",
        "attention.output.dense",
        "intermediate.dense",
        "output.dense",
    ],
    layer_numbers=layers,
    max_mod=-1,
)

['.5.', '.4.', '.3.', '.2.', '.1.', '.0.']
sentence-transformers/all-MiniLM-L6-v2


The initial performance of the model is 0.638302638128422
Reconstructing layer: auto_model.encoder.layer.5.attention.self.query
Reduced from torch.Size([384]) to 72


Restored original weights for layer: auto_model.encoder.layer.5.attention.self.query
Reconstructing layer: auto_model.encoder.layer.5.attention.self.key
Reduced from torch.Size([384]) to 73


**************************************************
Improved performance: 0.6463165754106519 for layer attention.self.key .5.. Total modifications is 1
**************************************************
Reconstructing layer: auto_model.encoder.layer.5.attention.self.value
Reduced from torch.Size([384]) to 75


**************************************************
Improved performance: 0.6508461921353907 for layer attention.self.value .5.. Total modifications is 2
**************************************************
Reconstructing layer: auto_model.encoder.layer.5.attention.output.dense
Reduced from torch.Size([384]) to 104


**************************************************
Improved performance: 0.652190144350423 for layer attention.output.dense .5.. Total modifications is 3
**************************************************
Reconstructing layer: auto_model.encoder.layer.5.intermediate.dense
Reduced from torch.Size([384]) to 295


Restored original weights for layer: auto_model.encoder.layer.5.intermediate.dense
Reconstructing layer: auto_model.encoder.layer.5.attention.output.dense
Reduced from torch.Size([384]) to 13


**************************************************
Improved performance: 0.6532603285216526 for layer output.dense .5.. Total modifications is 4
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.attention.self.query
Reduced from torch.Size([384]) to 68


**************************************************
Improved performance: 0.6606520657043304 for layer attention.self.query .4.. Total modifications is 5
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.attention.self.key
Reduced from torch.Size([384]) to 70


**************************************************
Improved performance: 0.6618715778994524 for layer attention.self.key .4.. Total modifications is 6
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.attention.self.value
Reduced from torch.Size([384]) to 75


**************************************************
Improved performance: 0.6630662020905924 for layer attention.self.value .4.. Total modifications is 7
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.attention.output.dense
Reduced from torch.Size([384]) to 71


**************************************************
Improved performance: 0.6634892981582877 for layer attention.output.dense .4.. Total modifications is 8
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.intermediate.dense
Reduced from torch.Size([384]) to 327


**************************************************
Improved performance: 0.6684917869586859 for layer intermediate.dense .4.. Total modifications is 9
**************************************************
Reconstructing layer: auto_model.encoder.layer.4.attention.output.dense
Reduced from torch.Size([384]) to 79


Restored original weights for layer: auto_model.encoder.layer.4.attention.output.dense
No original weights saved for layer: auto_model.encoder.layer.4.output.dense
Reconstructing layer: auto_model.encoder.layer.3.attention.self.query
Reduced from torch.Size([384]) to 69


Restored original weights for layer: auto_model.encoder.layer.3.attention.self.query
Reconstructing layer: auto_model.encoder.layer.3.attention.self.key
Reduced from torch.Size([384]) to 69


Restored original weights for layer: auto_model.encoder.layer.3.attention.self.key
Reconstructing layer: auto_model.encoder.layer.3.attention.self.value
Reduced from torch.Size([384]) to 74


**************************************************
Improved performance: 0.6816824290691886 for layer attention.self.value .3.. Total modifications is 10
**************************************************
Reconstructing layer: auto_model.encoder.layer.3.attention.output.dense
Reduced from torch.Size([384]) to 65


**************************************************
Improved performance: 0.6824041811846691 for layer attention.output.dense .3.. Total modifications is 11
**************************************************
Reconstructing layer: auto_model.encoder.layer.3.intermediate.dense
Reduced from torch.Size([384]) to 280


Restored original weights for layer: auto_model.encoder.layer.3.intermediate.dense
Reconstructing layer: auto_model.encoder.layer.3.attention.output.dense
Reduced from torch.Size([384]) to 75


Restored original weights for layer: auto_model.encoder.layer.3.attention.output.dense
No original weights saved for layer: auto_model.encoder.layer.3.output.dense
Reconstructing layer: auto_model.encoder.layer.2.attention.self.query
Reduced from torch.Size([384]) to 67


Restored original weights for layer: auto_model.encoder.layer.2.attention.self.query
Reconstructing layer: auto_model.encoder.layer.2.attention.self.key
Reduced from torch.Size([384]) to 67


Restored original weights for layer: auto_model.encoder.layer.2.attention.self.key
Reconstructing layer: auto_model.encoder.layer.2.attention.self.value
Reduced from torch.Size([384]) to 71


**************************************************
Improved performance: 0.6913887506222001 for layer attention.self.value .2.. Total modifications is 12
**************************************************
Reconstructing layer: auto_model.encoder.layer.2.attention.output.dense
Reduced from torch.Size([384]) to 64


**************************************************
Improved performance: 0.6935540069686411 for layer attention.output.dense .2.. Total modifications is 13
**************************************************
Reconstructing layer: auto_model.encoder.layer.2.intermediate.dense
Reduced from torch.Size([384]) to 262


Restored original weights for layer: auto_model.encoder.layer.2.intermediate.dense
Reconstructing layer: auto_model.encoder.layer.2.attention.output.dense
Reduced from torch.Size([384]) to 75


Restored original weights for layer: auto_model.encoder.layer.2.attention.output.dense
No original weights saved for layer: auto_model.encoder.layer.2.output.dense
Reconstructing layer: auto_model.encoder.layer.1.attention.self.query
Reduced from torch.Size([384]) to 63


Restored original weights for layer: auto_model.encoder.layer.1.attention.self.query
Reconstructing layer: auto_model.encoder.layer.1.attention.self.key
Reduced from torch.Size([384]) to 66


Restored original weights for layer: auto_model.encoder.layer.1.attention.self.key
Reconstructing layer: auto_model.encoder.layer.1.attention.self.value
Reduced from torch.Size([384]) to 74


**************************************************
Improved performance: 0.7001493280238925 for layer attention.self.value .1.. Total modifications is 14
**************************************************
Reconstructing layer: auto_model.encoder.layer.1.attention.output.dense
Reduced from torch.Size([384]) to 67


Restored original weights for layer: auto_model.encoder.layer.1.attention.output.dense
Reconstructing layer: auto_model.encoder.layer.1.intermediate.dense
Reduced from torch.Size([384]) to 280


Restored original weights for layer: auto_model.encoder.layer.1.intermediate.dense
Reconstructing layer: auto_model.encoder.layer.1.attention.output.dense
Reduced from torch.Size([384]) to 67


Restored original weights for layer: auto_model.encoder.layer.1.attention.output.dense
No original weights saved for layer: auto_model.encoder.layer.1.output.dense
Reconstructing layer: auto_model.encoder.layer.0.attention.self.query
Reduced from torch.Size([384]) to 67


Restored original weights for layer: auto_model.encoder.layer.0.attention.self.query
Reconstructing layer: auto_model.encoder.layer.0.attention.self.key
Reduced from torch.Size([384]) to 73


Restored original weights for layer: auto_model.encoder.layer.0.attention.self.key
Reconstructing layer: auto_model.encoder.layer.0.attention.self.value
Reduced from torch.Size([384]) to 74


Restored original weights for layer: auto_model.encoder.layer.0.attention.self.value
Reconstructing layer: auto_model.encoder.layer.0.attention.output.dense
Reduced from torch.Size([384]) to 76


Restored original weights for layer: auto_model.encoder.layer.0.attention.output.dense
Reconstructing layer: auto_model.encoder.layer.0.intermediate.dense
Reduced from torch.Size([384]) to 353


**************************************************
Improved performance: 0.702140368342459 for layer intermediate.dense .0.. Total modifications is 15
**************************************************
Reconstructing layer: auto_model.encoder.layer.0.attention.output.dense
Reduced from torch.Size([384]) to 76


Restored original weights for layer: auto_model.encoder.layer.0.attention.output.dense
No original weights saved for layer: auto_model.encoder.layer.0.output.dense


In [9]:
# Score the model held by `modifier` (after the max_mod=-1 search) on the same
# task and split as the baseline, so the results are directly comparable.
evaluation = MTEB(
    tasks=["AmazonCounterfactualClassification"],
    task_langs=["en"],
)
results = evaluation.run(
    modifier.model,
    output_folder="results",
    eval_splits=["test"],
    overwrite_results=True,
)

In [10]:
# Display the metrics dict returned by the evaluation.run call in the previous cell.
results

{'AmazonCounterfactualClassification': {'mteb_version': '1.1.1',
  'dataset_revision': 'e8379541af4e31359cca9fbcf4b00f2671dba205',
  'mteb_dataset_name': 'AmazonCounterfactualClassification',
  'test': {'en': {'accuracy': 0.6946268656716418,
    'f1': 0.6286425315011868,
    'ap': 0.310106486602168,
    'accuracy_stderr': 0.044412453524530854,
    'f1_stderr': 0.036946517653703354,
    'ap_stderr': 0.02831499661872636,
    'main_score': 0.6946268656716418},
   'evaluation_time': 0.9}}}

**Accuracy** (test split, %)
- Start: 63.58
- 5 Mods: 64.54
- 15 Mods: 69.46

**F1** (test split, %)
- Start: 57.57
- 5 Mods: 58.17
- 15 Mods: 62.86