In [None]:
pip install datasets > /dev/null
pip install -U accelerate transformers > /dev/null

In [None]:
! pip install -U accelerate > /dev/null
! pip install -U transformers  > /dev/null

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import scipy
import re
import tensorflow as tf
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import random

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef

def load_model(model_path="/content/drive/MyDrive/"):
  """Load a sequence-classification model from a saved checkpoint.

  Args:
    model_path: Location handed to
      `AutoModelForSequenceClassification.from_pretrained`. Defaults to the
      path this notebook originally hard-coded, so `load_model()` behaves
      exactly as before.

  Returns:
    The model object returned by `from_pretrained`.
  """
  return AutoModelForSequenceClassification.from_pretrained(model_path)

def preprocess_function(examples):
    """Tokenize the "sentence" field of a batch of examples.

    Relies on the module-level `tokenizer`. Truncation is enabled and
    `padding=True` is passed, so the sentences handed over in one call are
    padded together.

    Args:
        examples: Mapping with a "sentence" entry holding the text to encode.

    Returns:
        Whatever the tokenizer call returns for those sentences.
    """
    sentences = examples["sentence"]
    encoded = tokenizer(sentences, truncation=True, padding=True)
    return encoded

def compute_metrics(pred):
    """Compute binary-classification metrics for a `Trainer` evaluation.

    Args:
        pred: Object exposing `label_ids` (reference labels) and `predictions`
            (per-class scores; the argmax over axis 1 is taken as the
            predicted class).

    Returns:
        Dict with the keys 'accuracy', 'precision', 'recall', 'f1' and
        'matthews'.
    """
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)

    # zero_division=0 keeps an undefined score at 0.0 (the value scikit-learn
    # already falls back to) but without emitting the undefined-metric warning
    # each time a model never predicts the positive class.
    prf_options = {'average': 'binary', 'zero_division': 0}

    return {
        'accuracy': accuracy_score(labels, preds),
        'precision': precision_score(labels, preds, **prf_options),
        'recall': recall_score(labels, preds, **prf_options),
        'f1': f1_score(labels, preds, **prf_options),
        'matthews': matthews_corrcoef(labels, preds)
    }

# Tokenizer used by preprocess_function (looked up as a module-level name).
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
dataset = load_dataset("glue", "cola")
# Only the validation split is ever evaluated, so tokenize just that split
# instead of mapping every split and discarding all but one.
tokenized_dataset = dataset['validation'].map(preprocess_function, batched=True)

# Evaluation-only settings: batch size per device and the required output directory.
training_args = TrainingArguments(
    per_device_eval_batch_size=100,
    output_dir='./results',
)

In [None]:
model = load_model()

# Evaluation-only Trainer wrapped around the freshly loaded model.
trainer = Trainer(
  model=model,
  args=training_args,
  compute_metrics=compute_metrics,
)

# Prune every 2-D tensor whose state-dict key contains ".layer."
# (area_percentage 0.3, block_size 32).
# NOTE(review): randomly_prune_blocks_by_area is not defined in the visible cells;
# presumably it modifies the tensor in place — confirm.
for name, weights in model.state_dict().items():
  if weights.dim() == 2 and ".layer." in name:
    randomly_prune_blocks_by_area(weights, area_percentage = 0.3, block_size = 32)

trainer.evaluate(tokenized_dataset)

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.322219967842102,
 'eval_accuracy': 0.3087248322147651,
 'eval_precision': 0.0,
 'eval_recall': 0.0,
 'eval_f1': 0.0,
 'eval_matthews': 0.0,
 'eval_runtime': 1.2176,
 'eval_samples_per_second': 856.592,
 'eval_steps_per_second': 9.034}

In [None]:
predictions = trainer.predict(tokenized_dataset)

# NOTE: despite their names, pred_labels holds the reference labels (label_ids)
# and pred_scores holds the predicted class indices (argmax over axis 1).
pred_labels = predictions.label_ids
pred_scores = np.argmax(predictions.predictions, axis=1)

# Predicted classes first, reference labels second.
print(pred_scores)
print(pred_labels)

  _warn_prf(average, modifier, msg_start, len(result))


[0 0 0 ... 0 0 0]
[1 1 1 ... 0 1 1]


In [None]:
# Display the model's weight matrices; the helper receives the result of model.cpu().
# NOTE(review): print_weight_matrices is not defined in the visible cells — it must be
# provided elsewhere for this cell to run on a fresh kernel.
print_weight_matrices(model.cpu(), ignore_zeros=True, visualization_mode='abs')

In [None]:
## USING AREAS

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Sweep the pruned-area fraction for several block sizes; one panel per metric.
fig, axs = plt.subplots(2, 3, figsize=(15, 9))
values = np.arange(0, 0.4, 0.005)
values_100 = 100 * values  # same sweep expressed in percent for the x axis

# Metric keys in panel order (row by row) and the caption used for each panel.
metric_keys = ['eval_loss', 'eval_accuracy', 'eval_precision',
               'eval_recall', 'eval_f1', 'eval_matthews']
panel_titles = ['Loss', 'Accuracy', 'Precision',
                'Recall', 'F1 Score', 'Matthews Correlation']

for block_size in [8, 16, 32, 64]:
  eval_results = []
  for area in values:
    # Every measurement starts from a freshly loaded model.
    model = load_model()

    # Prune each 2-D tensor whose state-dict key contains ".layer.".
    for name, weights in model.state_dict().items():
      if weights.dim() == 2 and ".layer." in name:
        randomly_prune_blocks_by_area(weights, area, block_size)

    # Evaluate the pruned model and keep its metrics.
    trainer = Trainer(
      model=model,
      args=training_args,
      compute_metrics=compute_metrics,
    )
    eval_results.append(trainer.evaluate(tokenized_dataset))

  # One series per metric for the current block size.
  eval_loss, eval_accuracy, eval_precision, eval_recall, eval_f1, eval_matthews = (
    [result[key] for result in eval_results] for key in metric_keys
  )
  curves = [eval_loss, eval_accuracy, eval_precision,
            eval_recall, eval_f1, eval_matthews]
  for ax, curve in zip(axs.flat, curves):
    ax.plot(values_100, curve, label = f"Block size {block_size}")

# Caption every panel; the y label repeats the panel title.
for ax, title in zip(axs.flat, panel_titles):
  ax.set_title(title)
  ax.set_xlabel('% of pruned area')
  ax.set_ylabel(title)
  ax.legend()

plt.tight_layout()
plt.show()

In [None]:
## USING AREAS

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Coarse sweep of the pruned-area fraction for several block sizes; one panel per metric.
fig, axs = plt.subplots(2, 3, figsize=(15, 9))
values = np.arange(0, 1, 0.05)
values_100 = 100 * values  # same sweep expressed in percent for the x axis

# Metric keys in panel order (row by row) and the caption used for each panel.
metric_keys = ['eval_loss', 'eval_accuracy', 'eval_precision',
               'eval_recall', 'eval_f1', 'eval_matthews']
panel_titles = ['Loss', 'Accuracy', 'Precision',
                'Recall', 'F1 Score', 'Matthews Correlation']

for block_size in [8, 16, 32, 64]:
  eval_results = []
  for area in values:
    # Every measurement starts from a freshly loaded model.
    model = load_model()

    # Prune each 2-D tensor whose state-dict key contains ".layer.".
    for name, weights in model.state_dict().items():
      if weights.dim() == 2 and ".layer." in name:
        randomly_prune_blocks_by_area(weights, area, block_size)

    # Evaluate the pruned model and keep its metrics.
    trainer = Trainer(
      model=model,
      args=training_args,
      compute_metrics=compute_metrics,
    )
    eval_results.append(trainer.evaluate(tokenized_dataset))

  # One series per metric for the current block size.
  eval_loss, eval_accuracy, eval_precision, eval_recall, eval_f1, eval_matthews = (
    [result[key] for result in eval_results] for key in metric_keys
  )
  curves = [eval_loss, eval_accuracy, eval_precision,
            eval_recall, eval_f1, eval_matthews]
  for ax, curve in zip(axs.flat, curves):
    ax.plot(values_100, curve, label = f"Block size {block_size}")

# Caption every panel; the y label repeats the panel title.
for ax, title in zip(axs.flat, panel_titles):
  ax.set_title(title)
  ax.set_xlabel('% of pruned area')
  ax.set_ylabel(title)
  ax.legend()

plt.tight_layout()
plt.show()

In [None]:
## USING AREAS

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Fine-grained sweep of small pruned-area fractions for several block sizes; one panel per metric.
fig, axs = plt.subplots(2, 3, figsize=(15, 9))
values = np.arange(0, 0.1, 0.001)
values_100 = 100 * values  # same sweep expressed in percent for the x axis

# Metric keys in panel order (row by row) and the caption used for each panel.
metric_keys = ['eval_loss', 'eval_accuracy', 'eval_precision',
               'eval_recall', 'eval_f1', 'eval_matthews']
panel_titles = ['Loss', 'Accuracy', 'Precision',
                'Recall', 'F1 Score', 'Matthews Correlation']

for block_size in [8, 16, 32, 64]:
  eval_results = []
  for area in values:
    # Every measurement starts from a freshly loaded model.
    model = load_model()

    # Prune each 2-D tensor whose state-dict key contains ".layer.".
    for name, weights in model.state_dict().items():
      if weights.dim() == 2 and ".layer." in name:
        randomly_prune_blocks_by_area(weights, area, block_size)

    # Evaluate the pruned model and keep its metrics.
    trainer = Trainer(
      model=model,
      args=training_args,
      compute_metrics=compute_metrics,
    )
    eval_results.append(trainer.evaluate(tokenized_dataset))

  # One series per metric for the current block size.
  eval_loss, eval_accuracy, eval_precision, eval_recall, eval_f1, eval_matthews = (
    [result[key] for result in eval_results] for key in metric_keys
  )
  curves = [eval_loss, eval_accuracy, eval_precision,
            eval_recall, eval_f1, eval_matthews]
  for ax, curve in zip(axs.flat, curves):
    ax.plot(values_100, curve, label = f"Block size {block_size}")

# Caption every panel; the y label repeats the panel title.
for ax, title in zip(axs.flat, panel_titles):
  ax.set_title(title)
  ax.set_xlabel('% of pruned area')
  ax.set_ylabel(title)
  ax.legend()

plt.tight_layout()
plt.show()

In [5]:
# Scratch cell: plots a small hard-coded list with matplotlib.
# NOTE(review): the output recorded for this cell is a ModuleNotFoundError for
# matplotlib, so it appears to have been run in an environment without it;
# candidate for removal from the finished notebook.
import matplotlib.pyplot as plt
a = [1,23,3,4]

plt.plot(a)

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Display the weight matrices of whichever model `model` currently refers to;
# the helper receives the result of model.cpu().
# NOTE(review): print_weight_matrices is not defined in the visible cells — it must be
# provided elsewhere for this cell to run on a fresh kernel.
print_weight_matrices(model.cpu(), ignore_zeros=True, visualization_mode='abs')

In [None]:
# Print the metrics dict stored at index 1 of eval_results, i.e. the second
# evaluation appended to the list by the most recently run sweep.
print(eval_results[1])

{'eval_loss': 1.1758610010147095, 'eval_accuracy': 0.5445829338446788, 'eval_f1': 0.5383867832847424, 'eval_precision': 0.8993506493506493, 'eval_recall': 0.3841886269070735, 'eval_matthews': 0.2915689932969142, 'eval_runtime': 1.0908, 'eval_samples_per_second': 956.205, 'eval_steps_per_second': 10.085}
