In [1]:
from CompressionMethods import static_quantization, distillation, pruning
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


### Initialize all the required parameters

In [2]:
# --- Model under compression -------------------------------------------------
# `model` is handed to every compression module below (as `model_id` or
# `teacher_model_id`); presumably a Hugging Face Hub repository id -- confirm.
model, model_type = "adjohn1313/bert-base-finetuned", "bert"

# --- Evaluation data ---------------------------------------------------------
# Dataset identifier and the subset of it that the experiments run on; both
# are forwarded unchanged to each compression module.
dataset, subtask = "sem_eval_2018_task_1", "subtask5.english"

### 1. Static Quantization

In [3]:
# Build the static-quantization experiment for the configured model/dataset.
# The recorded run reports both a 4-bit and an 8-bit variant; their result
# dicts are read back later via `.results_4bit` and `.results_8bit`.
staticQuantizationObject = static_quantization.staticQuantization(
    model_id=model,
    dataset_id=dataset,
    dataset_subsetid=subtask,
)

# Execute the experiment; it prints one result dict per bit-width.
staticQuantizationObject.run_experiment()

Results of 4-bit Static Quantization:
{'accuracy': 0.27539503386004516,
 'mean_time': 0.02808135303902034,
 'method': '4bit-static-quantization',
 'size': 86.50099754333496}
####################################################################################################
Results of 8-bit Static Quantization:
{'accuracy': 0.26749435665914223,
 'mean_time': 0.10081183049264396,
 'method': '8bit-static-quantization',
 'size': 127.28224754333496}
####################################################################################################




### 2. Distillation

In [4]:
# Student checkpoint that receives the knowledge of the fine-tuned teacher.
student_model = "distilbert/distilbert-base-uncased"

# The teacher is the same fine-tuned model used by the other experiments.
distillationObject = distillation.DistillationModule(
    teacher_model_id=model,
    student_model_id=student_model,
    dataset=dataset,
    sub_dataset=subtask,
)

# Runs the distillation experiment. In the recorded run this trained for five
# epochs and then printed the result dict, so expect it to be the slowest cell.
distillationObject.run_experiment()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 6838/6838 [00:00<00:00, 13901.18 examples/s]
Map: 100%|██████████| 3259/3259 [00:00<00:00, 13814.12 examples/s]
Map: 100%|██████████| 886/886 [00:00<00:00, 14192.35 examples/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
1,1.0625,0.344743,0.69694,0.797612,0.266366
2,0.4006,0.300171,0.698539,0.795519,0.278781
3,0.2749,0.283933,0.6955,0.79231,0.286682
4,0.2297,0.271652,0.69681,0.792373,0.284424
5,0.191,0.264767,0.69613,0.792872,0.273138


Training time:  448.6524896621704
Results of Distillation:
{'accuracy': 0.2742663656884876,
 'mean_time': 0.006577949760729788,
 'method': 'Distillation (BERT Only)',
 'size': 255.44340133666992}
####################################################################################################


### 3. Pruning

In [5]:
# Build the pruning experiment with the same model/dataset selectors as the
# static-quantization run so the resulting metrics are comparable.
pruningObject = pruning.PruneModel(
    model_id=model,
    dataset_id=dataset,
    dataset_subsetid=subtask,
)

# Execute the experiment; the result dict is printed and is read back later
# via `pruningObject.results`.
pruningObject.run_experiment()

Results of Pruning:
{'method': 'Pruning', 'size': 417.6816825866699, 'mean_time': 0.012086537298714872, 'accuracy': 0.27200902934537247}
####################################################################################################


### Saving the results from all experiments

In [6]:
# One row per compression experiment, in the order the experiments were run.
# Each entry is the result dict exposed by the corresponding experiment object.
results = pd.DataFrame(
    data=[
        staticQuantizationObject.results_4bit,
        staticQuantizationObject.results_8bit,
        distillationObject.results_distillation,
        pruningObject.results,
    ]
)
display(results.head())

# Name the CSV after the last "/"-separated component of the model id, so the
# file name contains no path separator.
model_name = model.rsplit("/", 1)[-1]
results.to_csv(f"{model_name}_results.csv", index=False)

Unnamed: 0,method,size,mean_time,accuracy
0,4bit-static-quantization,86.500998,0.028081,0.275395
1,8bit-static-quantization,127.282248,0.100812,0.267494
2,Distillation (BERT Only),255.443401,0.006578,0.274266
3,Pruning,417.681683,0.012087,0.272009
