In [1]:
import torch
from transformers import BertConfig, BertTokenizer, BertTokenizerFast, BertForSequenceClassification
import onnxruntime as ort
from onnxruntime_tools import optimizer
import argparse
import pandas as pd
import numpy as np
from transformers.convert_graph_to_onnx import convert
import os
import time
import torch.nn.functional as F
import onnx

In [18]:
from quantize import quantize, QuantizationMode

In [25]:
# Export fine-tuned BERT classifiers to ONNX, then write two derived models
# next to the converted one: a graph-optimized model and a quantized model
# (QuantizationMode.IntegerOps, static=False).
BASE_DIR = '/scratch/da2734/twitter/jobs/onnx/results_simpletransformers_jun3_10Klabels_0_all_labels/'
LABELS = ["lost_job_1mo", "is_unemployed", "job_search", "is_hired_1mo", "job_offer"]

for label in LABELS:

    print(label)
    # Trailing slashes are kept so the printed/saved paths are unchanged.
    model_path = '{}{}/'.format(BASE_DIR, label)
    onnx_path = '{}{}/onnx/'.format(BASE_DIR, label)
    # exist_ok=True: do not crash with FileExistsError when the cell is re-run
    # after the output directory has already been created.
    # NOTE(review): depending on the installed transformers version, convert()
    # may itself refuse to write into a non-empty folder -- confirm before
    # relying on re-runs over existing artifacts.
    os.makedirs(onnx_path, exist_ok=True)

    # Single spelling of the converted-model path, shared by all three steps
    # (the original mixed 'onnx/converted.onnx' and 'onnx//converted.onnx').
    converted_onnx_model_path = os.path.join(onnx_path, 'converted.onnx')

    print('converting')
    convert(framework="pt",
            model=model_path,
            tokenizer="DeepPavlov/bert-base-cased-conversational",
            output=converted_onnx_model_path,
            opset=11)

    print('optimizing')
    # ONNX optimization
    optimized_model = optimizer.optimize_model(converted_onnx_model_path,
                                               model_type='bert',
                                               num_heads=12,
                                               hidden_size=768)

    optimized_onnx_model_path = os.path.join(onnx_path, 'bert_optimized.onnx')
    optimized_model.save_model_to_file(optimized_onnx_model_path)
    print('Optimized model saved at :', optimized_onnx_model_path)

    print('quantizing')
    # ONNX quantization
    # NOTE(review): the quantizer is fed the *converted* model, not the
    # optimized one, even though the output file and log message say
    # "optimized" -- confirm whether that is intended.
    model = onnx.load(converted_onnx_model_path)
    quantized_model = quantize(model, quantization_mode=QuantizationMode.IntegerOps, static=False)
    optimized_quantized_onnx_model_path = os.path.join(onnx_path, 'bert_optimized_quantized.onnx')
    onnx.save(quantized_model, optimized_quantized_onnx_model_path)
    print('Quantized&optimized model saved at :', optimized_quantized_onnx_model_path)

    # NOTE(review): this break stops after the first label, so only
    # "lost_job_1mo" is processed -- looks like a debugging leftover; remove it
    # to export every label in LABELS.
    break

lost_job_1mo
converting
ONNX opset version set to: 11
Loading pipeline (model: /scratch/da2734/twitter/jobs/onnx/results_simpletransformers_jun3_10Klabels_0_all_labels/lost_job_1mo/, tokenizer: DeepPavlov/bert-base-cased-conversational)
PyTorch: 1.5.0+cu101
optimizing
Optimized model saved at : /scratch/da2734/twitter/jobs/onnx/results_simpletransformers_jun3_10Klabels_0_all_labels/lost_job_1mo/onnx/bert_optimized.onnx
quantizing
Quantized&optimized model saved at : /scratch/da2734/twitter/jobs/onnx/results_simpletransformers_jun3_10Klabels_0_all_labels/lost_job_1mo/onnx/bert_optimized_quantized.onnx
