In [1]:
import tensorflow as tf
from transformers import RobertaTokenizerFast, TFRobertaModel
import os
import pathlib
import numpy as np
import cloudpickle
import pandas as pd

In [2]:
# Model training notebook: https://github.com/ksv-muralidhar/hugging_face_tf_fine_tuning/blob/main/roberta_text_classification.ipynb
# The saved model contains a Hugging Face TFRobertaModel layer, so the class is
# passed through `custom_objects` for Keras to resolve it while reading the .h5 file.
tf_model = tf.keras.models.load_model(
    'arxiv_classifier_hf_roberta.h5',
    custom_objects={"TFRobertaModel": TFRobertaModel},
)

In [3]:
# Print the layer-by-layer structure and parameter counts of the loaded Keras model.
tf_model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, 200)]                0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, 200)]                0         []                            
 )                                                                                                
                                                                                                  
 tf_roberta_model (TFRobert  TFBaseModelOutputWithPooli   1246456   ['input_ids[0][0]',           
 aModel)                     ngAndCrossAttentions(last_   32         'attention_mask[0][0]']      
                             hidden_state=(None, 200, 7                                     

In [4]:
# Model quantization
# Convert the in-memory Keras model to a TFLite flatbuffer. Setting
# `optimizations` to Optimize.DEFAULT asks the converter to apply its default
# post-training optimizations (no representative dataset is supplied here).
converter = tf.lite.TFLiteConverter.from_keras_model(tf_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Persist the converted model under ./tflite_models, creating the folder if needed.
tflite_models_dir = pathlib.Path("tflite_models")
tflite_models_dir.mkdir(parents=True, exist_ok=True)
tflite_model_file = tflite_models_dir.joinpath("arxiv_classifier_hf_roberta.tflite")
# Kept as the cell's last expression so the number of bytes written is displayed.
tflite_model_file.write_bytes(tflite_model)

125234144

In [5]:
# Report the on-disk size of both artifacts in decimal megabytes (bytes / 1,000,000).
h5_size_mb = os.path.getsize("arxiv_classifier_hf_roberta.h5") / 1000000
print(f'TF model size: {h5_size_mb} MB')
tflite_size_mb = os.path.getsize(os.path.join("tflite_models", "arxiv_classifier_hf_roberta.tflite")) / 1000000
print(f'TFLite model size: {tflite_size_mb} MB')

TF model size: 1503.006144 MB
TFLite model size: 125.234144 MB


In [6]:
# Load everything the comparison below needs: the pickled (text preprocessor,
# label encoder) pair, a TFLite interpreter for the quantized model, and the
# full Keras model.
# NOTE(review): cloudpickle.load can execute arbitrary code stored in the file;
# only load this artifact when it comes from a trusted source.
with open("arxiv_category_preprocessor_labelencoder.bin", "rb") as model_file_obj:
    text_preprocessor, label_encoder = cloudpickle.load(model_file_obj)

interpreter = tf.lite.Interpreter(
    model_path=os.path.join("tflite_models", "arxiv_classifier_hf_roberta.tflite")
)
# NOTE(review): the same .h5 file is already loaded into `tf_model` earlier in the
# notebook; this second load presumably lets the cell run on its own - confirm.
tf_model = tf.keras.models.load_model(
    'arxiv_classifier_hf_roberta.h5',
    custom_objects={"TFRobertaModel": TFRobertaModel},
)

In [7]:
# TF model and TFlite model inference

# Tokenizers already loaded, keyed by checkpoint name, so repeated inference()
# calls do not re-read the tokenizer files every time.
_TOKENIZER_CACHE = {}


def _get_tokenizer(model_checkpoint):
    """Return the tokenizer for `model_checkpoint`, loading it only on first use."""
    if model_checkpoint not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE[model_checkpoint] = RobertaTokenizerFast.from_pretrained(model_checkpoint)
    return _TOKENIZER_CACHE[model_checkpoint]


def _tflite_input_slot(input_details, tensor_name, fallback_position):
    """Pick the TFLite input entry whose name contains `tensor_name`, else fall back to position."""
    named = [detail for detail in input_details if tensor_name in str(detail.get("name", ""))]
    if len(named) == 1:
        return named[0]
    return input_details[fallback_position]


def _format_prediction(scores):
    """Render the top-scoring class as "<decoded label> (<score>)"."""
    best = np.argmax(scores)
    return f"{label_encoder.inverse_transform([best])} ({scores[best]})"


def inference(text):
    """Classify `text` with both the Keras model and the TFLite model and print the results.

    The text is run through the notebook-level `text_preprocessor`, tokenized
    with the "roberta-base" tokenizer (padded/truncated to 200 tokens), and fed
    to the notebook-level `interpreter` (TFLite) and `tf_model` (Keras). For each
    model the arg-max class is decoded with `label_encoder` and printed together
    with its score.

    Parameters
    ----------
    text : str
        Raw input text to classify.

    Returns
    -------
    None
        The predictions are printed, not returned.
    """
    text = text_preprocessor.preprocess(pd.Series(text))[0]

    tokenizer = _get_tokenizer("roberta-base")
    # 200 matches the (None, 200) input shape reported by tf_model.summary().
    tokens = tokenizer(text, max_length=200, padding="max_length", truncation=True, return_tensors="tf")
    attention_mask, input_ids = tokens['attention_mask'], tokens['input_ids']

    # tflite model inference
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()[0]
    # Bind inputs by tensor name: the order of get_input_details() is chosen by
    # the converter and need not match the Keras input order. The positional
    # fallback reproduces the previous hard-coded assignment.
    mask_slot = _tflite_input_slot(input_details, "attention_mask", 0)
    ids_slot = _tflite_input_slot(input_details, "input_ids", 1)
    # Cast to the dtype the interpreter declares for each input before setting it.
    interpreter.set_tensor(mask_slot["index"], np.asarray(attention_mask, dtype=mask_slot["dtype"]))
    interpreter.set_tensor(ids_slot["index"], np.asarray(input_ids, dtype=ids_slot["dtype"]))
    interpreter.invoke()
    tflite_pred = _format_prediction(interpreter.get_tensor(output_details["index"])[0])

    # tf model inference
    tf_pred = _format_prediction(tf_model.predict([input_ids, attention_mask], verbose=0)[0])

    print(f'TF Model Prediction: {tf_pred}\nTFLITE Model Prediction: {tflite_pred}')
    return None

In [8]:
# Example input: an abstract describing GPT-4. inference() prints the Keras and
# TFLite predictions for it and returns None, so the result is discarded into `_`.
text = '''
We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. 
While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on 
various professional and academic benchmarks, including passing a simulated bar exam with a score around the top 10% of test takers. 
GPT-4 is a Transformer-based model pre-trained to predict the next token in a document. The post-training alignment process results in 
improved performance on measures of factuality and adherence to desired behavior. A core component of this project was developing 
infrastructure and optimization methods that behave predictably across a wide range of scales. This allowed us to accurately predict 
some aspects of GPT-4's performance based on models trained with no more than 1/1,000th the compute of GPT-4.
'''
_ = inference(text)

TF Model Prediction: ['cs'] (0.9761691689491272)
TFLITE Model Prediction: ['cs'] (0.9693808555603027)


In [9]:
# Example input: an abstract about the Boreal Summer Intraseasonal Oscillation.
# inference() prints the Keras and TFLite predictions for it and returns None.
text = '''
The Boreal Summer Intraseasonal Oscillation (BSISO) is a pronounced mode of tropical intraseasonal convective 
variability during the boreal summer. One of the most prominent features of the BSISO is the northward movement
of convection in the South Asian monsoon region. Using long-term observational and reanalysis data, we identify
two types of BSISO events, one which propagates northward over South Asia from the equatorial Indian Ocean, and
the other which doesn't. By investigating the difference between these two types of events, we identify the 
critical mechanisms involved in northward propagation. A moisture budget reveals that for propagating cases 
when organized convection first appears over the equatorial Indian Ocean, easterlies on the northern flank of 
the Rossby wave response to enhanced convection (cyclonic) as well as those on the southern flank of the Rossby 
wave response (anticyclonic) to the suppressed convection further north act on the climatological moisture distribution, 
and rapidly moisten the atmosphere over the southern Arabian Sea. This results in the characteristic northwest-southeast-oriented 
convection observed in the BSISO. Now, as this tilted belt of enhanced convection is present south of the previous cycle 
of suppressed convection associated with subsidence, in the presence of background easterly vertical shear of the monsoon 
winds, a latitudinally tilted vortex tilting term is generated due to the meridional gradient in vertical velocity. 
The generation of positive vorticity anomalies over the Arabian Sea more than over the Bay of Bengal, leads to a 
tilted gyre north of the convective anomaly.'''
_ = inference(text)

TF Model Prediction: ['physics'] (0.9798455834388733)
TFLITE Model Prediction: ['physics'] (0.9792115688323975)


In [10]:
# Example input: an abstract about turbulent mixing in the abyssal ocean.
# inference() prints the Keras and TFLite predictions for it and returns None.
text = '''
Across the stable density stratification of the abyssal ocean, deep dense water is slowly 
propelled upward by sustained, though irregular, turbulent mixing. The resulting mean 
upwelling is key to setting large-scale oceanic circulation properties, such as heat and 
carbon transport. It is generally accepted that in the ocean interior, this turbulent mixing 
is caused mainly by breaking internal waves, which are predominantly generated by winds and 
tides, interact nonlinearly, thereby fluxing energy down to ever smaller scales, and finally 
become unstable, break and mix the water column. This paradigm forms the conceptual backbone of 
the widely used Finescale Parameterization. This formula estimates small-scale mixing from the 
readily observable internal wave activity at larger scales and theoretical scaling laws for the 
downscale nonlinear energy flux, but has never been fully explained theoretically. Here, we 
close this gap using wave-wave interaction theory with input from both localized high-resolution 
experiments and combined global observational datasets. We find near-ubiquitous agreement between 
our predictions, derived from first-principles alone, and the observed mixing patterns in the global 
ocean interior. Our findings lay the foundations for a new type of wave-driven mixing parameterization 
for ocean general circulation models that is entirely physics-based, which is key to reliably 
represent climate states that differ substantially from today's.
'''
_ = inference(text)

TF Model Prediction: ['physics'] (0.9791610836982727)
TFLITE Model Prediction: ['physics'] (0.976417064666748)


In [11]:
# Example input: a short abstract about morphisms of relative toposes.
# inference() prints the Keras and TFLite predictions for it and returns None.
text = '''
We systematically investigate the functors between sites which induce morphisms 
of relative toposes. In particualar, we establish a relative version of Diaconescu's 
theorem, characterizing the relative geometric morphisms towards a relative sheaf 
topos in terms of a notion of flat (equivalently, filtered) functor relative to the base topos.
'''
_ = inference(text)

TF Model Prediction: ['math'] (0.9992749094963074)
TFLITE Model Prediction: ['math'] (0.9992457628250122)


In [12]:
# Example input: an abstract about multimodal emotion recognition in conversations.
# inference() prints the Keras and TFLite predictions for it and returns None.
text = '''
Emotion recognition in conversations (ERC), the task of recognizing the emotion of each 
utterance in a conversation, is crucial for building empathetic machines. Existing 
studies focus mainly on capturing context- and speaker-sensitive dependencies on the 
textual modality but ignore the significance of multimodal information. Different from 
emotion recognition in textual conversations, capturing intra- and inter-modal interactions 
between utterances, learning weights between different modalities, and enhancing modal representations 
play important roles in multimodal ERC. In this paper, we propose a transformer-based model with 
self-distillation (SDT) for the task. The transformer-based model captures intra- and inter-modal 
interactions by utilizing intra- and inter-modal transformers, and learns weights between modalities 
dynamically by designing a hierarchical gated fusion strategy. Furthermore, to learn more expressive 
modal representations, we treat soft labels of the proposed model as extra training supervision. 
Specifically, we introduce self-distillation to transfer knowledge of hard and soft labels from 
the proposed model to each modality. Experiments on IEMOCAP and MELD datasets demonstrate that 
SDT outperforms previous state-of-the-art baselines.
'''
_ = inference(text)

TF Model Prediction: ['cs'] (0.5600845813751221)
TFLITE Model Prediction: ['cs'] (0.5706031322479248)


In [13]:
# Example input: an abstract about measuring and combining NBA mock-draft forecasts.
# inference() prints the Keras and TFLite predictions for it and returns None.
text = '''
Many enthusiasts and experts publish forecasts of the order players 
are drafted into professional sports leagues, known as mock drafts. 
Using a novel dataset of mock drafts for the National Basketball Association (NBA), 
we analyze authors' mock draft accuracy over time and ask how we can reasonably use 
information from multiple authors. To measure how accurate mock drafts are, we assume 
that both mock drafts and the actual draft are ranked lists, and we propose that 
rank-biased distance (RBD) of Webber et al. (2010) is the appropriate error metric 
for mock draft accuracy. This is because RBD allows mock drafts to have a different 
length than the actual draft, accounts for players not appearing in both lists, and 
weights errors early in the draft more than errors later on. We validate that mock drafts, 
as expected, improve in accuracy over the course of a season, and that accuracy of the mock 
drafts produced right before their drafts is fairly stable across seasons. To be able to 
combine information from multiple mock drafts into a single consensus mock draft, we also 
propose a ranked-list combination method based on the ideas of ranked-choice voting. 
We show that our method provides improved forecasts over the standard Borda count combination 
method used for most similar analyses in sports, and that either combination method provides 
a more accurate forecast over time than any single author.
'''
_ = inference(text)

TF Model Prediction: ['stat'] (0.7425745129585266)
TFLITE Model Prediction: ['stat'] (0.7227165699005127)
