In [None]:
!pip install transformers
import transformers

# Pipeline Skeleton

## 1. Load the Tokenizer

In [4]:
from transformers import AutoTokenizer, pipeline

# High-level API: with no model argument, pipeline() falls back to its default
# sentiment-analysis checkpoint (the one named in the log output of this cell).
classifier = pipeline("sentiment-analysis")
sentiment_results = classifier(
    [
        "I love you.",
        "I hate this so much!",
    ]
)
# Print explicitly: a notebook only auto-displays a cell's final expression,
# and this call is not the last statement, so its result was being dropped.
print(sentiment_results)

# Load the tokenizer of that same default checkpoint so the preprocessing
# step of the pipeline can be reproduced by hand in the cells that follow.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Transformer models cannot take raw text directly: whatever the content, their input must be a tensor.

That is why we use the tokenizer to "tensorify" the input.

In [6]:
# Two sentences of different lengths, so that padding is visible in the result.
raw_inputs = [
    "I've been waiting for this movie my whole life!",
    "I hate this so much!",
]

# Tokenize the whole batch at once and ask for TensorFlow tensors back.
inputs = tokenizer(
    raw_inputs,
    padding=True,
    truncation=True,
    return_tensors="tf",
)

# Show the token ids and the matching attention mask, each under its own label.
for field_name in ("input_ids", "attention_mask"):
    print(f"{field_name} \n")
    print(getattr(inputs, field_name))

input_ids 

tf.Tensor(
[[ 101 1045 1005 2310 2042 3403 2005 2023 3185 2026 2878 2166  999  102]
 [ 101 1045 5223 2023 2061 2172  999  102    0    0    0    0    0    0]], shape=(2, 14), dtype=int32)
attention_mask 

tf.Tensor(
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1 0 0 0 0 0 0]], shape=(2, 14), dtype=int32)


## 2. Import the model

In [7]:
# Load the base model for the same checkpoint the tokenizer came from.
# TFAutoModel picks the architecture class from the checkpoint; per the
# warning printed by this cell, the classification layers stored in the
# checkpoint are not used by this class.
from transformers import TFAutoModel

# Re-declared here so this cell does not depend on the tokenizer cell's variable.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = TFAutoModel.from_pretrained(checkpoint)

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertModel: ['pre_classifier', 'dropout_19', 'classifier']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


The output tensor has the following dimensions:

- Batch size: The number of sequences processed at a time (2 in our example).
- Sequence length: The length of the numerical representation of the sequence (14 in our example).
- Hidden size: The vector dimension of each model input.

INPUT (tokenized) : 2,14

OUTPUT : 2,14,768

In [8]:
# Forward pass through the base model, then inspect the shape of its last
# hidden state: (batch size, sequence length, hidden size).
outputs = model(inputs)
hidden_state_shape = outputs.last_hidden_state.shape
print(hidden_state_shape)

(2, 14, 768)


In [10]:
#Using for Sequence Classification
from transformers import TFAutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model_2 = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
outputs_2 = model_2(inputs)

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['dropout_38']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## 3. Post-processing (into softmax)

In [11]:
#Then process to make more sense
import tensorflow as tf
predictions = tf.math.softmax(outputs_2.logits, axis=-1)
print(predictions) #probability as positive vs negative

tf.Tensor(
[[1.4316937e-03 9.9856830e-01]
 [9.9945587e-01 5.4418371e-04]], shape=(2, 2), dtype=float32)


In [15]:
# Look up which label name belongs to each class index of the predictions.
model_2.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

# 2. Model itself

## Building configuration

In [16]:
# Fetch the configuration stored with the "bert-base-cased" checkpoint.
from transformers import AutoConfig
bert_config = AutoConfig.from_pretrained("bert-base-cased")
# AutoConfig resolves to the concrete config class (see the type shown below),
# so BertConfig.from_pretrained("bert-base-cased") is the direct equivalent.
# NOTE(review): a bare BertConfig() uses library defaults and is presumably
# not identical to this checkpoint's config — confirm before treating them as the same.

type(bert_config)

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

transformers.models.bert.configuration_bert.BertConfig

In [17]:
#Saving the model structure
from transformers import BertConfig,TFBertModel
bert_config = BertConfig.from_pretrained("bert-base-cased")
bert_model = TFBertModel(bert_config)
bert_model.save_pretrained("bert-model")

In [18]:
ls

[0m[01;34mbert-model[0m/  [01;34msample_data[0m/


In [22]:
from transformers import BertConfig, TFBertModel

# Default configuration: library-default hyperparameters (printed below),
# not values read from any checkpoint.
config = BertConfig()
# A model built from a config alone starts with freshly initialized
# (untrained) weights — it is the weights, not the config, that are random.
model = TFBertModel(config)
print(config)

# Preferred in practice: load pretrained weights with from_pretrained().
model_loaded = TFBertModel.from_pretrained("bert-base-cased")

# Path fix: the original "~./example" was a typo. `~` is not expanded inside a
# Python string, so it would have targeted a directory literally named "~.".
# NOTE(review): this saves `model` (the config-only model), not `model_loaded`
# — confirm which of the two was meant to be saved.
model.save_pretrained("./example")

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



Downloading:   0%|          | 0.00/502M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Inside the saved directory, you should find these two files:
config.json tf_model.h5

config.json : attributes for building the model

tf_model.h5 : containing all the weights


In [None]:
#Can also change the details
from transformers import BertConfig,TFBertModel
bert_config_1 = BertConfig.from_pretrained("bert-base-cased",num_hidden_layers=10)
bert_model_1 = TFBertModel(bert_config_1)
bert_model_1

<transformers.models.bert.modeling_tf_bert.TFBertModel at 0x7f80c9c919d0>

Here, we only need the input_ids

In [29]:
# Three short example sentences to run through the tokenizer as one batch.
sequences = ["Hello!", "Cool.", "Nice!"]

tokenized_sequence = tokenizer(
    sequences,
    padding=True,
    truncation=True,
    return_tensors="tf",
)

# Only the token ids are needed for the next step, so display just those.
tokenized_sequence.input_ids

<tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[ 101, 7592,  999,  102],
       [ 101, 4658, 1012,  102],
       [ 101, 3835,  999,  102]], dtype=int32)>

In [31]:
import tensorflow as tf

# Wrap the token ids as a TensorFlow constant to use as the model input.
# The ids are already displayed as a tf.Tensor in the previous cell's output,
# so this mainly gives the tensor its own name.
model_inputs = tf.constant(tokenized_sequence.input_ids)

In [32]:
# Display the tensor of token ids that will be passed to the model.
model_inputs

<tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[ 101, 7592,  999,  102],
       [ 101, 4658, 1012,  102],
       [ 101, 3835,  999,  102]], dtype=int32)>

In [35]:
# NOTE(review): `model` is whatever was last bound to that name. An earlier
# cell rebinds it to TFBertModel(config) built from a default BertConfig —
# confirm that this, and not a pretrained model, is the one intended here.
output = model(model_inputs)
# Only the input_ids tensor is passed; no attention mask is supplied.