In [11]:
from transformers import AutoTokenizer

# Checkpoint identifier; the model-loading cells further down reuse this same
# variable, so tokenizer and model come from the same checkpoint.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# The Auto class resolves the tokenizer from the checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [66]:
# Two sentences of different lengths, so the shorter one gets padded.
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

# Tokenizer options kept in one place: pad the batch, allow truncation, and
# return PyTorch tensors ("pt").
tokenizer_kwargs = dict(padding=True, truncation=True, return_tensors="pt")
inputs = tokenizer(raw_inputs, **tokenizer_kwargs)
print(inputs)

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}


In [67]:
# Decode the first encoded sentence back to text; the result shows the
# special tokens ([CLS] / [SEP]) that the tokenizer added.
first_sequence_ids = inputs["input_ids"][0]
tokenizer.decode(first_sequence_ids)

"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"

In [13]:
from transformers import AutoModel

# Load the model for the same checkpoint as the tokenizer via the generic
# Auto class; its `last_hidden_state` output is inspected in the next cell.
model = AutoModel.from_pretrained(checkpoint)

In [14]:
# Forward pass: the tokenizer output is unpacked into keyword arguments
# (input_ids, attention_mask).
outputs = model(**inputs)

# Shape of the hidden states for the two padded 16-token sentences.
last_hidden_state = outputs.last_hidden_state
print(last_hidden_state.shape)

torch.Size([2, 16, 768])


In [16]:
# Same checkpoint, this time loaded as model + sequence-classification head.
# Note: this rebinds `model`, replacing the AutoModel instance loaded earlier.
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [None]:
# Run the sequence-classification model on the tokenized batch.
outputs = model(**inputs)

# Display only the raw logits (one row per sentence, one column per label),
# which is what the recorded output of this cell shows. They are unnormalized
# scores; a later cell converts them to probabilities with softmax.
outputs.logits

tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)

In [19]:
# Convert the raw scores (logits) to probabilities: softmax over the last
# dimension normalizes each sentence's row.
import torch

logits = outputs.logits
predictions = torch.softmax(logits, dim=-1)
print(predictions)

tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)


In [25]:
# Label names for the class indices, used to read the columns of the
# predictions printed above.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

The AutoModel class and its associates are actually simple wrappers designed to fetch the appropriate model architecture for a given checkpoint. It’s an “auto” class meaning it will guess the appropriate model architecture for you and instantiate the correct model class. However, if you know the type of model you want to use, you can use the class that defines its architecture directly:

In [26]:
from transformers import BertModel

# Load through the architecture-specific class instead of an Auto class.
# Note: this rebinds `model`, replacing the sequence-classification model above.
model = BertModel.from_pretrained("bert-base-cased")

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

In [None]:
# Save the model to a local directory; the listing in the next cell shows the
# files written (config.json and model.safetensors).
model.save_pretrained("/content/bert")

In [28]:
# List the files written by save_pretrained.
!ls /content/bert

config.json  model.safetensors


In [31]:
import json

# Inspect the configuration file saved next to the weights. The encoding is
# passed explicitly so that decoding the JSON does not depend on the
# platform's default locale encoding.
with open("/content/bert/config.json", "r", encoding="utf-8") as f:
    config = json.load(f)
config

{'architectures': ['BertModel'],
 'attention_probs_dropout_prob': 0.1,
 'classifier_dropout': None,
 'dtype': 'float32',
 'gradient_checkpointing': False,
 'hidden_act': 'gelu',
 'hidden_dropout_prob': 0.1,
 'hidden_size': 768,
 'initializer_range': 0.02,
 'intermediate_size': 3072,
 'layer_norm_eps': 1e-12,
 'max_position_embeddings': 512,
 'model_type': 'bert',
 'num_attention_heads': 12,
 'num_hidden_layers': 12,
 'pad_token_id': 0,
 'position_embedding_type': 'absolute',
 'transformers_version': '4.57.2',
 'type_vocab_size': 2,
 'use_cache': True,
 'vocab_size': 28996}

In [32]:
# Reuse the saved model: from_pretrained is given the local directory that
# save_pretrained wrote to, instead of a checkpoint name.
model = AutoModel.from_pretrained("/content/bert")

In [None]:
# Equivalent to the AutoModel call above: AutoModel is the generic entry point
# that picks the architecture for you, while BertModel is specific to BERT.
model = BertModel.from_pretrained("/content/bert")

In [34]:
# Uploading the model to the Hugging Face Hub: log in first.
from huggingface_hub import notebook_login

# Displays a login widget in the notebook output.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Push the current `model` to the Hub under the repository name "my-awesome-model".
# NOTE(review): presumably requires the login from the previous cell — confirm.
model.push_to_hub("my-awesome-model")

## Note - 
1) As shown above for the model, a tokenizer can be loaded, saved, and reused in the same way: the same functions are available on `AutoTokenizer` and on the model-specific tokenizer classes.
2) Just as there are `AutoModel` and model-specific classes (like `BertModel`), there are also `AutoTokenizer` and model-specific tokenizer classes (like `BertTokenizer`).