# Fine-tuning a Hugging Face BERT model for text classification
[![Open in Layer](https://app.layer.ai/assets/badge.svg)](https://app.layer.ai/layer/derrick-bert) [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/layerai/examples/blob/main/text-classification/text_classification_fine_tuning_hf.ipynb) [![Layer Examples Github](https://badgen.net/badge/icon/github?icon=github&label)](https://github.com/layerai/examples/tree/main/text-classification)

In this notebook, we fine-tune a [BERT model](https://huggingface.co/bert-base-uncased) for text classification.

In [None]:
# Install the Hugging Face `transformers` and `datasets` packages
# (-qqq keeps pip's output quiet).
! pip install transformers datasets -qqq

In [None]:
# Install the `layer` package, upgrading it if an older version is present.
!pip install layer --upgrade -qqq

In [None]:
import layer
from layer.decorators import model,pip_requirements,fabric
# Log in to Layer, then initialise it with the name "bert"
# (presumably the Layer project that the models below are stored under).
layer.login()
layer.init("bert")

In [5]:
@pip_requirements(packages=["transformers", "sentencepiece"])
@fabric("f-medium")
@model(name="bert-tokenizer")
def download_tokenizer():
    """Load and return the pretrained ``bert-base-uncased`` tokenizer.

    The function is decorated with ``@model(name="bert-tokenizer")``; the
    same name is used later in the notebook to fetch the tokenizer back
    with ``layer.get_model('bert-tokenizer')``.

    Returns:
        The ``BertTokenizer`` produced by ``BertTokenizer.from_pretrained``.
    """
    # Imported inside the function, as in the rest of the notebook's
    # Layer-decorated functions.
    from transformers import BertTokenizer

    return BertTokenizer.from_pretrained("bert-base-uncased")

In [None]:
# Run the project on Layer Infra
layer.run([download_tokenizer])
# Alternative left commented out: call the function directly instead of
# passing it to layer.run.
# download_tokenizer()

In [7]:
def tokenize_function(examples, tokenizer=None):
    """Tokenize the ``"text"`` field of a batch of examples.

    Every sequence is padded to the tokenizer's maximum length and truncated
    if it is longer (``padding="max_length"``, ``truncation=True``).

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw text(s) to
            tokenize, e.g. a batch handed over by ``Dataset.map``.
        tokenizer: Optional already-loaded tokenizer (any callable accepting
            the texts plus ``padding`` / ``truncation`` keyword arguments).
            When omitted, the pretrained ``bert-base-uncased``
            ``BertTokenizer`` is loaded, exactly as before. Passing one in
            (for instance through ``Dataset.map(..., fn_kwargs={...})``)
            avoids re-loading the tokenizer on every batch.

    Returns:
        Whatever the tokenizer returns for the given texts.
    """
    if tokenizer is None:
        # Lazy import keeps the function usable with a caller-supplied
        # tokenizer even when `transformers` is not importable.
        from transformers import BertTokenizer

        tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    return tokenizer(examples["text"], padding="max_length", truncation=True)

In [8]:
@pip_requirements(packages=["transformers","sentencepiece","datasets"])
@fabric("f-gpu-small")
@model("bert")
def train():
    """Fine-tune ``bert-base-uncased`` for 2-class classification on IMDB.

    Loads the ``imdb`` dataset, keeps 1,000 shuffled examples (seed 42) from
    each of the ``train`` and ``test`` splits, tokenizes them with the
    notebook-level ``tokenize_function``, and trains a
    ``TFAutoModelForSequenceClassification`` for 3 epochs with Adam
    (learning rate 5e-5) and sparse categorical cross-entropy on logits.

    Returns:
        The fitted Keras model.
    """
    import tensorflow as tf
    from transformers import TFAutoModelForSequenceClassification
    from transformers import DefaultDataCollator
    from datasets import load_dataset

    dataset = load_dataset("imdb")

    # Subsample BEFORE tokenizing: the seeded shuffle picks the same rows
    # either way, but this tokenizes 2,000 reviews instead of every review in
    # every split of the dataset.
    small_train_dataset = (
        dataset["train"]
        .shuffle(seed=42)
        .select(range(1000))
        .map(tokenize_function, batched=True)
    )
    small_eval_dataset = (
        dataset["test"]
        .shuffle(seed=42)
        .select(range(1000))
        .map(tokenize_function, batched=True)
    )

    data_collator = DefaultDataCollator(return_tensors="tf")

    def _to_tf_dataset(split, shuffle):
        # Convert a tokenized split into a batched tf.data dataset.
        return split.to_tf_dataset(
            columns=["attention_mask", "input_ids", "token_type_ids"],
            label_cols=["labels"],
            shuffle=shuffle,
            collate_fn=data_collator,
            batch_size=8,
        )

    # Only the training data is shuffled; evaluation order stays fixed.
    tf_train_dataset = _to_tf_dataset(small_train_dataset, shuffle=True)
    tf_validation_dataset = _to_tf_dataset(small_eval_dataset, shuffle=False)

    model = TFAutoModelForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
        # The model outputs raw logits, hence from_logits=True.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=tf.metrics.SparseCategoricalAccuracy(),
    )
    model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=3)
    return model

In [None]:
# Pass the training function to layer.run to execute it through Layer.
layer.run([train])
# Alternative left commented out: call the function directly instead.
# train()

In [None]:
# Fetch the model stored under the name 'bert' and get the trained object
# from it; it is called directly on input ids in the next cell.
bert = layer.get_model('bert').get_train()
bert

In [None]:
# Fetch the tokenizer stored under the name 'bert-tokenizer'.
tokenizer = layer.get_model('bert-tokenizer').get_train()
input_sequence = "I really loved that movie, the script was on point"
# Encode the review as TensorFlow input ids for the classifier
input_ids = tokenizer.encode(input_sequence, return_tensors='tf')
output = bert(input_ids)
# Raw, unnormalised scores (the model was trained with from_logits=True)
logits = output.logits

In [12]:
# Display the raw scores for the input sentence (one value per label).
logits

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-1.9551574,  1.9291866]], dtype=float32)>

In [13]:
# Pick the label with the highest logit and map its index to a label name.
# Pattern taken from the Hugging Face model docs, e.g.
# https://huggingface.co/docs/transformers/main/en/model_doc/bert
import tensorflow as tf
predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
# No custom label names were set in this notebook, so the name shown is
# the generic 'LABEL_<index>' form.
bert.config.id2label[predicted_class_id]

'LABEL_1'

## Where to go from here
To learn more about using Layer, you can:
- Join our [Slack Community](https://bit.ly/layercommunityslack)
- Visit the [Layer Examples Repo](https://github.com/layerai/examples) for more examples
- Browse [Trending Layer Projects](https://layer.ai) on our main page
- Check out [Layer Documentation](https://docs.app.layer.ai) to learn more