# Fine-tuning a model

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

from transformers import (
  Trainer,TrainingArguments
)

from transformers import pipeline

## Preparing a dataset

In [None]:
# Checkpoint name shared by the model and its tokenizer
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Instantiate the sequence-classification model and the matching tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the train split of the "20231101.en" configuration of
# wikimedia/wikipedia and keep only its first three rows
dataset = load_dataset(
    "wikimedia/wikipedia", "20231101.en", split="train"
).select(range(3))

# Tokenize the "text" field of every row, truncating to at most 512 tokens.
# NOTE(review): map() is not batched here, so each row is tokenized on its
# own; padding=True presumably adds no padding in that case -- confirm intent.
dataset = dataset.map(
    lambda example: tokenizer(
        example["text"],
        truncation=True,
        max_length=512,
        padding=True,
    ),
    keep_in_memory=True,
)

## Building the trainer

In [None]:
# Checkpoint used for both the tokenizer and the model being fine-tuned
model_name = "google-bert/bert-base-cased"

dataset = load_dataset("yelp_review_full")

tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_function(examples):
    """Tokenize the "text" column of a batch of examples.

    Every sequence is padded to the tokenizer's maximum length and truncated
    if it is longer, so all rows end up with the same length.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)


# batched=True hands tokenize_function several rows at once
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Keep only 10 shuffled examples per split so the demo trains quickly;
# the fixed seed makes the selection reproducible
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(10))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(10))

# Load the checkpoint with a five-class classification head
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5)

training_args = TrainingArguments(output_dir=".results")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset
)

trainer.train()

# Explicitly write the fine-tuned model and the tokenizer into output_dir so
# that the directory can later be loaded as a model (e.g. by `pipeline`).
# train() alone is not guaranteed to leave a loadable model directly in
# output_dir: periodic checkpoints go to checkpoint-* subfolders, and a run
# this short may never reach a checkpoint at all.
trainer.save_model()
tokenizer.save_pretrained(training_args.output_dir)

## Using the fine-tuned model

In [None]:
text_example = "I am a HUGE fan of romantic comedies."

# Create the classifier from the local ".results" directory, i.e. the
# output_dir used by the training arguments
classifier = pipeline(task="sentiment-analysis", model=".results")

# Classify the text. The input is passed positionally: the pipeline's call
# signature takes the text as its first positional argument, not as a
# `text=` keyword.
results = classifier(text_example)

print(results)