# Florence2 Fine-Tuning
This Colab notebook demonstrates how to fine-tune the Florence2 language model using Hugging Face Transformers.

## Setup
Install the required packages.

In [None]:
# Shell escape: install the libraries imported below (transformers, datasets)
# together with accelerate; -q keeps pip's output quiet.
!pip install -q transformers datasets accelerate

## Load Dataset
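Replace `'path/to/data'` with a local dataset path or the name of a dataset on the Hugging Face Hub. The code below expects the dataset to provide a `train` split with a `text` column.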

In [None]:
from datasets import load_dataset

# Load the raw data, then hold out 10% of its 'train' split for evaluation.
raw_dataset = load_dataset('path/to/data')
dataset = raw_dataset['train'].train_test_split(test_size=0.1)
train_dataset, eval_dataset = dataset['train'], dataset['test']

## Load Model and Tokenizer

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint id -- update with the correct model name.
# NOTE(review): the published Florence-2 checkpoints appear to live under the
# `microsoft/` namespace and ship custom modeling code -- confirm the id and
# whether `trust_remote_code=True` is required before running.
model_name = 'florence/florence-2'

# Both objects are loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

## Training Configuration

In [None]:
from transformers import TrainingArguments, Trainer

# Hyperparameters and bookkeeping for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='florence2-finetuned',  # checkpoints are written here
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    logging_steps=10,
    # `evaluation_strategy` was renamed to `eval_strategy`; recent transformers
    # releases (which the unpinned pip install above pulls in) reject the old
    # keyword.
    eval_strategy='steps',
    # Evaluate on the same cadence as checkpointing. Left unset, eval_steps
    # falls back to logging_steps, i.e. a full evaluation pass every 10 steps.
    eval_steps=100,
    save_steps=100,
)

## Preprocess

In [None]:
def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch with truncation enabled."""
    texts = examples['text']
    return tokenizer(texts, truncation=True)


# Apply the tokenizer batch-wise to the training split, then the evaluation split.
tokenized_train, tokenized_eval = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, eval_dataset)
)

## Fine-Tune

In [None]:
from transformers import DataCollatorForLanguageModeling

# Dynamic padding needs a pad token; fall back to EOS when the tokenizer has none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# The tokenized datasets hold variable-length `input_ids` and no `labels`.
# With mlm=False this collator pads each batch and copies `input_ids` into
# `labels` (padding positions masked out), so the model can return a
# language-modeling loss for the Trainer to optimize.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
)
trainer.train()

## Save Model

In [None]:
# Write the fine-tuned model to the output directory.
trainer.save_model('florence2-finetuned')
# The Trainer was not given the tokenizer, so save_model() does not write it;
# save it alongside the model so the directory can be reloaded on its own.
tokenizer.save_pretrained('florence2-finetuned')

## Inference

In [None]:
# After fine-tuning the model may still be in training mode; switch to eval
# mode so dropout is disabled while generating.
model.eval()

# Tokenize the prompt and move the resulting tensors to the model's device.
inputs = tokenizer('Hello, world!', return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
# Drop special tokens (e.g. BOS/EOS/padding) from the printed text.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))