### Finetune DETR to detect female-ish faces in paintings

In [None]:
! pip install --upgrade scipy transformers datasets huggingface_hub pytorch-lightning pycocotools

In [None]:
# Authenticate with the Hugging Face Hub through the command-line client.
!huggingface-cli login

In [None]:
# Authenticate with the Hugging Face Hub from inside the notebook.
# NOTE(review): this appears to overlap with the `huggingface-cli login` cell
# above — presumably only one of the two needs to be run; confirm.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
from Cocordiais import CocordiaisDataset

from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoModelForObjectDetection, DetrImageProcessor, Trainer, TrainingArguments

In [None]:
# Hugging Face Hub identifiers used by the rest of the notebook.
DETR_MODEL = "facebook/detr-resnet-50"  # checkpoint for the image processor and the model
HF_DATASET = "thiagohersan/cordiais-faces"  # dataset passed to load_dataset
HF_MODEL = "thiagohersan/detr-cordiais-autotrain"  # Trainer output_dir and push target

In [None]:
# Image processor for the base checkpoint, configured with an explicit target size.
detr_size = dict(shortest_edge=800, longest_edge=800)
detr_processor = DetrImageProcessor.from_pretrained(DETR_MODEL, size=detr_size)

# Only the "train" split of the Hub dataset is read; it is then divided 80/20
# into train/test with a fixed seed so the split is reproducible.
hf_dataset = load_dataset(HF_DATASET)
hf_dataset = hf_dataset["train"].train_test_split(
    test_size=0.2,
    shuffle=True,
    seed=101010,
)

# Wrap each split in the project dataset class; `train` flags which split is
# the training one.
dataset_train, dataset_eval = (
    CocordiaisDataset(hf_dataset[split_name], img_processor=detr_processor, train=is_train)
    for split_name, is_train in (("train", True), ("test", False))
)

# Report how many examples ended up in each split.
example_counts = (len(dataset_train), len(dataset_eval))
print("Number of examples:\n  Train: %s\n  Evaluation: %s" % example_counts)

In [None]:
# Category names come from the "category" feature of the dataset's "objects" column.
object_features = dataset_train.data.features["objects"].feature
labels = object_features["category"].names

# Index -> name and name -> index lookups handed to the model configuration.
id2label = dict(enumerate(labels))
label2id = {name: index for index, name in id2label.items()}

In [None]:
# Load the base DETR checkpoint and re-configure it for this dataset's labels.
model = AutoModelForObjectDetection.from_pretrained(
    DETR_MODEL,
    revision="no_timm",
    # Label space of the fine-tuning dataset.
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
    num_queries=16,
    # The label/query settings above differ from the checkpoint's, so layers
    # whose shapes no longer match are allowed to be re-initialised.
    ignore_mismatched_sizes=True,
)

In [None]:
# Hyper-parameters for the fine-tuning run. Checkpoints and logs are written
# once per epoch under `output_dir`, keeping at most two checkpoints.
# NOTE(review): no evaluation strategy is configured here, so presumably
# `eval_dataset` is only used by an explicit `trainer.evaluate()` — confirm.
training_args = TrainingArguments(
    output_dir=HF_MODEL,
    # Optimisation.
    num_train_epochs=48,
    learning_rate=1e-5,
    weight_decay=1e-4,
    fp16=True,
    # Batching.
    per_device_train_batch_size=12,
    per_device_eval_batch_size=4,
    # Checkpointing and logging.
    save_strategy="epoch",
    save_total_limit=2,
    logging_strategy="epoch",
    # Presumably needed so the custom collator sees every dataset column — confirm.
    remove_unused_columns=False,
)

# The Trainer receives the underlying `.data` of each wrapper plus the
# wrapper's own batch collation function.
# NOTE(review): recent transformers releases name the `tokenizer` argument
# `processing_class` — confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train.data,
    eval_dataset=dataset_eval.data,
    data_collator=dataset_train.collate_batch,
    tokenizer=detr_processor,
)

In [None]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

In [None]:
# Run the fine-tuning loop configured on `trainer`.
trainer.train()

In [None]:
# Publish the fine-tuned weights and the image processor to a private Hub repo.
# `Trainer.push_to_hub` takes a commit message as its first positional argument
# and forwards extra keyword arguments to `create_model_card`, so it accepts
# neither a repo id nor `private`. The model/processor `push_to_hub` methods
# accept both, so they are used instead.
trainer.model.push_to_hub(HF_MODEL, private=True)
detr_processor.push_to_hub(HF_MODEL, private=True)