In [None]:
#Instalamos el paquete gdown para poder descargar el dataset desde el gdrive
!pip install gdown

In [None]:
# Download the dataset archive from Google Drive.
# The download is skipped when the archive is already present so that
# "Restart & Run All" does not fetch it again; delete the file to force a refresh.
import os

import gdown

output = 'blood2.zip'
if not os.path.exists(output):
    downloaded = gdown.download(id='1Uic5AmCmaP2-Um_q8OxpiATjylqQjQ7E', output=output, quiet=False)
    # Some gdown versions report a failed download by returning None instead of raising.
    if downloaded is None:
        raise RuntimeError(f"Could not download the dataset archive to {output!r}")

In [None]:
# Descomprimimos el dataset
!unzip blood2.zip

In [None]:
# blocks output in Colab 💄
%%capture

#Instalamos los paquetes de HuggingFace

! pip install datasets transformers

## Cargar dataset

In [None]:
# Load the images under "blood/images" with the generic `imagefolder` builder.
# NOTE(review): presumably the splits and class labels are inferred from the
# sub-directory names of the extracted archive - confirm against its layout.
from datasets import load_dataset

ds = load_dataset('imagefolder', data_dir="blood/images")
# Display the loaded dataset object.
ds

In [None]:
# Pick one training example (index 400) and display it.
ex = ds['train'][400]
ex

In [None]:
# Keep the example's image in `image` (a later cell feeds it to the preprocessor)
# and display it.
image = ex['image']
image

In [None]:
# Extract the label feature (the classes of the problem) from the training split

labels = ds['train'].features['label']
labels

In [None]:
# Convert the sample's integer label into its class name.
labels.int2str(ex['label'])

In [None]:
#from transformers.utils.dummy_vision_objects import ImageGPTFeatureExtractor

# Mostramos algunos ejemplos de cada clase

import random
from PIL import ImageDraw, ImageFont, Image

def show_examples(ds, seed: int = 1234, examples_per_class: int = 3, size=(350, 350)):
    """Build a grid image showing a few training samples of every class.

    Each class occupies one row of the grid and each sample one cell; the class
    name is drawn in white at the top-left corner of every cell.

    Args:
        ds: Dataset dictionary with a 'train' split whose items have an 'image'
            (PIL image) and an integer 'label' column.
        seed: Seed used to shuffle each class before taking its samples.
        examples_per_class: Number of columns, i.e. samples shown per class.
            Classes with fewer samples leave their remaining cells black.
        size: (width, height) in pixels of each cell; images are resized to it.

    Returns:
        A PIL RGB image of size (examples_per_class * width, n_classes * height).
    """
    w, h = size
    labels = ds['train'].features['label'].names
    grid = Image.new('RGB', size=(examples_per_class * w, len(labels) * h))
    draw = ImageDraw.Draw(grid)
    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf", 24)
    except OSError:
        # The Liberation font is not installed on every machine; fall back to
        # Pillow's built-in font instead of failing.
        font = ImageFont.load_default()

    for label_id, label in enumerate(labels):

        # Filter the dataset by a single label and shuffle it
        class_ds = ds['train'].filter(lambda ex: ex['label'] == label_id).shuffle(seed)

        # Never ask for more samples than the class actually has
        n_shown = min(examples_per_class, len(class_ds))

        # Plot this label's examples along a row: column = sample, row = class
        for col, example in enumerate(class_ds.select(range(n_shown))):
            box = (col * w, label_id * h)
            grid.paste(example['image'].resize(size), box=box)
            draw.text(box, label, (255, 255, 255), font=font)

    return grid

# Render the grid with three samples per class. The seed is drawn at random,
# so every run of this cell may show different samples.
show_examples(ds, seed=random.randint(0, 1337), examples_per_class=3)

# Crear Preprocesador de datos

In [None]:
# Every model has its own class for pre-processing data
# NOTE(review): recent transformers releases deprecate ViTFeatureExtractor in
# favour of ViTImageProcessor - confirm against the installed version.

from transformers import ViTFeatureExtractor

# Checkpoint identifier; the model-loading cell below reuses it.
model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)

In [None]:
# Display the loaded preprocessor.
feature_extractor

In [None]:
# Run the preprocessor on the sample image, asking for PyTorch tensors ('pt').
feature_extractor(image, return_tensors='pt')

In [None]:
def process_example(example):
    """Preprocess a single dataset example.

    Passes the example's 'image' through the module-level `feature_extractor`
    (requesting PyTorch tensors) and stores the example's 'label' under the
    'labels' key of the result.

    Args:
        example: Mapping with an 'image' and a 'label' entry.

    Returns:
        The feature extractor's output with an added 'labels' entry.
    """
    encoded = feature_extractor(example['image'], return_tensors='pt')
    encoded['labels'] = example['label']
    return encoded

In [None]:
# Try the single-example preprocessing on the first training item.
process_example(ds['train'][0])

In [None]:
def transform(example_batch):
    """Preprocess a batch of examples for the model.

    Args:
        example_batch: Mapping with an 'image' entry (list of PIL images) and a
            'label' entry (list of labels).

    Returns:
        The output of the module-level `feature_extractor` (PyTorch tensors)
        with the batch labels added under the 'labels' key.
    """
    # Force three channels: grayscale, RGBA or palette images would otherwise
    # produce tensors whose channel count differs from what the model expects.
    images = [img.convert('RGB') for img in example_batch['image']]

    # Take the list of PIL images and turn them to pixel values
    inputs = feature_extractor(images, return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['label']
    return inputs

# Attach `transform` to the dataset; with_transform applies it on the fly when
# items are accessed rather than preprocessing everything up front.
prepared_ds = ds.with_transform(transform)

In [None]:
# Fetch the first two prepared training examples to check the transform output.
prepared_ds['train'][0:2]

# Entrenamiento

In [None]:

import torch

def collate_fn(batch):
    """Collate a list of prepared examples into one batch.

    Args:
        batch: List of mappings, each with a 'pixel_values' tensor and a
            'labels' value.

    Returns:
        Dict with 'pixel_values' (the per-example tensors stacked along a new
        first dimension) and 'labels' (a tensor built from the per-example labels).
    """
    pixel_values = torch.stack([sample['pixel_values'] for sample in batch])
    label_tensor = torch.tensor([sample['labels'] for sample in batch])
    return {'pixel_values': pixel_values, 'labels': label_tensor}

In [None]:
import numpy as np


def compute_metrics(p):
    """Compute the accuracy of a set of predictions.

    Accuracy is computed directly with NumPy: `datasets.load_metric`, which this
    cell used before, was removed from the `datasets` package in version 3.0 and
    would make the import fail on a fresh install.

    Args:
        p: Evaluation result exposing `predictions` (array of per-class scores,
            one row per example) and `label_ids` (array of true class indices).

    Returns:
        Dict with a single 'accuracy' entry: the fraction of examples whose
        highest-scoring class equals the true label.
    """
    predicted = np.argmax(p.predictions, axis=1)
    return {"accuracy": float(np.mean(predicted == p.label_ids))}

In [None]:
# Create and load a pre-trained model

from transformers import ViTForImageClassification

# NOTE: this rebinds `labels` to the list of class names; an earlier cell bound
# the same name to the label feature object itself.
labels = ds['train'].features['label'].names

model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(labels),  # one output per class of this dataset
    id2label={str(i): c for i, c in enumerate(labels)},  # index (as string) -> class name
    label2id={c: str(i) for i, c in enumerate(labels)}  # class name -> index (as string)
)

In [None]:
# The training parameters are configured in a TrainingArguments object

import torch
from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="./prueba",
  per_device_train_batch_size=16,
  # NOTE(review): newer transformers releases rename this argument to
  # `eval_strategy` - confirm against the installed version.
  evaluation_strategy="steps",
  num_train_epochs=4,
  # Mixed precision is only enabled when a CUDA device is present; asking for
  # fp16 on a machine without one makes TrainingArguments fail.
  fp16=torch.cuda.is_available(),
  # Checkpoints and evaluations happen at the same step interval, which
  # load_best_model_at_end relies on to pick the best checkpoint.
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  # Keep the 'image' column: the on-the-fly transform needs it to build pixel values.
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
)

In [None]:
# Create a Trainer object

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,  # batches the prepared examples
    compute_metrics=compute_metrics,  # accuracy of the argmax predictions
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],  # the dataset must provide a "validation" split
    # NOTE(review): presumably passed so the preprocessor is saved alongside the
    # model checkpoints - confirm against the Trainer documentation.
    tokenizer=feature_extractor,
)

In [None]:
# Run the training

train_results = trainer.train()
# Persist the trained model, then log and save the training metrics and the trainer state.
# NOTE(review): presumably everything is written under output_dir ("./prueba") - confirm.
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

# Test

In [None]:
# Evaluate on the test split

metrics = trainer.evaluate(prepared_ds['test'])
# The metrics are logged and saved under the "eval" name even though they come
# from the test split.
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

# Inferencia

In [None]:
# Build an image-classification pipeline around the fine-tuned model and
# classify one image of the test split.
import torch
from transformers import pipeline


vision_classifier = pipeline(
    task='image-classification',
    model=model,
    feature_extractor=feature_extractor,
    # Use the first GPU when one is available, otherwise run on the CPU (-1)
    # instead of failing on machines without CUDA.
    device=0 if torch.cuda.is_available() else -1,
)
# Relative path, consistent with the data_dir used to load the dataset, so the
# cell does not depend on Colab's /content working directory.
preds = vision_classifier(images='blood/images/test/EOSINOPHIL/_0_1616.jpeg')
print(preds)