In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from datasets import load_dataset
# Load the "sasha/dog-food" dataset by name. Later cells index the result by
# split name ("train" and "test").
dataset = load_dataset("sasha/dog-food")

  from .autonotebook import tqdm as notebook_tqdm
Downloading metadata: 100%|██████████| 1.12k/1.12k [00:00<00:00, 739kB/s]
Downloading readme: 100%|██████████| 4.37k/4.37k [00:00<00:00, 2.86MB/s]


Downloading and preparing dataset None/None (download: 271.88 MiB, generated: 309.93 MiB, post-processed: Unknown size, total: 581.81 MiB) to /home/codespace/.cache/huggingface/datasets/sasha___parquet/sasha--dog-food-ec42a61d5519cc88/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]
Downloading data:   0%|          | 0.00/199M [00:00<?, ?B/s][A
Downloading data:   5%|▌         | 10.0M/199M [00:00<00:01, 100MB/s][A
Downloading data:  10%|█         | 20.0M/199M [00:00<00:01, 97.1MB/s][A
Downloading data:  15%|█▍        | 29.7M/199M [00:00<00:01, 94.6MB/s][A
Downloading data:  20%|█▉        | 39.2M/199M [00:00<00:01, 94.5MB/s][A
Downloading data:  24%|██▍       | 48.6M/199M [00:00<00:01, 91.1MB/s][A
Downloading data:  29%|██▉       | 57.8M/199M [00:00<00:01, 82.6MB/s][A
Downloading data:  33%|███▎      | 66.2M/199M [00:00<00:01, 80.1MB/s][A
Downloading data:  38%|███▊      | 76.5M/199M [00:00<00:01, 87.0MB/s][A
Downloading data:  43%|████▎     | 85.5M/199M [00:00<00:01, 87.8MB/s][A
Downloading data:  48%|████▊     | 96.1M/199M [00:01<00:01, 93.2MB/s][A
Downloading data:  53%|█████▎    | 106M/199M [00:01<00:01, 65.7MB/s] [A
Downloading data:  58%|█████▊    | 115M/199M [00:01<00:01, 72.0MB/s][A


Dataset parquet downloaded and prepared to /home/codespace/.cache/huggingface/datasets/sasha___parquet/sasha--dog-food-ec42a61d5519cc88/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec. Subsequent calls will reuse this data.


100%|██████████| 2/2 [00:00<00:00, 39.27it/s]


In [6]:
# Class names declared on the "label" feature of the training split.
labels = dataset["train"].features["label"].names

# Two lookup tables built from the same enumeration; ids are stored as strings.
label2id = {name: str(index) for index, name in enumerate(labels)}
id2label = {str(index): name for index, name in enumerate(labels)}

# Show both mappings as the cell output.
label2id, id2label

({'dog': '0', 'food': '1'}, {'0': 'dog', '1': 'food'})

In [10]:
from transformers import AutoImageProcessor

# Checkpoint identifier shared by the image processor here and the model
# loaded further down.
checkpoint = "google/vit-base-patch16-224-in21k"
# Image processor for that checkpoint; its `size` entry supplies the crop
# height/width used by the augmentation pipelines.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

Downloading (…)rocessor_config.json: 100%|██████████| 160/160 [00:00<00:00, 149kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 502/502 [00:00<00:00, 398kB/s]


In [12]:
from tensorflow import keras
from tensorflow.keras import layers

# (height, width) taken from the image processor configuration.
size = (image_processor.size["height"], image_processor.size["width"])

# Training pipeline: random crop to `size`, rescale pixel values with
# scale 1/127.5 and offset -1, then random flip / rotation / zoom.
train_data_augmentation = keras.Sequential(name="train_data_augmentation")
train_data_augmentation.add(layers.RandomCrop(*size))
train_data_augmentation.add(layers.Rescaling(scale=1.0 / 127.5, offset=-1))
train_data_augmentation.add(layers.RandomFlip("horizontal"))
train_data_augmentation.add(layers.RandomRotation(factor=0.02))
train_data_augmentation.add(layers.RandomZoom(height_factor=0.2, width_factor=0.2))

# Validation pipeline: center crop to `size` and the same rescaling, with no
# random layers.
val_data_augmentation = keras.Sequential(name="val_data_augmentation")
val_data_augmentation.add(layers.CenterCrop(*size))
val_data_augmentation.add(layers.Rescaling(scale=1.0 / 127.5, offset=-1))

2023-04-12 14:36:18.452006: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [13]:
import numpy as np
import tensorflow as tf
from PIL import Image


def convert_to_tf_tensor(image: Image.Image) -> tf.Tensor:
    """Convert a PIL image into a TensorFlow tensor with a leading batch axis.

    Args:
        image: The PIL image to convert. The annotation names the
            ``PIL.Image.Image`` class; ``PIL.Image`` on its own is the module.

    Returns:
        A tensor holding the image data with one extra dimension of size 1
        inserted at position 0.
    """
    pixel_array = np.array(image)
    tf_image = tf.convert_to_tensor(pixel_array)
    # The TF augmentation layers operate on batched inputs, so prepend a
    # batch dimension of size 1.
    return tf.expand_dims(tf_image, axis=0)


def _augment_to_channels_first(images, augmentation):
    """Run `augmentation` on each image and return channels-first tensors.

    Args:
        images: Iterable of PIL images (each is converted to RGB first).
        augmentation: Callable applied to the batched image tensor, e.g. one
            of the Keras augmentation pipelines.

    Returns:
        A list with one tensor per input image, laid out as
        (channels, height, width).
    """
    pixel_values = []
    for image in images:
        augmented = augmentation(convert_to_tf_tensor(image.convert("RGB")))
        # Remove only the batch axis added by `convert_to_tf_tensor`; a bare
        # `tf.squeeze` would also drop any other axis that happens to be 1.
        unbatched = tf.squeeze(augmented, axis=0)
        # Move channels first: (height, width, channels) -> (channels, height, width).
        # `tf.transpose` without `perm` reverses every axis, which would also
        # swap height with width and feed spatially transposed images.
        pixel_values.append(tf.transpose(unbatched, perm=[2, 0, 1]))
    return pixel_values


def preprocess_train(example_batch):
    """Apply the training augmentations across a batch.

    Reads the PIL images under ``example_batch["image"]``, runs them through
    ``train_data_augmentation`` and stores the resulting channels-first
    tensors under ``example_batch["pixel_values"]``.

    Returns:
        The same ``example_batch`` mapping, with "pixel_values" added.
    """
    example_batch["pixel_values"] = _augment_to_channels_first(
        example_batch["image"], train_data_augmentation
    )
    return example_batch


def preprocess_val(example_batch):
    """Apply the validation preprocessing across a batch.

    Reads the PIL images under ``example_batch["image"]``, runs them through
    ``val_data_augmentation`` and stores the resulting channels-first
    tensors under ``example_batch["pixel_values"]``.

    Returns:
        The same ``example_batch`` mapping, with "pixel_values" added.
    """
    example_batch["pixel_values"] = _augment_to_channels_first(
        example_batch["image"], val_data_augmentation
    )
    return example_batch

In [15]:
# Register the per-split transforms: training augmentations on "train",
# validation preprocessing on "test".
for split_name, transform in (("train", preprocess_train), ("test", preprocess_val)):
    dataset[split_name].set_transform(transform)

In [16]:
from transformers import DefaultDataCollator
# Collator configured to return TensorFlow tensors (return_tensors="tf").
data_collator = DefaultDataCollator(return_tensors="tf")

In [17]:
import evaluate
# Load the "accuracy" metric; `compute_metrics` calls `accuracy.compute(...)` on it.
accuracy = evaluate.load("accuracy")

Downloading builder script: 100%|██████████| 4.20k/4.20k [00:00<00:00, 2.37MB/s]


In [18]:
def compute_metrics(eval_pred):
    """Score predictions against reference labels with the accuracy metric.

    Args:
        eval_pred: A pair ``(predictions, labels)``. The predicted class for
            each row is the argmax of ``predictions`` along axis 1.

    Returns:
        The result of ``accuracy.compute`` for the predicted classes against
        ``labels``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_classes, references=references)

In [19]:
from transformers import create_optimizer

# NOTE(review): the recorded output of this cell is an ImportError saying
# TensorFlow was not found by transformers; confirm the environment.
batch_size = 16
num_epochs = 5
# The learning-rate schedule advances once per optimizer step (one batch),
# not once per example, so count batches per epoch. Ceiling division keeps a
# final partial batch, so the schedule never ends before training does.
# Assumes training iterates in batches of `batch_size`; confirm against the
# cell that builds the training dataset.
steps_per_epoch = (len(dataset["train"]) + batch_size - 1) // batch_size
num_train_steps = steps_per_epoch * num_epochs
learning_rate = 3e-5
weight_decay_rate = 0.01

optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=0,
)

ImportError: 
create_optimizer requires the TensorFlow library but it was not found in your environment.
However, we were able to find a PyTorch installation. PyTorch classes do not begin
with "TF", but are otherwise identically named to our TF classes.
If you want to use PyTorch, please use those classes instead!

If you really do want to use TensorFlow, please follow the instructions on the
installation page https://www.tensorflow.org/install that match your environment.


In [20]:
from transformers import TFAutoModelForImageClassification

# Load the image-classification model from the same `checkpoint` used for the
# image processor, passing the label mappings built earlier.
# NOTE(review): the recorded output of this cell is an ImportError saying
# TensorFlow was not found by transformers; confirm the environment before
# relying on `model`.
model = TFAutoModelForImageClassification.from_pretrained(
    checkpoint,
    id2label=id2label,
    label2id=label2id,
)

ImportError: 
TFAutoModelForImageClassification requires the TensorFlow library but it was not found in your environment.
However, we were able to find a PyTorch installation. PyTorch classes do not begin
with "TF", but are otherwise identically named to our TF classes.
If you want to use PyTorch, please use those classes instead!

If you really do want to use TensorFlow, please follow the instructions on the
installation page https://www.tensorflow.org/install that match your environment.
