In [1]:
#!pip install "tensorflow==2.6.0"
!pip install transformers "datasets>=1.17.0" tensorboard --upgrade


Defaulting to user installation because normal site-packages is not writeable


In [2]:
from huggingface_hub import notebook_login

# Show the interactive Hugging Face Hub login widget in the cell output.
# NOTE(review): every Hub upload further down in this notebook is commented out, so
# this login looks optional for a purely local run — confirm before removing.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# Hub id of the ViT checkpoint: used to load the feature extractor and to derive
# the name of the local output directory.
model_id = "google/vit-base-patch16-224-in21k"


In [4]:
import os
import datasets

def create_image_folder_dataset(root_path):
    """Create a `datasets.Dataset` from an image-folder layout.

    Expects ``root_path/<class_name>/<image file>``: every sub-directory of
    ``root_path`` is one class and every entry inside it is one image of that class.

    Args:
        root_path: Directory that contains one sub-directory per class.

    Returns:
        A `datasets.Dataset` with an ``img`` column (image feature built from the
        file paths) and a ``label`` column (`ClassLabel` over the class names).
    """
    # Class names come from the sub-directory names. Stray files in the root
    # (e.g. `.DS_Store`, a README) are skipped instead of being treated as classes,
    # and the names are sorted because `os.listdir` returns entries in arbitrary
    # order, which would make the label <-> id mapping differ between runs/machines.
    class_names = sorted(
        entry
        for entry in os.listdir(root_path)
        if os.path.isdir(os.path.join(root_path, entry))
    )
    # defines `datasets` features
    features = datasets.Features({
        "img": datasets.Image(),
        "label": datasets.features.ClassLabel(names=class_names),
    })
    # temp lists holding the datapoints for creation
    img_data_files = []
    label_data_files = []
    # Iterate over the very same class list used for the features so that both
    # can never disagree; files are sorted for a deterministic row order.
    for img_class in class_names:
        class_dir = os.path.join(root_path, img_class)
        for img in sorted(os.listdir(class_dir)):
            img_data_files.append(os.path.join(class_dir, img))
            label_data_files.append(img_class)
    # create dataset
    return datasets.Dataset.from_dict(
        {"img": img_data_files, "label": label_data_files},
        features=features,
    )



In [5]:
# Build the dataset from the class sub-folders found under ./CROP/CROP.
# NOTE(review): the variable is still named `eurosat_ds` although the data is read
# from a "CROP" folder — presumably a leftover from a EuroSAT example; confirm.
eurosat_ds = create_image_folder_dataset("./CROP/CROP")

In [6]:
# Class names as stored in the `label` ClassLabel feature; enumerated later to build
# the id <-> label mappings.
img_class_labels = eurosat_ds.features["label"].names

In [7]:
from transformers import ViTFeatureExtractor
from tensorflow import keras
from tensorflow.keras import layers


# Preprocessing configuration (target size, normalization statistics) of the checkpoint.
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# Deterministic preprocessing that mirrors what the feature extractor does, so the
# model is trained on the same value range it will receive at inference time (the
# feature extractor is what gets saved next to the model at the end of the notebook).
preprocessing_layers = [
    layers.Resizing(feature_extractor.size['height'], feature_extractor.size['width']),
    # raw pixel values -> [0, 1]
    layers.Rescaling(1./255),
]
if feature_extractor.do_normalize:
    # Rescaling alone leaves the pixels in [0, 1]; the feature extractor additionally
    # applies (x - image_mean) / image_std per channel. `Normalization` takes a
    # variance, hence std ** 2.
    preprocessing_layers.append(
        layers.Normalization(
            axis=-1,
            mean=feature_extractor.image_mean,
            variance=[std ** 2 for std in feature_extractor.image_std],
        )
    )

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    preprocessing_layers
    + [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)
# use keras image data augmentation processing
def augmentation(examples):
    """Add a `pixel_values` entry holding the augmented version of every image.

    `examples` is a batch: a mapping whose `img` entry is a sequence of images.
    Each image is passed through `data_augmentation`; the batch is updated in
    place and returned.
    """
    augmented_images = []
    for image in examples["img"]:
        augmented_images.append(data_augmentation(image))
    examples["pixel_values"] = augmented_images
    return examples


# basic processing: run the checkpoint's feature extractor, no random augmentation
def process(examples):
    """Run `feature_extractor` over the batch's images and merge its output into the batch.

    The batch is updated in place and returned.
    """
    encoded = feature_extractor(examples["img"])
    examples.update(encoded)
    return examples

# we are also renaming our label col to labels to use `.to_tf_dataset` later.
# Guarded so that re-running this cell does not fail once the column is already renamed.
if "label" in eurosat_ds.column_names:
    eurosat_ds = eurosat_ds.rename_column("label", "labels")


2023-12-13 23:08:46.313033: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-13 23:08:46.564822: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-13 23:08:46.564903: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-13 23:08:46.580647: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-13 23:08:46.664110: I tensorflow/core/platform/cpu_feature_guar

In [8]:
# Alternative without augmentation (feature extractor only):
# processed_dataset = eurosat_ds.map(process, batched=True)

# augmenting the dataset takes a lot of time
# NOTE(review): `map` applies `augmentation` once and stores the result, so each image
# is presumably augmented a single time rather than freshly on every epoch — confirm
# this is intended.
processed_dataset = eurosat_ds.map(augmentation, batched=True)
processed_dataset

Map:   0%|          | 0/2363 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 2363
})

In [9]:
# hold out 15% of the dataset for evaluation
test_size=.15
# fixed seed so the shuffle and the train/test split are reproducible across runs
split_seed=42

processed_dataset = processed_dataset.shuffle(seed=split_seed).train_test_split(
    test_size=test_size, seed=split_seed
)

In [11]:
from huggingface_hub import HfFolder
import tensorflow as tf

# Mappings between class index (as a string) and class name, built from the class list.
# NOTE(review): both are only referenced by the commented-out model-loading code in
# this notebook — confirm whether they are still needed.
id2label = {str(i): label for i, label in enumerate(img_class_labels)}
label2id = {v: k for k, v in id2label.items()}

# Training hyper-parameters
num_train_epochs = 10
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0
# Local directory for logs and the saved model: the part of `model_id` after the "/".
output_dir=model_id.split("/")[1]
# hub_token = HfFolder.get_token() # or your token directly "hf_xxx"
# hub_model_id = f'{model_id.split("/")[1]}-euroSat'
fp16=True

# Train in mixed-precision float16
# Set `fp16` to False if you're using a GPU that will not benefit from this
if fp16:
    tf.keras.mixed_precision.set_global_policy("mixed_float16")


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9


2023-12-13 23:13:29.183252: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355


In [12]:
from transformers import DefaultDataCollator

# Default data collator returning TensorFlow tensors; it only batches the examples.
# NOTE(review): the previous comment claimed dynamic padding, but the images here are
# already resized to a fixed size, so presumably no padding takes place — confirm.
data_collator = DefaultDataCollator(return_tensors="tf")
# Function to process a batch of examples
def process_example(example):
    """Convert a batch of images from channels-last to channels-first layout.

    Args:
        example: Batch whose ``pixel_values`` entry is a 4-D array-like laid out as
            (batch, height, width, channels).

    Returns:
        The same batch object, with ``pixel_values`` laid out as
        (batch, channels, height, width).
    """
    # perm=[0, 3, 1, 2] moves the channel axis to position 1 while keeping height
    # before width. The former perm=[0, 3, 2, 1] additionally swapped height and
    # width, which went unnoticed on square inputs but transposed every image.
    example['pixel_values'] = tf.transpose(example['pixel_values'], perm=[0, 3, 1, 2])
    return example

# Apply `process_example` to both splits, replacing the entries of `processed_dataset`
for split_name in ("train", "test"):
    processed_dataset[split_name] = processed_dataset[split_name].map(
        process_example,
        batched=True,
        num_proc=1,  # Set the number of processes according to your system capabilities
    )
# arguments shared by the train and eval conversions to tf.data.Dataset
_to_tf_dataset_kwargs = dict(
    columns=['pixel_values'],
    label_cols=["labels"],
    shuffle=True,
    collate_fn=data_collator,
)

# converting our train dataset to tf.data.Dataset
tf_train_dataset = processed_dataset["train"].to_tf_dataset(
    batch_size=train_batch_size,
    **_to_tf_dataset_kwargs,
)

# converting our test dataset to tf.data.Dataset
tf_eval_dataset = processed_dataset["test"].to_tf_dataset(
    batch_size=eval_batch_size,
    **_to_tf_dataset_kwargs,
)

Map:   0%|          | 0/2008 [00:00<?, ? examples/s]

Map:   0%|          | 0/355 [00:00<?, ? examples/s]

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


In [13]:
# Display the eval dataset to check its element spec (shapes and dtypes).
tf_eval_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 3, 224, 224), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [15]:
from transformers import TFViTForImageClassification, create_optimizer
import tensorflow as tf
# Load the model from a local checkpoint directory; replace the path with the actual
# directory where your fine-tuned model is stored.
# NOTE(review): no `num_labels` / `id2label` / `label2id` is passed here, so the
# classification head presumably comes from the checkpoint's own config — confirm it
# matches `img_class_labels`.
model2 = TFViTForImageClassification.from_pretrained("../vit-base-patch16-224-in21k")
# create optimizer with weight decay
# total number of optimizer steps = batches per epoch * number of epochs
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

# # load pre-trained ViT model
# model = TFViTForImageClassification.from_pretrained(
#     model_id,
#     num_labels=len(img_class_labels),
#     id2label=id2label,
#     label2id=label2id,
# )

# loss computed on raw logits against integer class labels
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# track top-1 and top-3 accuracy
metrics = [
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="top-3-accuracy"),
]

# compile model
model2.compile(optimizer=optimizer, loss=loss, metrics=metrics)


2023-12-13 23:18:29.881041: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at ../vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


In [None]:
# # alternatively create Image Classification model using Keras Layer and ViTModel
# # here you can also add the processing layers of keras

# import tensorflow as tf
# from transformers import TFViTModel

# base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')


# # inputs
# pixel_values = tf.keras.layers.Input(shape=(3,224,224), name='pixel_values', dtype='float32')

# # model layer
# vit = base_model.vit(pixel_values)[0]
# classifier = tf.keras.layers.Dense(10, activation='softmax', name='outputs')(vit[:, 0, :])

# # model
# keras_model = tf.keras.Model(inputs=pixel_values, outputs=classifier)


In [17]:
import os
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard as TensorboardCallback, EarlyStopping

callbacks = [
    # write training logs for TensorBoard into <output_dir>/logs
    TensorboardCallback(log_dir=os.path.join(output_dir, "logs")),
    # Stop once validation accuracy has not improved for 2 epochs and roll the model
    # back to its best epoch. Without `restore_best_weights` the weights that remain
    # (and are saved afterwards) are those of the last epoch, i.e. up to `patience`
    # epochs past the best one. `mode="max"` states explicitly that higher is better.
    EarlyStopping(
        monitor="val_accuracy",
        mode="max",
        patience=2,
        restore_best_weights=True,
    ),
]
# if hub_token:
#     callbacks.append(PushToHubCallback(output_dir=output_dir,
#                                      hub_model_id=hub_model_id,
#                                      hub_token=hub_token))

In [18]:
# Fine-tune the model on the training set, validating on the held-out split after each
# epoch; the callbacks handle TensorBoard logging and early stopping.
train_results = model2.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    epochs=num_train_epochs,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10


In [20]:
# Display the object returned by `fit` (a Keras `History`).
train_results

<keras.src.callbacks.History at 0x7f96a8156d50>

In [21]:
# from huggingface_hub import HfApi

# api = HfApi()

# user = api.whoami(hub_token)


# Save the preprocessing config and the fine-tuned model into the same local
# directory (`output_dir`).
feature_extractor.save_pretrained(output_dir)
model2.save_pretrained(output_dir)
# api.upload_file(
#     token=hub_token,
#     repo_id=f"{user['name']}/{hub_model_id}",
#     path_or_fileobj=os.path.join(output_dir,"preprocessor_config.json"),
#     path_in_repo="preprocessor_config.json",
# )
