In [1]:
# import os
# from PIL import Image

# def is_corrupted_image(image_path):
#     try:
#         # Attempt to open the image
#         with Image.open(image_path) as img:
#             # Try to access basic properties to check if the image is valid
#             img.verify()
#             return False
#     except (IOError, SyntaxError):
#         # Image is corrupted or not supported
#         return True

# def check_and_remove_corrupted_images(directory):
#     k=0
#     total=0
#     for root, dirs, files in os.walk(directory):
#         for file in files:
#             file_path = os.path.join(root, file)

#             # Check if the file is an image (you can customize this check based on file extensions)
#             if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
#                 if is_corrupted_image(file_path):
#                     # Remove the corrupted image
#                     os.remove(file_path)
#                     print(f"Removed corrupted image: {file_path}")
#                     k+=1
#                 else:
#                     # Print the shape of the valid image
#                     with Image.open(file_path) as img:
# #                         print(f"Image shape of {file_path}: {img.size}")
#                         print(img.layers)
#                     total+=1
#     print(k)
#     print(total)

# # Replace 'your_directory_path' with the actual path to your directory containing the 29 directories
# directory_path = './cropped_images_dataset'
# check_and_remove_corrupted_images(directory_path)


In [2]:
#!pip install "tensorflow==2.6.0"
!pip install transformers "datasets>=1.17.0" tensorboard --upgrade


Defaulting to user installation because normal site-packages is not writeable


In [3]:
# from huggingface_hub import notebook_login

# notebook_login()

In [4]:
model_id = "google/vit-base-patch16-224-in21k"


In [5]:
import os
import datasets

def create_image_folder_dataset(root_path):
    """creates `Dataset` from image folder structure"""

    # get class names by folders names
    _CLASS_NAMES= os.listdir(root_path)
    # defines `datasets` features`
    features=datasets.Features({
                      "img": datasets.Image(),
                      "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
                  })
    # temp list holding datapoints for creation
    img_data_files=[]
    label_data_files=[]
    # load images into list for creation
    for img_class in os.listdir(root_path):
        for img in os.listdir(os.path.join(root_path,img_class)):
            path_=os.path.join(root_path,img_class,img)
            img_data_files.append(path_)
            label_data_files.append(img_class)
    # create dataset
    ds = datasets.Dataset.from_dict({"img":img_data_files,"label":label_data_files},features=features)
    return ds



In [6]:
eurosat_ds = create_image_folder_dataset("./cropped_images_dataset/train")
eurosat_ds_test=create_image_folder_dataset("./cropped_images_dataset/validation")

In [7]:
img_class_labels = eurosat_ds.features["label"].names

In [8]:
from transformers import ViTFeatureExtractor
from tensorflow import keras
from tensorflow.keras import layers


feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)

# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    [
        layers.Resizing(feature_extractor.size['height'], feature_extractor.size['width']),
        layers.Rescaling(1./255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)
# use keras image data augementation processing
def augmentation(examples):
    # print(examples["img"])
    data=[]
    for image in examples['img']:
        data.append(data_augmentation(image))
    examples["pixel_values"] = data
    return examples


# basic processing (only resizing)
def process(examples):
    examples.update(feature_extractor(examples['img'], ))
    return examples


2023-12-17 18:09:27.692779: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-17 18:09:27.713272: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-17 18:09:27.713291: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-17 18:09:27.714025: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-17 18:09:27.718244: I tensorflow/core/platform/cpu_feature_guar

In [9]:
# we are also renaming our label col to labels to use `.to_tf_dataset` later
eurosat_ds = eurosat_ds.rename_column("label", "labels")
eurosat_ds_test=eurosat_ds_test.rename_column("label", "labels")

In [10]:
# processed_dataset = eurosat_ds.map(process, batched=True)

# # augmenting dataset takes a lot of time
processed_dataset = eurosat_ds.map(process, batched=True)
processed_dataset

Map:   0%|          | 0/7401 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 7401
})

In [11]:
len(processed_dataset)

7401

In [12]:
processed_dataset_test=eurosat_ds_test.map(process, batched=True)
processed_dataset_test

Map:   0%|          | 0/606 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 606
})

In [13]:
from huggingface_hub import HfFolder
import tensorflow as tf

id2label = {str(i): label for i, label in enumerate(img_class_labels)}
label2id = {v: k for k, v in id2label.items()}

num_train_epochs = 20
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0
output_dir=model_id.split("/")[1]
# hub_token = HfFolder.get_token() # or your token directly "hf_xxx"
# hub_model_id = f'{model_id.split("/")[1]}-euroSat'
fp16=True

# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
if fp16:
    tf.keras.mixed_precision.set_global_policy("mixed_float16")


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 4070 Laptop GPU, compute capability 8.9


2023-12-17 18:10:09.314839: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355


In [14]:
from transformers import DefaultDataCollator

# Data collator that will dynamically pad the inputs received, as well as the labels.
data_collator = DefaultDataCollator(return_tensors="tf")
# Function to process a single example
def process_example(example):
    # Transpose the pixel_values tensor to have channels dimension at the beginning
    example['pixel_values'] = tf.transpose(example['pixel_values'], perm=[0, 3, 2, 1])
    return example

# Modify the processed_dataset directly
# processed_dataset = processed_dataset.map(
#     process_example,
#     batched=True,
#     num_proc=1  # Set the number of processes according to your system capabilities
# )
# processed_dataset_test = processed_dataset_test.map(
#     process_example,
#     batched=True,
#     num_proc=1  # Set the number of processes according to your system capabilities
# )
# converting our train dataset to tf.data.Dataset
tf_train_dataset = processed_dataset.to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=train_batch_size,
   collate_fn=data_collator)

# converting our test dataset to tf.data.Dataset
tf_eval_dataset = processed_dataset_test.to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=eval_batch_size,
   collate_fn=data_collator)

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


In [15]:
tf_eval_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 3, 224, 224), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [16]:
tf_train_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 3, 224, 224), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [17]:
from transformers import TFViTForImageClassification, create_optimizer
import tensorflow as tf
# Replace "your_fine_tuned_model_directory" with the actual directory where your fine-tuned model is stored.
# model2 = TFViTForImageClassification.from_pretrained("../vit-base-patch16-224-in21k")
# create optimizer wight weigh decay
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)

# # load pre-trained ViT model
model = TFViTForImageClassification.from_pretrained(
    model_id,
    num_labels=len(img_class_labels),
    id2label=id2label,
    label2id=label2id,
)

# define loss
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# define metrics
metrics=[
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.SparseTopKCategoricalAccuracy(3, name="top-3-accuracy"),
]

# # compile model
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=metrics
              )


2023-12-17 18:10:15.050556: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8902
Some layers from the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing TFViTForImageClassification: ['vit/pooler/dense/kernel:0', 'vit/pooler/dense/bias:0']
- This IS expected if you are initializing TFViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier']
You should probably TRAIN this m

In [18]:
# # alternatively create Image Classification model using Keras Layer and ViTModel
# # here you can also add the processing layers of keras

# import tensorflow as tf
# from transformers import TFViTModel

# base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')


# # inputs
# pixel_values = tf.keras.layers.Input(shape=(3,224,224), name='pixel_values', dtype='float32')

# # model layer
# vit = base_model.vit(pixel_values)[0]
# classifier = tf.keras.layers.Dense(10, activation='softmax', name='outputs')(vit[:, 0, :])

# # model
# keras_model = tf.keras.Model(inputs=pixel_values, outputs=classifier)


In [19]:
import os
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard as TensorboardCallback, EarlyStopping

callbacks=[]

callbacks.append(TensorboardCallback(log_dir=os.path.join(output_dir,"logs")))
callbacks.append(EarlyStopping(monitor="val_accuracy",patience=1))
# if hub_token:
#     callbacks.append(PushToHubCallback(output_dir=output_dir,
#                                      hub_model_id=hub_model_id,
#                                      hub_token=hub_token))

In [20]:
train_results = model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    epochs=num_train_epochs,
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [21]:
train_results

<keras.src.callbacks.History at 0x7f6614570610>

In [22]:
# from huggingface_hub import HfApi

# api = HfApi()

# user = api.whoami(hub_token)


feature_extractor.save_pretrained(output_dir)
model.save_pretrained(output_dir)
# api.upload_file(
#     token=hub_token,
#     repo_id=f"{user['name']}/{hub_model_id}",
#     path_or_fileobj=os.path.join(output_dir,"preprocessor_config.json"),
#     path_in_repo="preprocessor_config.json",
# )


In [23]:
from transformers import ViTFeatureExtractor, TFViTForImageClassification
import tensorflow as tf
from PIL import Image
import os
import torch

# Load the feature extractor and the trained ViT model
feature_extractor = ViTFeatureExtractor.from_pretrained('./vit-base-patch16-224-in21k')
model = TFViTForImageClassification.from_pretrained('./vit-base-patch16-224-in21k')

# Path to the test directory
test_dir = './cropped_images_dataset/test'

# List all subdirectories (each subdirectory corresponds to a class)
class_names = os.listdir(test_dir)

# Lists to store true labels and predicted labels
true_labels = []
predicted_labels = []

# Iterate through each class directory
for class_name in class_names:
    class_path = os.path.join(test_dir, class_name)

    # Iterate through images in the class directory
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)

        # Open and preprocess the image
        image = Image.open(image_path)
        inputs = feature_extractor(images=image, return_tensors="pt")
        pixel_values_tensor = tf.convert_to_tensor(inputs['pixel_values'], dtype=tf.float32)
        # Make predictions
        with torch.no_grad():
            outputs = model(pixel_values=pixel_values_tensor)
            logits = outputs.logits
            logits = tf.nn.softmax(logits)
            # model predicts one of the 1000 ImageNet classes
            predicted_class_idx = tf.argmax(logits, axis=-1).numpy().item()
            
        # Append true and predicted labels
        true_labels.append(class_name)
        predicted_labels.append(model.config.id2label[predicted_class_idx])

# Evaluate the model
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(true_labels, predicted_labels)
report = classification_report(true_labels, predicted_labels)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')


All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at ./vit-base-patch16-224-in21k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


Accuracy: 0.7504930966469427
Classification Report:
                                      precision    recall  f1-score   support

                     Apple Scab Leaf       0.64      0.50      0.56        14
                          Apple leaf       0.80      0.75      0.77        16
                     Apple rust leaf       0.68      0.93      0.79        14
                    Bell_pepper leaf       0.79      0.71      0.75        48
               Bell_pepper leaf spot       0.29      0.73      0.41        11
                      Blueberry leaf       0.94      0.89      0.91       159
                         Cherry leaf       0.65      0.73      0.69        15
                 Corn Gray leaf spot       0.57      0.31      0.40        13
                    Corn leaf blight       0.68      0.87      0.76        31
                      Corn rust leaf       0.94      0.79      0.86        19
                          Peach leaf       0.78      0.88      0.83        51
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
