In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from
# paths under /content/drive/MyDrive. Requires the google.colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import os

def extract_frames_with_annotation(video_folder, annotation_root_folder, output_root_folder, frames_per_second=25):
    """Export the annotated frames of every ``.mov`` video under ``video_folder``.

    For each video the matching annotation file is looked up at
    ``annotation_root_folder/<relative dir>/<video name>.face``. Every
    non-blank annotation line is read as whitespace-separated integers
    ``frame x y width height``. For each listed frame two PNG files are written
    to ``output_root_folder/<relative dir>``:

    * ``<video name>_frame_<n>.png`` - the raw frame
    * ``<video name>_annotated_frame_<n>.png`` - the frame with a green box

    The video name is part of the file name so that several videos stored in
    the same folder do not overwrite each other's frames.

    Args:
        video_folder: Root folder that is walked recursively for ``.mov`` files.
        annotation_root_folder: Root folder of the ``.face`` files; its layout
            is expected to mirror ``video_folder``.
        output_root_folder: Root folder for the exported PNG files; sub-folders
            are created as needed.
        frames_per_second: Unused. Kept so existing callers keep working.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    for root, dirs, files in os.walk(video_folder):
        for file in files:
            if not file.endswith(".mov"):  # only .mov videos are processed
                continue

            video_path = os.path.join(root, file)
            video_stem = os.path.splitext(file)[0]

            # Annotation and output folders mirror the video's relative folder.
            relative_annotation_path = os.path.relpath(root, video_folder)
            annotation_file_path = os.path.join(
                annotation_root_folder, relative_annotation_path, f"{video_stem}.face"
            )
            output_folder = os.path.join(output_root_folder, relative_annotation_path)
            os.makedirs(output_folder, exist_ok=True)

            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    print(f"Error opening video file: {video_path}")
                    continue

                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                fps = cap.get(cv2.CAP_PROP_FPS)
                # Guard against a reported fps of 0, which would divide by zero.
                duration = frame_count / fps if fps else 0.0

                print(f"Processing video: {video_path}")
                print(f"Total frames: {frame_count}")
                print(f"Frames per second (fps): {fps}")
                print(f"Duration (seconds): {duration}")

                if not os.path.exists(annotation_file_path):
                    print(f"Annotation file not found: {annotation_file_path}")
                    continue

                # Blank lines are dropped so they cannot break the parsing below.
                with open(annotation_file_path, "r") as annotation_file:
                    annotations = [line.split() for line in annotation_file if line.strip()]

                for annotation in annotations:
                    try:
                        current_frame = int(annotation[0])
                        x, y, width, height = map(int, annotation[1:5])
                    except ValueError:
                        # Too few columns or non-integer values: report and move on
                        # instead of aborting the whole extraction run.
                        print(f"Skipping malformed annotation line in {annotation_file_path}: {' '.join(annotation)}")
                        continue

                    # The annotation frame number is shifted by one to get the
                    # capture position (i.e. it is treated as 1-based).
                    cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame - 1)

                    success, frame = cap.read()
                    if not success:
                        continue

                    # Save the raw frame first; the rectangle below is drawn in place.
                    frame_filename = os.path.join(output_folder, f"{video_stem}_frame_{current_frame}.png")
                    cv2.imwrite(frame_filename, frame)

                    # Green rectangle for annotation
                    cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)

                    annotated_frame_filename = os.path.join(
                        output_folder, f"{video_stem}_annotated_frame_{current_frame}.png"
                    )
                    cv2.imwrite(annotated_frame_filename, frame)
            finally:
                # Always release the capture, including on `continue` and on errors.
                cap.release()

if __name__ == "__main__":
    # Provide the path to the root video folder, annotation root folder, and output root folder.
    # The annotation root sits inside the video root; annotation files are looked up
    # under the same relative sub-folder as each video.
    video_folder = "/content/drive/MyDrive/idiapdataset"
    annotation_root_folder = "/content/drive/MyDrive/idiapdataset/face-locations"
    output_root_folder = "/content/data"

    # Extract frames and annotate based on the provided text files and folder structure
    extract_frames_with_annotation(video_folder, annotation_root_folder, output_root_folder, frames_per_second=25)

Processing video: /content/drive/MyDrive/idiapdataset/test/real/client009_session01_webcam_authenticate_adverse_2.mov
Total frames: 375
Frames per second (fps): 25.0
Duration (seconds): 15.0
Annotation file not found: /content/drive/MyDrive/idiapdataset/face-locations/test/real/client009_session01_webcam_authenticate_adverse_2.face
Processing video: /content/drive/MyDrive/idiapdataset/test/real/client009_session01_webcam_authenticate_adverse_1.mov
Total frames: 375
Frames per second (fps): 25.0
Duration (seconds): 15.0
Annotation file not found: /content/drive/MyDrive/idiapdataset/face-locations/test/real/client009_session01_webcam_authenticate_adverse_1.face
Processing video: /content/drive/MyDrive/idiapdataset/test/real/client009_session01_webcam_authenticate_controlled_1.mov
Total frames: 375
Frames per second (fps): 25.0
Duration (seconds): 15.0
Annotation file not found: /content/drive/MyDrive/idiapdataset/face-locations/test/real/client009_session01_webcam_authenticate_controlled

In [None]:
import shutil
# Folder whose contents are archived (the frame-extraction output root)
folder_path = '/content/data'
# Full name of the zip file to produce
output_zip = '/content/folder.zip'
# make_archive takes the archive name without extension, so '.zip' is removed
# from output_zip before the call; the cell displays the created archive path.
shutil.make_archive(output_zip.replace('.zip', ''), 'zip', folder_path)

'/content/folder.zip'

In [None]:
import os
import csv
def label_images(root_path, csv_filename):
    """Write a CSV that maps every file under ``root_path`` to a class label.

    The label is derived from the folder names between ``root_path`` and the
    file, so it does not depend on how deep ``root_path`` itself is located:

    * ``'enroll'`` if any of those folders is named ``enroll``
    * ``'attack'`` if any of those folders is named ``attack``
    * ``'real'`` otherwise

    This covers both ``root/<class>/<image>`` and deeper layouts such as
    ``root/<split>/<class>/<sub-folder>/<image>``.

    Args:
        root_path: Folder that is walked recursively.
        csv_filename: Path of the CSV file to create (overwritten if present).
            It gets a header row ``Image_Path,Label`` followed by one row per file.

    Returns:
        None.
    """
    with open(csv_filename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Image_Path', 'Label'])  # Header

        for root, dirs, files in os.walk(root_path):
            # Sorting in place makes os.walk visit folders in a stable order.
            dirs.sort()

            # Folder names below root_path only; ancestors of root_path are ignored.
            relative_dir = os.path.relpath(root, root_path)
            folder_parts = [] if relative_dir == os.curdir else relative_dir.split(os.path.sep)

            if 'enroll' in folder_parts:
                label = 'enroll'
            elif 'attack' in folder_parts:
                label = 'attack'
            else:
                label = 'real'

            for filename in sorted(files):
                csv_writer.writerow([os.path.join(root, filename), label])
if __name__ == "__main__":
    # Label every file under the Drive dataset folder; the CSV is written to the
    # current working directory because the file name is relative.
    root_directory = '/content/drive/MyDrive/DATAS'
    csv_output_filename = 'labeled_images.csv'
    label_images(root_directory, csv_output_filename)

In [None]:
#!pip install "tensorflow==2.6.0"
!pip install transformers "datasets>=1.17.0" tensorboard --upgrade

Collecting transformers
  Downloading transformers-4.39.3-py3-none-any.whl (8.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=1.17.0
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard
  Downloading tensorboard-2.16.2-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets>=1.17.0)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets>=1.17.0)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━

In [None]:
# Install git-lfs through apt; the recorded output reports it was already the newest version.
!sudo apt-get install git-lfs

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


In [None]:
from huggingface_hub import notebook_login
# Show the Hugging Face login widget; a later cell reads the stored token
# with HfFolder.get_token().
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Hub checkpoint id; used below to load the feature extractor and the
# classification model, and to derive the output directory name.
model_id = "google/vit-base-patch16-224-in21k"

In [None]:
!pip install datasets



In [None]:
import datasets
import os
from datasets import Dataset, Features, Image
def create_image_folder_dataset(root_path):
    """Create a `Dataset` from an image folder structure.

    Every sub-folder of ``root_path`` is one class and every file directly
    inside it is one example of that class.

    Class names are taken from the sub-folders only and are sorted, so the
    class-name-to-id mapping of the ``label`` feature is the same on every
    run. Plain files lying directly in ``root_path`` are ignored instead of
    becoming empty classes.

    Args:
        root_path: Folder containing one sub-folder per class.

    Returns:
        A `Dataset` with an ``img`` column (`Image` feature built from the file
        paths) and a ``label`` column (`ClassLabel` over the class names).
    """
    # Get class names by folder names (directories only, in a stable order)
    _CLASS_NAMES = sorted(
        entry for entry in os.listdir(root_path)
        if os.path.isdir(os.path.join(root_path, entry))
    )
    # Define `datasets` features
    features = Features({
        "img": Image(),
        "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
    })
    # Temporary lists holding data points for creation
    img_data_files = []
    label_data_files = []
    # Collect (path, class name) pairs; files are sorted for a stable row order
    for img_class in _CLASS_NAMES:
        class_path = os.path.join(root_path, img_class)
        for img in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img)
            if os.path.isfile(img_path):
                img_data_files.append(img_path)
                label_data_files.append(img_class)
    # Create dataset
    ds = Dataset.from_dict({"img": img_data_files, "label": label_data_files}, features=features)
    return ds
# Usage example: build the dataset from the Drive folder and print its summary
# (column names and number of rows).
root_path = "/content/drive/MyDrive/DATAS"
dataset = create_image_folder_dataset(root_path)
print(dataset)

Dataset({
    features: ['img', 'label'],
    num_rows: 998
})


In [None]:
# Dataset used by the rest of the notebook.
# NOTE(review): this repeats the call made for `dataset` in the previous cell with the same folder.
eurosat_ds = create_image_folder_dataset("/content/drive/MyDrive/DATAS")

In [None]:
# Class names of the "label" feature; used later for id2label and for the number of model outputs.
img_class_labels = eurosat_ds.features["label"].names

In [None]:
from transformers import ViTFeatureExtractor
from tensorflow import keras
from tensorflow.keras import layers
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)
# `feature_extractor.size` is a plain int in older transformers releases and a
# {"height": ..., "width": ...} dict in newer ones; `Resizing` needs two ints.
_fe_size = feature_extractor.size
if isinstance(_fe_size, dict):
    _resize_height, _resize_width = _fe_size["height"], _fe_size["width"]
else:
    _resize_height = _resize_width = _fe_size
# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
data_augmentation = keras.Sequential(
    [
        layers.Resizing(_resize_height, _resize_width),
        layers.Rescaling(1./255),
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(factor=0.02),
        layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)
# use keras image data augementation processing
def augmentation(examples):
    """Add a ``pixel_values`` entry holding the augmented version of each image.

    Every item of ``examples["img"]`` is passed through the module-level
    ``data_augmentation`` pipeline; ``examples`` is modified in place and returned.
    """
    augmented_images = []
    for image in examples["img"]:
        augmented_images.append(data_augmentation(image))
    examples["pixel_values"] = augmented_images
    return examples
# basic processing (only resizing)
def process(examples):
    """Run the module-level ``feature_extractor`` on ``examples['img']``.

    Whatever the feature extractor returns is merged into ``examples`` in
    place, and ``examples`` is returned.
    """
    extracted = feature_extractor(examples['img'])
    examples.update(extracted)
    return examples
# Rename the "label" column to "labels"; the later `.to_tf_dataset` calls use label_cols=["labels"].
# NOTE(review): re-running this cell without rebuilding `eurosat_ds` will presumably fail,
# because the "label" column no longer exists after the first run.
eurosat_ds = eurosat_ds.rename_column("label", "labels")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



In [None]:
# Apply `process` to the dataset in batches; the recorded output shows the
# result has the columns img, labels and pixel_values.
processed_dataset = eurosat_ds.map(process, batched=True)
processed_dataset
# Alternative (disabled): map with `augmentation` instead; augmenting the dataset takes a lot of time
# processed_dataset = eurosat_ds.map(augmentation, batched=True)

Map:   0%|          | 0/998 [00:00<?, ? examples/s]

Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 998
})

In [None]:
# Hold out 15% of the processed examples as the test split
test_size=.15
# Fixed seed so the shuffle and the split (and therefore the evaluation set)
# are the same on every run.
split_seed = 42
# NOTE(review): this rebinds `processed_dataset` to the split result, so the
# cell cannot be re-run without re-running the mapping cell first.
processed_dataset = processed_dataset.shuffle(seed=split_seed).train_test_split(test_size=test_size, seed=split_seed)

In [None]:
from huggingface_hub import HfFolder
import tensorflow as tf
# Mappings between class index (as a string) and class name, passed to the model config below
id2label = {str(i): label for i, label in enumerate(img_class_labels)}
label2id = {v: k for k, v in id2label.items()}
# Training hyperparameters
num_train_epochs = 5
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.09
num_warmup_steps=0
# Local output directory: the part of model_id after the "/"
output_dir=model_id.split("/")[1]
hub_token = HfFolder.get_token() # token stored by the login cell; avoid pasting a token into the notebook
# NOTE(review): the "-euroSat" suffix does not describe the DATAS image folder used here; confirm the repo name
hub_model_id = f'{model_id.split("/")[1]}-euroSat'
fp16=True
# Train in mixed-precision float16
# Set fp16=False if you're using a GPU that will not benefit from this
if fp16:
  tf.keras.mixed_precision.set_global_policy("mixed_float16")

In [None]:
from transformers import DefaultDataCollator
# Collator that batches the examples into TensorFlow tensors (return_tensors="tf").
# NOTE(review): DefaultDataCollator only collates; it does not pad, unlike what the previous comment said.
data_collator = DefaultDataCollator(return_tensors="tf")
# converting our train dataset to tf.data.Dataset
tf_train_dataset = processed_dataset["train"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=train_batch_size,
   collate_fn=data_collator)
# converting our test dataset to tf.data.Dataset
# NOTE(review): shuffle=True is also set for the evaluation split; shuffling is presumably not needed there
tf_eval_dataset = processed_dataset["test"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=eval_batch_size,
   collate_fn=data_collator)

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


In [None]:
from transformers import TFViTForImageClassification, create_optimizer
import tensorflow as tf
# create optimizer with weight decay
# total number of optimizer steps = batches per epoch * number of epochs
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)
# load pre-trained ViT model with one output per dataset class
model = TFViTForImageClassification.from_pretrained(
    model_id,
    num_labels=len(img_class_labels),
    id2label=id2label,
    label2id=label2id,
)
# define loss (from_logits=True: the loss is given raw logits, not probabilities)
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# define metrics
class APCER(tf.keras.metrics.Metric):
    """Attack Presentation Classification Error Rate.

    Fraction of attack samples (``y_true == attack_label``) whose predicted
    class is not the attack class. The predicted class is the arg-max over the
    last axis of ``y_pred``.

    The previous implementation never looked at ``y_pred`` and reported the
    share of class-0 labels per batch instead of an error rate.

    Args:
        name: Metric name reported by Keras.
        attack_label: Integer class id of the attack class. Defaults to 0, the
            id the previous implementation compared ``y_true`` against.
            TODO confirm it matches the dataset's class order.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name="apcer", attack_label=0, **kwargs):
        super(APCER, self).__init__(name=name, **kwargs)
        self.attack_label = attack_label
        # Number of attack samples that were not classified as attack.
        self.apcer_total = self.add_weight(name="apcer_total", initializer="zeros")
        # Number of attack samples seen so far.
        self.total_samples = self.add_weight(name="total_samples", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate error counts for one batch (``sample_weight`` is ignored)."""
        # assumes y_pred holds per-class scores/logits on its last axis - TODO confirm
        labels = tf.reshape(tf.cast(y_true, tf.int64), [-1])
        predicted = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
        is_attack = tf.equal(labels, self.attack_label)
        missed = tf.logical_and(is_attack, tf.not_equal(predicted, self.attack_label))
        self.apcer_total.assign_add(tf.reduce_sum(tf.cast(missed, self.apcer_total.dtype)))
        self.total_samples.assign_add(tf.reduce_sum(tf.cast(is_attack, self.total_samples.dtype)))

    def result(self):
        """Return the error rate, or 0 when no attack sample has been seen."""
        return tf.math.divide_no_nan(self.apcer_total, self.total_samples)
class BPCER(tf.keras.metrics.Metric):
    """Bona fide Presentation Classification Error Rate.

    Fraction of bona fide samples (``y_true == bonafide_label``) whose
    predicted class is not the bona fide class. The predicted class is the
    arg-max over the last axis of ``y_pred``.

    The previous implementation never looked at ``y_pred`` and reported the
    share of class-1 labels per batch instead of an error rate.

    Args:
        name: Metric name reported by Keras.
        bonafide_label: Integer class id of the bona fide ("real") class.
            Defaults to 1, the id the previous implementation compared
            ``y_true`` against. TODO confirm it matches the dataset's class order.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name="bpcer", bonafide_label=1, **kwargs):
        super(BPCER, self).__init__(name=name, **kwargs)
        self.bonafide_label = bonafide_label
        # Number of bona fide samples that were not classified as bona fide.
        self.bpcer_total = self.add_weight(name="bpcer_total", initializer="zeros")
        # Number of bona fide samples seen so far.
        self.total_samples = self.add_weight(name="total_samples", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate error counts for one batch (``sample_weight`` is ignored)."""
        # assumes y_pred holds per-class scores/logits on its last axis - TODO confirm
        labels = tf.reshape(tf.cast(y_true, tf.int64), [-1])
        predicted = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
        is_bonafide = tf.equal(labels, self.bonafide_label)
        rejected = tf.logical_and(is_bonafide, tf.not_equal(predicted, self.bonafide_label))
        self.bpcer_total.assign_add(tf.reduce_sum(tf.cast(rejected, self.bpcer_total.dtype)))
        self.total_samples.assign_add(tf.reduce_sum(tf.cast(is_bonafide, self.total_samples.dtype)))

    def result(self):
        """Return the error rate, or 0 when no bona fide sample has been seen."""
        return tf.math.divide_no_nan(self.bpcer_total, self.total_samples)
# Metrics tracked during training and validation.
# The "accuracy" name is what EarlyStopping later monitors as "val_accuracy".
# NOTE(review): top-3 accuracy is only informative with more than three classes; confirm the class count.
metrics = [
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.SparseTopKCategoricalAccuracy(3, name="top-3-accuracy"),
    APCER(name="apcer"),
    BPCER(name="bpcer")
]
# compile model
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=metrics
              )

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing TFViTForImageClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFViTForImageClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# alternatively create Image Classification model using Keras Layer and ViTModel
# here you can also add the processing layers of keras
# NOTE(review): `keras_model` is not used by the training cells in this notebook.
import tensorflow as tf
from transformers import TFViTModel
base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
# inputs: channels-first 224x224 images
pixel_values = tf.keras.layers.Input(shape=(3,224,224), name='pixel_values', dtype='float32')
# model layer: [0] picks the first output of the ViT main layer
vit = base_model.vit(pixel_values)[0]
# classify from the first token position; one output unit per dataset class
# (the previous hard-coded 10 did not follow the dataset's class list)
classifier = tf.keras.layers.Dense(len(img_class_labels), activation='softmax', name='outputs')(vit[:, 0, :])
# model
keras_model = tf.keras.Model(inputs=pixel_values, outputs=classifier)

All PyTorch model weights were used when initializing TFViTModel.

All the weights of TFViTModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [None]:
import os
from transformers.keras_callbacks import PushToHubCallback
from tensorflow.keras.callbacks import TensorBoard as TensorboardCallback, EarlyStopping
# Always log to TensorBoard under <output_dir>/logs and watch val_accuracy
# with EarlyStopping (patience=1).
callbacks = [
    TensorboardCallback(log_dir=os.path.join(output_dir, "logs")),
    EarlyStopping(monitor="val_accuracy", patience=1),
]
# Push to the Hub only when a token is available.
if hub_token:
    callbacks.append(
        PushToHubCallback(
            output_dir=output_dir,
            hub_model_id=hub_model_id,
            hub_token=hub_token,
        )
    )

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/kruti-15/vit-base-patch16-224-in21k-euroSat into local empty directory.


Download file tf_model.h5:   0%|          | 8.00k/328M [00:00<?, ?B/s]

Download file logs/validation/events.out.tfevents.1711052270.87dd90a6ab55.1194.1.v2: 100%|##########| 7.33k/7.…

Download file logs/train/events.out.tfevents.1711052067.87dd90a6ab55.1194.0.v2:   1%|1         | 32.0k/2.78M […

Download file logs/train/events.out.tfevents.1710741036.0dfd8475ee55.992.0.v2:   1%|1         | 32.0k/2.78M [0…

Clean file logs/validation/events.out.tfevents.1711052270.87dd90a6ab55.1194.1.v2:  14%|#3        | 1.00k/7.33k…

Download file logs/validation/events.out.tfevents.1711863796.b417286a1a2b.394.1.v2: 100%|##########| 7.99k/7.9…

Clean file logs/validation/events.out.tfevents.1711863796.b417286a1a2b.394.1.v2:  13%|#2        | 1.00k/7.99k …

Download file logs/train/events.out.tfevents.1711866643.b417286a1a2b.20675.0.v2:   1%|          | 16.6k/2.82M …

Download file logs/train/events.out.tfevents.1711865895.b417286a1a2b.17291.0.v2:   1%|1         | 32.0k/2.78M …

Download file logs/train/events.out.tfevents.1711863608.b417286a1a2b.394.0.v2:   1%|1         | 31.5k/2.78M [0…

Download file logs/validation/events.out.tfevents.1711866830.b417286a1a2b.20675.1.v2: 100%|##########| 4.03k/4…

Clean file logs/validation/events.out.tfevents.1711866830.b417286a1a2b.20675.1.v2:  25%|##4       | 1.00k/4.03…

Download file logs/validation/events.out.tfevents.1710741221.0dfd8475ee55.992.1.v2: 100%|##########| 2.49k/2.4…

Clean file logs/validation/events.out.tfevents.1710741221.0dfd8475ee55.992.1.v2:  40%|####      | 1.00k/2.49k …

Download file logs/train/events.out.tfevents.1711866084.b417286a1a2b.17291.1.v2: 100%|##########| 78.0/78.0 [0…

Clean file logs/train/events.out.tfevents.1711866084.b417286a1a2b.17291.1.v2: 100%|##########| 78.0/78.0 [00:0…

Clean file logs/train/events.out.tfevents.1711052067.87dd90a6ab55.1194.0.v2:   0%|          | 1.00k/2.78M [00:…

Clean file logs/train/events.out.tfevents.1710741036.0dfd8475ee55.992.0.v2:   0%|          | 1.00k/2.78M [00:0…

Clean file logs/train/events.out.tfevents.1711865895.b417286a1a2b.17291.0.v2:   0%|          | 1.00k/2.78M [00…

Clean file logs/train/events.out.tfevents.1711863608.b417286a1a2b.394.0.v2:   0%|          | 1.00k/2.78M [00:0…

Clean file logs/train/events.out.tfevents.1711866643.b417286a1a2b.20675.0.v2:   0%|          | 1.00k/2.82M [00…

Clean file tf_model.h5:   0%|          | 1.00k/328M [00:00<?, ?B/s]

In [None]:
# Train the compiled model for up to num_train_epochs epochs, validating on the
# held-out split after each epoch; the configured callbacks run during training.
train_results = model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=callbacks,
    epochs=num_train_epochs,
)

Epoch 1/5


Cause: for/else statement not yet supported


Cause: for/else statement not yet supported
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
from huggingface_hub import HfApi
api = HfApi()
# Always write preprocessor_config.json next to the local model files.
feature_extractor.save_pretrained(output_dir)
# Only contact the Hub when a token is available, the same condition that
# guards PushToHubCallback; without a token the lookup below would fail.
if hub_token:
    user = api.whoami(hub_token)
    commit_info = api.upload_file(
        token=hub_token,
        repo_id=f"{user['name']}/{hub_model_id}",
        path_or_fileobj=os.path.join(output_dir,"preprocessor_config.json"),
        path_in_repo="preprocessor_config.json",
    )
    print(commit_info)
else:
    print("No Hugging Face token found; skipping upload of preprocessor_config.json.")

CommitInfo(commit_url='https://huggingface.co/kruti-15/vit-base-patch16-224-in21k-euroSat/commit/59abf943c6b94c47b52d660d294f7cac72e2c723', commit_message='Upload preprocessor_config.json with huggingface_hub', commit_description='', oid='59abf943c6b94c47b52d660d294f7cac72e2c723', pr_url=None, pr_revision=None, pr_num=None)