## Importing Libraries

In [3]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import numpy as np
import cv2
import random
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from patchify import patchify
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')
import import_ipynb
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import *
import glob
import natsort
import traceback
import logging
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s')
import sys 
sys.version

2023-05-24 20:40:44.273860: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


'3.9.16 (main, Mar  8 2023, 14:00:05) \n[GCC 11.2.0]'

## Creating ViT Model 

In [4]:
class ClassToken(Layer):
    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value = w_init(shape=(1, 1, input_shape[-1]), dtype=tf.float32),
            trainable = True
        )

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        hidden_dim = self.w.shape[-1]

        cls = tf.broadcast_to(self.w, [batch_size, 1, hidden_dim])
        cls = tf.cast(cls, dtype=inputs.dtype)
        return cls

In [5]:
def mlp(x, cf):
    print(cf)
    x = Dense(3072, activation="gelu")(x)
    x = Dropout(0.1)(x)
    x = Dense(768)(x)
    x = Dropout(0.1)(x)
    return x

def transformer_encoder(x, cf):
    skip_1 = x
    x = LayerNormalization()(x)
    x = MultiHeadAttention(
        num_heads=12, key_dim=768
    )(x, x)
    x = Add()([x, skip_1])

    skip_2 = x
    x = LayerNormalization()(x)
    x = mlp(x, cf)
    x = Add()([x, skip_2])

    return x

In [6]:
def ViT(cf):
    """ Inputs """
    input_shape = (cf["num_patches"], cf["patch_size"]*cf["patch_size"]*cf["num_channels"])
    inputs = Input(input_shape)     ## (None, 256, 3072)

    """ Patch + Position Embeddings """
    patch_embed = Dense(768)(inputs)   ## (None, 256, 768)

    positions = tf.range(start=0, limit=cf["num_patches"], delta=1)
    pos_embed = Embedding(input_dim=cf["num_patches"], output_dim=768)(positions) ## (256, 768)
    embed = patch_embed + pos_embed ## (None, 256, 768)

    """ Adding Class Token """
    token = ClassToken()(embed)
    x = Concatenate(axis=1)([token, embed]) ## (None, 257, 768)

    for _ in range(12):
        x = transformer_encoder(x, cf)

    """ Classification Head """
    x = LayerNormalization()(x)     ## (None, 257, 768)
    x = x[:, 0, :]
    x = Dense(cf["num_classes"], activation="softmax")(x)

    model = Model(inputs, x)
    return model

## HyperParams for ViT

In [7]:
##Not all are used ,some are directly using values, change accordingly
config = {}
config["num_layers"] = 12
config["hidden_dim"] = 768
config["mlp_dim"] = 3072
config["num_heads"] = 12
config["dropout_rate"] = 0.1
config["num_patches"] = 256
config["patch_size"] = 32
config["num_channels"] = 3
config["num_classes"] = 5

# model = ViT(config)
# model.summary()

## Pre-processing

### Setting Hyperparams for Dataset & Training

In [8]:
hp = {}
hp["image_size"] = 200 #200*200

hp["num_channels"] = 3

hp["patch_size"] = 25 #25*25,set such as hp["image_size"]%hp["patch_size"] == 0

hp["num_patches"] = (hp["image_size"]**2) // (hp["patch_size"]**2)

hp["flat_patches_shape"] = (hp["num_patches"], hp["patch_size"]*hp["patch_size"]*hp["num_channels"]) #(64,25*25*3)The input shape of Transformer

hp["batch_size"] = 16

hp["lr"] = 1e-4

hp["num_epochs"] = 100

hp["num_classes"] = 2
# hp["class_names"] = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]

### Seeding

In [9]:
seed_constant = 42
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

### Importing The data

In [10]:
path_videos = '/home/kirtan/Documents/FYProject/archive/Videos/Videos/'
path_frames = '/home/kirtan/Documents/FYProject/archive/Videos/Frames/'

### Splitting the data

In [11]:
x = np.arange(1, 105)
np.random.shuffle(x)
videos_validation = x[:16]
videos_test = x[16: 16+22]
videos_train = x[16+22:]
print(videos_train, len(videos_train))
print(videos_test, len(videos_test))
print(videos_validation, len(videos_validation))

[ 57  40  36  17  67  35   8  44  69  70  28  20  85  26  74  50  14  25
   4  18  39   9  82   7  68  37  91  84  55  51  71  47 100  62 101  97
  42  59  49  90  58  76  33  98  60  64 102  38  30   2  53  22   3  24
  88  95  75  87  83  21  61  72  15  93  52 103] 66
[ 81  34  13  27  99  56  23  77  45  73  16  43  41  10  86  12 104  79
  29  80   6  63] 22
[31 66 65 54 46 94 92 48 11  1 19 32 89 96 78  5] 16


### Creating Dataset Pipeline

In [12]:
filenames_train = []
labels_train = []
filenames_validation = []
labels_validation = []
filenames_test = []
labels_test = []

In [13]:
colab_list =[104, 211, 151, 451, 391, 241, 331, 241, 211, 181, 241, 181, 181, 211, 331, 211, 181, 151, 211, 181, 151, 181, 241, 181, 211, 181, 181, 181, 181, 181, 211, 151, 181, 151, 211, 181, 181, 181, 181, 151, 211, 181, 181, 241, 181, 181, 181, 181, 181, 181, 241, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 211, 181, 181, 211, 181, 181, 181, 181, 181, 181, 181, 211, 181, 181, 181, 181, 181, 181, 211, 301, 481, 391, 151, 391, 571, 481, 481, 751, 781, 331, 571, 511, 451, 151, 571, 691, 421, 241, 331, 421, 421, 391, 301, 391]
laptop_list=[]
parent_dir = '/home/kirtan/Documents/FYProject/archive/Videos/Frames/'

subdirectories = sorted([dirpath for dirpath, _, _ in os.walk(parent_dir)])

for dirpath in subdirectories:
    file_count = len(os.listdir(dirpath))
    laptop_list.append(file_count)

result = []

for element1, element2 in zip(laptop_list, colab_list):
    if element1 != element2:
        result.append(-1)
    else:
        result.append(element1)

print(result)

[104, 211, 151, 451, 391, 241, 331, 241, 211, 181, 241, 181, 181, 211, 331, 211, 181, 151, 211, 181, 151, 181, 241, 181, 211, 181, 181, 181, 181, 181, 211, 151, 181, 151, 211, 181, 181, 181, 181, 151, 211, 181, 181, 241, 181, 181, 181, 181, 181, 181, 241, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 211, 181, 181, 211, 181, 181, 181, 181, 181, 181, 181, 211, 181, 181, 181, 181, 181, 181, 211, 301, 481, 391, 151, 391, 571, 481, 481, 751, 781, 331, 571, 511, 451, 151, 571, 691, 421, 241, 331, 421, 421, 391, 301, 391]


In [14]:
for vid in videos_train:
    folder = path_frames + "video{}/".format(vid)
    frames = glob.glob(folder + 'frame*.jpg')
    frames = natsort.natsorted(frames)
    filenames_train = np.append(filenames_train,frames)
    labels_path = path_frames + "video{}/".format(vid) + "labels{}.npy".format(vid)
    labels_array = np.load(labels_path)
    labels_list = list(labels_array)
    labels_train = np.append(labels_train,labels_list)

filenames_train = np.array(filenames_train)
labels_validation = np.array(labels_validation)

for vid in videos_test:
    folder = path_frames + "video{}/".format(vid)
    frames = glob.glob(folder + 'frame*.jpg')
    frames = natsort.natsorted(frames)
    filenames_test = np.append(filenames_test,frames)
    labels_path = path_frames + "video{}/".format(vid) + "labels{}.npy".format(vid)
    labels_array = np.load(labels_path)
    labels_list = list(labels_array)
    labels_list = np.asarray(labels_list).astype('float32').reshape((-1,1))
    labels_test = np.append(labels_test,labels_list)
    
filenames_test = np.array(filenames_test)
labels_validation = np.array(labels_validation)

for vid in videos_validation:
    folder = path_frames + "video{}/".format(vid)
    frames = glob.glob(folder + 'frame*.jpg')
    frames = natsort.natsorted(frames)
    filenames_validation = np.append(filenames_validation,frames)
    labels_path = path_frames + "video{}/".format(vid) + "labels{}.npy".format(vid)
    labels_array = np.load(labels_path)
    labels_list = list(labels_array)
    labels_list = np.asarray(labels_list).astype('float32').reshape((-1,1))
    labels_validation = np.append(labels_validation,labels_list)

filenames_validation = np.array(filenames_validation)
labels_validation = np.array(labels_validation)

In [15]:
print(labels_validation.shape)

(4200,)


In [16]:
print(labels_validation)

[0. 0. 0. ... 0. 0. 0.]


In [17]:
print(filenames_train.shape, filenames_validation.shape, filenames_test.shape)
print(labels_train.shape, labels_validation.shape, labels_test.shape)

(16890,) (4200,) (5430,)
(16890,) (4200,) (5430,)


In [18]:
for i in filenames_train:
    print(i)
    break

/home/kirtan/Documents/FYProject/archive/Videos/Frames/video57/frame0.jpg


In [19]:
def create_dir(path):
    if not os.path.exists(path):
        print("Had to create new folder named files")
        os.makedirs(path)

In [20]:
create_dir("files")

In [21]:
def preprocess_image(image_path, label):
    print('entering 1st')
    image = cv2.imread(image_path.decode())
    print('entering 2nd')

    # Print image shape for debugging
    print('Image shape:', image.shape)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)
    image = cv2.resize(image, (200, 200))
    print('Image processed')

    # Convert image to tf.Tensor
    image_tensor = tf.convert_to_tensor(image, dtype=tf.float32)

    # Apply random image transformations
    image_tensor = tf.image.random_brightness(image_tensor, 0.15)
    image_tensor = tf.image.random_contrast(image_tensor, 0.8, 1.5)
    image_tensor = tf.image.random_saturation(image_tensor, 0.6, 3)
    print('Image transformations applied')

    # Convert image back to NumPy array
    image_np = image_tensor.numpy()

    patches = patchify(image_np, (25, 25, 3), 25)
    patches = np.reshape(patches, (64, 25 * 25 * 3))
    patches = patches.astype(np.float32)

    # Print patch shape for debugging
    print('Patch shape:', patches.shape)

    return patches, np.int32(label)


In [22]:
def parse(path,labels):
    print('ENTERING PARSE1ST')
    patches,labels = tf.numpy_function(preprocess_image,[path,labels],[tf.float32,tf.int32])
    print('ENTERING PARSE2ND')
    print(patches.dtype)
    patches.set_shape((64,25*25*3))
    print('latest patches.shape,',patches.shape)
    return patches,labels

In [23]:
def tf_dataset(images,labels,batch=16):
    print("Running tf.dataset")
    ds = tf.data.Dataset.from_tensor_slices((images, labels))
    print('Entering all 1st')
    ds = ds.map(parse).batch(batch)
    return ds

In [36]:
dataset_train = tf_dataset(filenames_train,labels_train)
dataset_test = tf_dataset(filenames_test,labels_test)
dataset_val = tf_dataset(filenames_validation,labels_validation)

Running tf.dataset
Entering all 1st
ENTERING PARSE1ST
ENTERING PARSE2ND
<dtype: 'float32'>
latest patches.shape, (64, 1875)
Running tf.dataset
Entering all 1st
ENTERING PARSE1ST
ENTERING PARSE2ND
<dtype: 'float32'>
latest patches.shape, (64, 1875)
Running tf.dataset
Entering all 1st
ENTERING PARSE1ST
ENTERING PARSE2ND
<dtype: 'float32'>
latest patches.shape, (64, 1875)


In [25]:
model_path = os.path.join("files","model.h5")
csv_path = os.path.join("files","log.csv")

In [26]:
model = ViT(hp)
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(hp["lr"], clipvalue=1.0),
    metrics=["acc"]
)

{'image_size': 200, 'num_channels': 3, 'patch_size': 25, 'num_patches': 64, 'flat_patches_shape': (64, 1875), 'batch_size': 16, 'lr': 0.0001, 'num_epochs': 100, 'num_classes': 2}
{'image_size': 200, 'num_channels': 3, 'patch_size': 25, 'num_patches': 64, 'flat_patches_shape': (64, 1875), 'batch_size': 16, 'lr': 0.0001, 'num_epochs': 100, 'num_classes': 2}
{'image_size': 200, 'num_channels': 3, 'patch_size': 25, 'num_patches': 64, 'flat_patches_shape': (64, 1875), 'batch_size': 16, 'lr': 0.0001, 'num_epochs': 100, 'num_classes': 2}
{'image_size': 200, 'num_channels': 3, 'patch_size': 25, 'num_patches': 64, 'flat_patches_shape': (64, 1875), 'batch_size': 16, 'lr': 0.0001, 'num_epochs': 100, 'num_classes': 2}
{'image_size': 200, 'num_channels': 3, 'patch_size': 25, 'num_patches': 64, 'flat_patches_shape': (64, 1875), 'batch_size': 16, 'lr': 0.0001, 'num_epochs': 100, 'num_classes': 2}
{'image_size': 200, 'num_channels': 3, 'patch_size': 25, 'num_patches': 64, 'flat_patches_shape': (64, 18

In [27]:
callbacks = [
    ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, min_lr=1e-10, verbose=1),
    CSVLogger(csv_path),
    EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=False),
]

In [37]:
history = model.fit(
    dataset_train,
    epochs=100,
    validation_data=dataset_val,
    callbacks=callbacks
)

Epoch 1/100


TypeError: 'NoneType' object is not callable