# In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import cv2

# CONFIGURATION
IMAGE_SIZE = 128  # every image is resized to IMAGE_SIZE x IMAGE_SIZE pixels
PATCH_SIZE = 8  # side length, in pixels, of each square patch
# One sub-folder per class is assumed. Count directories only, so that stray
# files sitting in the data root do not inflate the classification head.
NUM_CLASSES = sum(
    os.path.isdir(os.path.join("c:/MyData", entry))
    for entry in os.listdir("c:/MyData")
)
EMBED_DIM = 64  # width of each patch token
NUM_PATCHES = (IMAGE_SIZE // PATCH_SIZE) ** 2  # patches per image (grid side squared)
BATCH_SIZE = 32
EPOCHS = 20

# Load CSV labels
labels_df = pd.read_csv("c:/Data/Label/Labels.csv")  # columns: filename, class, xmin, ymin, xmax, ymax

# Load and preprocess data
def load_data():
    """Load every image listed in ``labels_df`` together with its targets.

    Each CSV row is resolved to ``c:/MyData/<class>/<filename>``. Rows whose
    file is missing, or whose file OpenCV cannot decode, are skipped.

    Images are converted from OpenCV's BGR channel order to RGB (so that
    ``plt.imshow`` shows true colours), resized to ``IMAGE_SIZE`` x
    ``IMAGE_SIZE`` and scaled to the 0-1 range.

    Bounding boxes are divided by the width/height of the *original* image,
    so they stay aligned with the picture after it has been resized.
    NOTE(review): this assumes the CSV coordinates are expressed in pixels of
    the original image file - confirm against the annotation tool.

    Returns:
        A 4-tuple ``(images, class_labels, bbox_labels, class_indices)``:
        ``images`` is the stacked image array, ``class_labels`` holds one
        integer class index per image, ``bbox_labels`` has one
        ``[xmin, ymin, xmax, ymax]`` row per image (shape ``(n, 4)``), and
        ``class_indices`` maps each class name to its integer index
        (names sorted, indices starting at 0).
    """
    images, class_names, boxes = [], [], []
    for _, row in labels_df.iterrows():
        img_path = os.path.join("c:/MyData", row['class'], row['filename'])
        if not os.path.exists(img_path):
            continue
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None
            # instead of raising; skip it rather than crash in cv2.resize.
            continue

        # Remember the source size before resizing: the boxes refer to it.
        height, width = img.shape[:2]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))

        images.append(img / 255.0)
        class_names.append(row['class'])
        boxes.append([
            row['xmin'] / width,
            row['ymin'] / height,
            row['xmax'] / width,
            row['ymax'] / height,
        ])

    images = np.array(images)
    # reshape keeps a well-formed (0, 4) array when nothing could be loaded.
    bbox_labels = np.array(boxes, dtype=float).reshape(-1, 4)
    class_indices = {cls: i for i, cls in enumerate(sorted(set(class_names)))}
    class_labels = np.array([class_indices[cls] for cls in class_names])
    return images, class_labels, bbox_labels, class_indices

# Load the whole dataset once, then hold out 20% of the samples for testing.
# The fixed random_state keeps the split reproducible between runs.
images, class_labels, bbox_labels, class_map = load_data()
(
    X_train, X_test,
    y_class_train, y_class_test,
    y_bbox_train, y_bbox_test,
) = train_test_split(
    images,
    class_labels,
    bbox_labels,
    test_size=0.2,
    random_state=42,
)

# PATCHING
class Patching(layers.Layer):
    """Cut a batch of images into non-overlapping square patches.

    Args:
        patch_size: Side length, in pixels, of each square patch.
        **kwargs: Standard Keras ``Layer`` keyword arguments (``name``, ...).
    """

    def __init__(self, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def call(self, images):
        """Return the flattened patches of ``images``.

        The window size equals the stride, so patches do not overlap; with
        ``'VALID'`` padding only complete windows are extracted. The result
        is reshaped to ``(batch, number_of_patches, values_per_patch)``.
        """
        # Dynamic batch dimension: it is unknown while the graph is built.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID',
        )
        # The last axis holds all pixel values of one patch, flattened.
        patch_dim = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dim])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size})
        return config

# Positional Embedding
class PositionEmbedding(layers.Layer):
    """Add a learned position embedding to a sequence of patch tokens.

    Args:
        num_patches: Number of tokens in the sequence.
        embed_dim: Size of each token vector.
        **kwargs: Standard Keras ``Layer`` keyword arguments (``name``, ...).
    """

    def __init__(self, num_patches, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        # Pass the name by keyword: the first positional parameter of
        # add_weight is the name in Keras 2 but the shape in Keras 3, so the
        # positional form breaks on newer versions.
        self.pos_emb = self.add_weight(
            name="pos_emb",
            shape=(1, num_patches, embed_dim),
        )

    def call(self, x):
        """Return ``x`` plus the embedding (broadcast over the batch axis)."""
        return x + self.pos_emb

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "num_patches": self.num_patches,
            "embed_dim": self.embed_dim,
        })
        return config

# Depthwise Separable Convolution block
class DSC(layers.Layer):
    """Depthwise-separable 2-D convolution applied to a token sequence.

    The tokens are laid back out on the square patch grid, passed through a
    3x3 depthwise convolution and a 1x1 pointwise convolution (both with
    ReLU), and flattened into a sequence again. The grid side, token width
    and sequence length come from the module-level ``IMAGE_SIZE``,
    ``PATCH_SIZE``, ``EMBED_DIM`` and ``NUM_PATCHES`` constants.
    """

    def __init__(self):
        super().__init__()
        self.depthwise = layers.DepthwiseConv2D(
            kernel_size=3, padding='same', activation='relu'
        )
        self.pointwise = layers.Conv2D(
            filters=EMBED_DIM, kernel_size=1, activation='relu'
        )

    def call(self, x):
        """Convolve the tokens on their 2-D grid and return them as a sequence."""
        grid_side = IMAGE_SIZE // PATCH_SIZE
        grid = tf.reshape(x, [-1, grid_side, grid_side, EMBED_DIM])
        convolved = self.pointwise(self.depthwise(grid))
        return tf.reshape(convolved, [-1, NUM_PATCHES, EMBED_DIM])

# Depthwise Separable MLP
class DS_MLP(layers.Layer):
    """Two stacked separable 1-D convolutions applied along the token axis.

    Args:
        units: Number of output filters of both convolutions.
        **kwargs: Standard Keras ``Layer`` keyword arguments (``name``, ...).
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.dw1 = layers.SeparableConv1D(units, 3, padding='same', activation='relu')
        # No activation on the second convolution: its output goes straight
        # to whatever consumes the layer.
        self.dw2 = layers.SeparableConv1D(units, 3, padding='same')

    def call(self, x):
        """Apply the ReLU convolution followed by the linear convolution."""
        x = self.dw1(x)
        x = self.dw2(x)
        return x

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

# Very Gated Multi-head Attention replacement (VGMA)
class VGMA(layers.Layer):
    """Self-gated dense projection used in place of multi-head attention.

    Each token is projected by one ReLU ``Dense`` layer and multiplied by
    the sigmoid of that same projection. Tokens are processed independently;
    no information is exchanged between them here.

    Args:
        embed_dim: Output size of the dense projection.
        **kwargs: Standard Keras ``Layer`` keyword arguments (``name``, ...).
    """

    def __init__(self, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense = layers.Dense(embed_dim, activation='relu')

    def call(self, x):
        """Return ``h * sigmoid(h)`` where ``h = dense(x)``."""
        # The projection serves as both value and gate; evaluate it once
        # rather than running the same layer twice on the same input.
        projected = self.dense(x)
        return projected * tf.sigmoid(projected)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"embed_dim": self.embed_dim})
        return config

# Transformer Block
def transformer_block(x):
    """Apply one encoder block to the token sequence ``x``.

    The block has two pre-normalised residual stages: a ``VGMA`` gating
    stage, then a ``DSC`` convolution followed by a ``DS_MLP``. Fresh layers
    are created on every call, so repeated calls do not share weights.
    """
    # Stage 1: layer norm -> gated projection -> residual add.
    residual = x
    normed = layers.LayerNormalization()(residual)
    gated = VGMA(EMBED_DIM)(normed)
    x = layers.Add()([gated, residual])

    # Stage 2: layer norm -> separable convolutions -> residual add.
    residual = x
    normed = layers.LayerNormalization()(residual)
    convolved = DSC()(normed)
    mixed = DS_MLP(EMBED_DIM)(convolved)
    return layers.Add()([mixed, residual])

# Build model: image -> patches -> linear embedding -> position embedding
# -> one encoder block -> average over tokens.
inputs = keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
patches = Patching(PATCH_SIZE)(inputs)
tokens = layers.Dense(EMBED_DIM)(patches)
tokens = PositionEmbedding(NUM_PATCHES, EMBED_DIM)(tokens)
encoded = transformer_block(tokens)

x = layers.GlobalAveragePooling1D()(encoded)

# Parallel heads sharing the pooled features: the box head uses a sigmoid
# (four values in 0-1), the class head a softmax over NUM_CLASSES.
bbox_output = layers.Dense(4, activation='sigmoid', name='bbox')(x)
class_output = layers.Dense(NUM_CLASSES, activation='softmax', name='class')(x)

model = keras.Model(inputs, [bbox_output, class_output])
# Losses and metrics are keyed by the output layer names given above.
model.compile(
    optimizer='adam',
    loss={'bbox': 'mse', 'class': 'sparse_categorical_crossentropy'},
    metrics={'bbox': 'mae', 'class': 'accuracy'},
)

# Training: targets are passed as dicts keyed by the output layer names.
train_targets = {'bbox': y_bbox_train, 'class': y_class_train}
test_targets = {'bbox': y_bbox_test, 'class': y_class_test}

history = model.fit(
    X_train,
    train_targets,
    validation_data=(X_test, test_targets),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

# Evaluation on the held-out split (the same data used for validation above).
model.evaluate(X_test, test_targets)

# Plot Accuracy/Loss of the classification head, side by side.
# Each entry: (train history key, validation history key,
#              train legend label, validation legend label, panel title).
panels = (
    ('class_accuracy', 'val_class_accuracy', 'Train Acc', 'Val Acc',
     'Classification Accuracy'),
    ('class_loss', 'val_class_loss', 'Train Loss', 'Val Loss',
     'Classification Loss'),
)

plt.figure(figsize=(12, 4))
for position, (train_key, val_key, train_label, val_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.title(title)
plt.show()

# Prediction + visualization.
# The model predicts exactly one box per image, so there are no competing
# boxes to merge and no non-max suppression step is performed.
pred_bboxes, pred_classes = model.predict(X_test)

# Invert the name -> index mapping once instead of searching it per image.
index_to_label = {index: name for name, index in class_map.items()}

# Show the first 5 test images, or fewer when the test split is smaller.
for i in range(min(5, len(X_test))):
    img = X_test[i].copy()  # draw on a copy so the test data stays untouched
    class_id = int(np.argmax(pred_classes[i]))
    score = float(np.max(pred_classes[i]))
    if score < 0.5:
        # Skip low-confidence predictions.
        continue
    # Boxes are predicted in 0-1 units; scale them to the resized image.
    bbox = pred_bboxes[i] * IMAGE_SIZE
    # Plain Python ints: OpenCV can reject NumPy scalars as point coordinates.
    x1, y1, x2, y2 = bbox.astype(int).tolist()
    cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
    # Fall back to the raw index if the head predicts a class that never
    # appeared in the loaded labels.
    label = index_to_label.get(class_id, str(class_id))
    # Keep the caption inside the image when the box touches the top edge.
    text_origin = (x1, max(y1 - 10, 12))
    cv2.putText(img, f"{label} ({score:.2f})", text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    plt.imshow(img)
    plt.title("Prediction")
    plt.axis('off')
    plt.show()