In [None]:
# =============================================================================
# Created By:     Kai Metzger
# Created School: Franz-Oberthuer-Schule Wuerzburg
# Created Email:  metzgerkai@franz-oberthuer-schule.de
# Created Date:   Mon Dec 01 17:25 UTC 2025
# Version:        1.0.0
# =============================================================================
"""This module was built for training on the symbols dataset (images +
ground truth). Images have base dimensions of 640px x 480px in datasets 1-8;
another 700 images with a resolution of 1920px x 1080px were added to
the subsequent datasets.
The files in the dataset <symbols> should be organized in the following
manner:             > explanation
- symbols
  - # dataset, e.g. dataset10; the corresponding classes are stored in folders 0-3:
    #     classA/     = cross       351
    #     classB/     = circle      348
    #     classC/     = triangle    350
    #     classD/     = rectangle   351
"""

# =============================================================================
# Import
# =============================================================================
import cv2
import os
import numpy as np
from keras import layers, models, utils, callbacks
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
import tensorflow_hub as hub
from keras.optimizers.schedules import ExponentialDecay

# Restrict CUDA to GPU 0. Comment this line out to leave the GPU selection to
# the environment.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
# XLA takes its options from the XLA_FLAGS *environment variable*; a plain
# Python variable has no effect, so the value is exported as well.
# NOTE(review): environment variables are most reliably set before TensorFlow
# is imported - consider moving these lines above the import block.
XLA_FLAGS = "--xla_gpu_cuda_data_dir=/usr/"
os.environ['XLA_FLAGS'] = XLA_FLAGS


In [None]:
# =============================================================================
# Dataset location
# =============================================================================
# Adjust these two values so they point at your local copy of the dataset.
# `home_dir` is the directory that contains the `ml_project` checkout.
home_dir = os.path.expanduser("~/git")
dataset_for_training = f"{home_dir}/ml_project/datasets/symbols/dataset10"

print("Dataset:", dataset_for_training)

In [None]:
# =============================================================================
# Dataset related
# =============================================================================
# Default class count; it is overwritten with the number of class folders
# actually found once the dataset has been loaded.
NUM_CLASSES = 4
IMG_SIZE_X = 160
IMG_SIZE_Y = 160
# Size every image is resized to when the dataset is loaded.
IMG_SIZE = (IMG_SIZE_X, IMG_SIZE_Y)

#INPUT_SHAPE = (IMG_SIZE_X, IMG_SIZE_Y, 1) # grayscale
INPUT_SHAPE = (IMG_SIZE_X, IMG_SIZE_Y, 3) # color

# =============================================================================
# Hyperparameters
# =============================================================================
BATCH_SIZE = 32
EPOCHS = 300
BASE_LEARNING_RATE = 1e-4 #0.0001

# Optional (disabled): exponential-decay learning-rate schedule.
# Kept as real comments rather than a bare string literal, because a string as
# the last expression of a cell is echoed as the cell's output.
#
# LR_SCHEDULE = ExponentialDecay(
#     initial_learning_rate=BASE_LEARNING_RATE,  # start value (1e-4)
#     decay_steps=100000,           # how often to apply the decay
#     decay_rate=0.96,              # decay rate
#     staircase=True                # whether to apply the decay in steps
# )
#
# OPTIMIZER = keras.optimizers.Adam(learning_rate=LR_SCHEDULE)

In [None]:
# Both subsets are read from the same directory with identical settings; only
# `subset` differs. The shared seed keeps the 80/20 split consistent between
# the two calls.
split_options = dict(
    labels="inferred",
    label_mode="categorical",           # 4 classes -> one-hot vectors
    shuffle=True,
    validation_split=0.2,
    seed=123,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

train_dataset = tf.keras.utils.image_dataset_from_directory(
    dataset_for_training, subset="training", **split_options
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    dataset_for_training, subset="validation", **split_options
)

In [None]:
# Class names are the folder names discovered while loading; they have to be
# read before prefetch() wraps the dataset.
class_names = train_dataset.class_names
NUM_CLASSES = len(class_names)
print("Klassen:", class_names, "\n")

train_batch_count = tf.data.experimental.cardinality(train_dataset)
validation_batch_count = tf.data.experimental.cardinality(validation_dataset)
print('Number of training batches: %d' % train_batch_count)
print('Number of validation batches: %d' % validation_batch_count)

# Let tf.data pick the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)
validation_dataset = validation_dataset.prefetch(tf.data.AUTOTUNE)
# TODO: prefetch a dedicated test split as well once one exists, e.g.
#test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

In [None]:
# =============================================================================
# Data augmentation
# =============================================================================
# Active augmentations: random horizontal flip and random rotation (factor 0.2).
# Candidates that are currently disabled: Rescaling(1/255) (TODO), an
# unrestricted RandomFlip(), RandomRotation(0.5), RandomShear(),
# RandomColorJitter() and RandomZoom(0.2, 0.2).
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(factor=0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
# Show nine random augmentations of the first image of one training batch.
for image, _ in train_dataset.take(1):
  plt.figure(figsize=(10, 10))
  first_image = image[0]
  single_image_batch = tf.expand_dims(first_image, 0)
  for panel in range(1, 10):
    ax = plt.subplot(3, 3, panel)
    augmented_image = data_augmentation(single_image_batch)
    # Pixel values are divided by 255 so imshow receives floats in 0..1.
    plt.imshow(augmented_image[0] / 255)
    plt.axis('off')


In [None]:
# TODO

# AUTOTUNE
# =============================================================================
# TensorBoard logging
# =============================================================================
# Every run writes into the same directory; histogram_freq=1 requests weight
# histograms every epoch.
log_dir = "logs/fit"
tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Start from a shell with: tensorboard --logdir logs/fit

In [None]:
# =============================================================================
# MobileNetV2 (transfer learning)
# =============================================================================
# Pipeline: input -> augmentation -> MobileNetV2 preprocessing -> frozen
# ImageNet backbone -> global average pooling -> dropout -> softmax head.
model_input_shape = IMG_SIZE + (3,)

# Backbone without its ImageNet classifier; MobileNetV3Large would be a
# drop-in alternative (together with its own preprocess_input).
base_model = tf.keras.applications.MobileNetV2(
    input_shape=model_input_shape,
    include_top=False,
    weights="imagenet",
)

# Feature extraction phase: the backbone weights stay fixed.
base_model.trainable = False

inputs = keras.Input(shape=model_input_shape)

augmented = data_augmentation(inputs)
preprocessed = tf.keras.applications.mobilenet_v2.preprocess_input(augmented)

# training=False keeps the backbone in inference mode even during fit().
backbone_features = base_model(preprocessed, training=False)
pooled_features = layers.GlobalAveragePooling2D()(backbone_features)
x = layers.Dropout(0.2)(pooled_features)

outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = keras.Model(inputs, outputs)

In [None]:
# Adam with the base learning rate; the loss matches the one-hot labels
# produced by label_mode="categorical".
feature_extraction_optimizer = keras.optimizers.Adam(BASE_LEARNING_RATE)

model.compile(
    optimizer=feature_extraction_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

model.summary()

In [None]:
# =============================================================================
# Training (feature extraction)
# =============================================================================
# Only the new classification head is trained here; the backbone is frozen.
initial_epochs = 50

# NOTE(review): per the Keras docs `shuffle` is ignored for tf.data inputs;
# it is kept here unchanged.
feature_extraction_fit_options = dict(
    validation_data=validation_dataset,
    epochs=initial_epochs,
    shuffle=True,
    callbacks=[tensorboard_callback],
)

history = model.fit(train_dataset, **feature_extraction_fit_options)

# Start from a shell with: tensorboard --logdir logs/fit

In [None]:
# Per-epoch metrics recorded during the feature extraction phase.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Two stacked panels: accuracy on top, loss below.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
# Keep the automatically chosen lower bound, pin the upper bound to 1.
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
ax_loss.set_ylim([0, 1.0])
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('epoch')
plt.show()

In [None]:
# =============================================================================
# Fine tuning
# =============================================================================
# Unfreeze the upper part of the backbone and continue training with a ten
# times smaller learning rate.
print("Number of layers in the base model: ", len(base_model.layers))

base_model.trainable = True

# Layers below this index stay frozen; everything from it onwards is trained.
fine_tune_from = 100

frozen_layers = base_model.layers[:fine_tune_from]
for layer in frozen_layers:
    layer.trainable = False

# Trainability changes only take effect after compiling again.
fine_tune_learning_rate = BASE_LEARNING_RATE/10  # = 1e-5
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=fine_tune_learning_rate),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

fine_tune_epochs = 20
total_epochs = initial_epochs + fine_tune_epochs

# Continue the epoch numbering where the first phase stopped
# (alternative start value: history.epoch[-1]).
first_fine_tune_epoch = len(history.epoch)

history_fine = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=total_epochs,
    initial_epoch=first_fine_tune_epoch,
    shuffle=True,
)

In [None]:
# Concatenate the metrics of both training phases for the combined plots.
# New lists are built from the two History objects on every run: an in-place
# `+=` would extend the lists stored inside `history.history` itself, so
# re-running this cell would append the fine-tuning values again and again.
acc = history.history['accuracy'] + history_fine.history['accuracy']
val_acc = history.history['val_accuracy'] + history_fine.history['val_accuracy']

loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

In [None]:
# Accuracy (top) and loss (bottom) over both phases; a vertical line marks the
# last epoch of the feature extraction phase.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))
fine_tune_marker_x = [initial_epochs-1, initial_epochs-1]

panels = [
    (ax_acc, acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     [0.6, 1], 'lower right', 'Training and Validation Accuracy'),
    (ax_loss, loss, val_loss, 'Training Loss', 'Validation Loss',
     [0, 1.0], 'upper right', 'Training and Validation Loss'),
]

for axis, train_curve, val_curve, train_label, val_label, y_range, legend_loc, title in panels:
    axis.plot(train_curve, label=train_label)
    axis.plot(val_curve, label=val_label)
    axis.set_ylim(y_range)
    # The marker spans the full (already fixed) y-range of the panel.
    axis.plot(fine_tune_marker_x, axis.get_ylim(), label='Start Fine Tuning')
    axis.legend(loc=legend_loc)
    axis.set_title(title)

ax_loss.set_xlabel('epoch')
plt.show()

In [None]:
# =============================================================================
# Test accuracy (validation = test)
# =============================================================================
# No separate test split exists yet, so the validation data is evaluated.
# The results get their own names: reusing `loss` / `acc` would overwrite the
# per-epoch lists that the plotting cells rely on with two scalars.
test_loss, test_acc = model.evaluate(validation_dataset)
#TODO test_loss, test_acc = model.evaluate(test_dataset)

print("Test Accuracy:", test_acc)

In [None]:
# =============================================================================
# Confusion matrix + classification report
# =============================================================================
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import seaborn as sns

# Collect labels and predictions batch by batch. The validation dataset was
# created with shuffle=True, so both must come from the same pass over it to
# stay aligned.
y_true = []
y_pred = []

for images, labels in validation_dataset:
    # verbose=0: otherwise one progress bar is printed per batch.
    preds = model.predict(images, verbose=0)
    y_true.extend(np.argmax(labels.numpy(), axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Class list
print("Klassen:", class_names)

# Pin the label set to all known classes so the matrix and the report always
# have one row/column per class name, even if a class does not occur in the
# evaluated data.
class_ids = list(range(len(class_names)))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

# Classification report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids,
                            target_names=class_names))

In [None]:
# =============================================================================
# Save the model
# =============================================================================
# Writes the whole model as a single file in the native Keras format.
model.save("model.keras")

print("Modelle gespeichert: model.keras")

In [None]:
# =============================================================================
# Load the model
# =============================================================================
# Reads back the file written by the save cell.
loaded_model = keras.saving.load_model("model.keras")

print("Modelle erfolgreich geladen!")


In [None]:
# =============================================================================
# Grad-CAM visualisation
# =============================================================================
# The head of `model` is GlobalAveragePooling2D -> Dropout -> Dense. The score
# of a class is therefore a weighted sum of the spatially averaged backbone
# feature maps, and weighting the un-pooled feature maps with the Dense kernel
# column of that class gives its activation map directly. For this head the
# Grad-CAM channel weights are proportional to that kernel column, so no
# GradientTape is required.

# ============================================================
# 1. Locate the MobileNetV2 backbone inside the full model
# ============================================================
# Looked up through the backbone's own name instead of a hard-coded string
# such as "mobilenetv2_1.00_160", which no longer matches once IMG_SIZE changes.
backbone = model.get_layer(base_model.name)
print("Backbone:", backbone.name)


# ============================================================
# 2. Pick an example image and preprocess it for the backbone
# ============================================================
example_img, _ = next(iter(validation_dataset))
example_img = example_img[0].numpy() # first image of the batch, (H, W, 3)

# The full model expects raw 0-255 pixel values
if example_img.max() <= 1.0:
    example_img = example_img * 255.

# Same scaling to [-1, 1] that the MobileNetV2 preprocessing inside the full
# model applies, i.e. Rescaling(1./127.5, offset=-1). It is done by hand
# because the backbone is called directly here, bypassing the augmentation and
# preprocessing steps of the full model.
img_pp = (example_img / 127.5) - 1

# Add the batch dimension the backbone expects
input_backbone = np.expand_dims(img_pp, 0)


# ============================================================
# 3. Feature maps, prediction and class activation map
# ============================================================
# Use the backbone *output*: these are exactly the feature maps the pooling
# layer averages, so the Dense weights apply to them channel by channel. (The
# output of the last Conv2D layer comes before batch norm and ReLU and does
# not match those weights.)
feature_maps = backbone(input_backbone, training=False)[0].numpy()

# Full model for the class prediction; training=False keeps the random
# augmentation and dropout switched off.
full_pred = model(np.expand_dims(example_img, 0), training=False)
pred_index = tf.argmax(full_pred[0]).numpy()
print(full_pred)

# Dense kernel has one column per class
dense_weights = model.layers[-1].get_weights()[0]
weights_for_class = dense_weights[:, pred_index]

# Weighted sum over the channel axis -> one value per spatial position
heatmap = np.tensordot(feature_maps, weights_for_class, axes=([-1], [0]))

# Keep positive evidence only and scale to 0..1 (epsilon avoids division by 0)
heatmap = np.maximum(heatmap, 0)
heatmap = (heatmap / (heatmap.max() + 1e-8)).astype("float32")


# ============================================================
# 4. Visualization
# ============================================================
img_uint8 = example_img.astype("uint8")

# cv2.resize takes the target size as (width, height)
heatmap_resized = cv2.resize(heatmap, (img_uint8.shape[1], img_uint8.shape[0]))
heatmap_uint8 = np.uint8(255 * heatmap_resized)
# applyColorMap returns BGR, while the image and matplotlib use RGB; without
# the conversion hot regions would show up blue in the overlay.
heatmap_color = cv2.applyColorMap(heatmap_uint8, cv2.COLORMAP_JET)
heatmap_color = cv2.cvtColor(heatmap_color, cv2.COLOR_BGR2RGB)

overlay = cv2.addWeighted(img_uint8, 0.6, heatmap_color, 0.4, 0)


plt.figure(figsize=(12, 5))

plt.subplot(1, 3, 1)
plt.title("Original")
plt.imshow(img_uint8)
plt.axis("off")

plt.subplot(1, 3, 2)
plt.title("Grad-CAM Heatmap")
plt.imshow(heatmap_resized, cmap='jet')
plt.axis("off")

plt.subplot(1, 3, 3)
plt.title("Overlay")
plt.imshow(overlay)
plt.axis("off")

plt.show()

print("Predicted class:", class_names[pred_index])


In [None]:
# Carve a test set out of the validation data: the first fifth of the batches
# becomes the test set, the remainder stays for validation.
# NOTE(review): this cell is not idempotent - every re-run shrinks
# validation_dataset again. The dataset was also created with shuffle=True;
# confirm that take()/skip() yield disjoint data on each iteration.
val_batches = tf.data.experimental.cardinality(validation_dataset)
test_batch_count = val_batches // 5
test_dataset = validation_dataset.take(test_batch_count)
validation_dataset = validation_dataset.skip(test_batch_count)


In [None]:
# Report how many batches each split contains after the test split.
validation_batch_count = tf.data.experimental.cardinality(validation_dataset)
test_batch_count = tf.data.experimental.cardinality(test_dataset)
print('Number of validation batches: %d' % validation_batch_count)
print('Number of test batches: %d' % test_batch_count)


In [None]:
# Prefetch all three splits; the buffer size is chosen by tf.data.
AUTOTUNE = tf.data.AUTOTUNE

train_dataset, validation_dataset, test_dataset = (
    split.prefetch(buffer_size=AUTOTUNE)
    for split in (train_dataset, validation_dataset, test_dataset)
)


In [None]:
# Augmentation pipeline: random horizontal flip followed by a random rotation.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip('horizontal'))
data_augmentation.add(tf.keras.layers.RandomRotation(0.2))

In [None]:
# 3x3 grid with random augmentations of the first image of one training batch.
for image, _ in train_dataset.take(1):
  plt.figure(figsize=(10, 10))
  first_image = image[0]
  for i in range(9):
    grid_position = i + 1
    ax = plt.subplot(3, 3, grid_position)
    # The augmentation works on batches, so a batch axis is added first.
    augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
    rescaled_image = augmented_image[0] / 255
    plt.imshow(rescaled_image)
    plt.axis('off')


In [None]:
# MobileNetV2's own input preprocessing function.
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# Alternative as a layer: maps 0..255 pixel values to [-1, 1], the input range
# many pretrained models (e.g. MobileNet, Xception, EfficientNet) expect.
# A plain 0..1 scaling would be Rescaling(1./255.).
rescale = tf.keras.layers.Rescaling(scale=1./127.5, offset=-1)

In [None]:
# `feature_batch` was never defined in this notebook, so it is created here:
# one training batch is passed through the backbone to inspect the shapes.
image_batch, _ = next(iter(train_dataset))
feature_batch = base_model(image_batch, training=False)

# Average every feature map over its spatial positions -> one vector per image.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# One softmax output per class. The labels in this notebook are one-hot
# vectors over NUM_CLASSES classes, so a single sigmoid unit (binary head)
# would not match them.
prediction_layer = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)


In [None]:
# Assemble the model from the separately created pieces:
# input -> augmentation -> preprocessing -> frozen backbone -> pooling ->
# dropout -> prediction layer.
# `IMG_SHAPE` is not defined anywhere in this notebook; the input shape is
# derived from IMG_SIZE exactly as in the model-building cell.
inputs = tf.keras.Input(shape=IMG_SIZE + (3,))
x = data_augmentation(inputs)
x = preprocess_input(x)
# training=False keeps the backbone in inference mode.
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)
# NOTE(review): this replaces the trained `model` with a new, uncompiled one.
model = tf.keras.Model(inputs, outputs)
