<a href="https://colab.research.google.com/github/mesh98a/DeepLearning/blob/main/labN3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras

print(tf.version.VERSION)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical, plot_model, image_dataset_from_directory
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Rescaling
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras import Input
from tensorflow.keras.preprocessing import image

downloading archive with images

In [None]:
# Download the Caltech-101 archive and store it locally as images.zip.
!wget -O images.zip "https://data.caltech.edu/records/mzrjq-6wc02/files/caltech-101.zip?download=1"

In [None]:
# Create the working directories (no error if they already exist).
!mkdir -p cnn
!mkdir -p cnn/images
# Unpack the downloaded zip quietly into cnn/images.
!unzip -qq images.zip -d cnn/images
# Extract the nested tarball with the category folders into cnn/images.
!tar -xzf cnn/images/caltech-101/101_ObjectCategories.tar.gz -C cnn/images
# Remove entries whose names start with "__" (presumably the __MACOSX metadata folder).
!rm -rf cnn/images/__**
!rm -rf cnn/images/caltech-101 # && rmdir cnn/images/caltech-101

In [None]:
import os
import shutil

# Location of the full category tree and of the 5-class subset built from it.
src_base = 'cnn/images/101_ObjectCategories'
dst_base = 'cnn/5_classes'

# Categories that make up the classification problem.
selected_classes = ['watch', 'bonsai', 'kangaroo', 'brain', 'ketch']

os.makedirs(dst_base, exist_ok=True)

# Mirror every selected category folder into the subset directory, replacing
# any copy that is already there.
for class_name in selected_classes:
    class_src = os.path.join(src_base, class_name)
    if not os.path.exists(class_src):
        print(f"Class {class_name} was not found.")
        continue

    class_dst = os.path.join(dst_base, class_name)
    if os.path.exists(class_dst):
        shutil.rmtree(class_dst)
    shutil.copytree(class_src, class_dst)

In [None]:
import pathlib

# Root folder of the 5-class subset.
data_dir = pathlib.Path('cnn/5_classes')

# Count the .jpg files found at any depth below the root.
image_count = sum(1 for _ in data_dir.glob('**/*.jpg'))
print(f'Images: {image_count}')

Let's find the smallest image dimensions. They will be used later to resize all images to a common size, because the network needs inputs with the same resolution.

In [None]:
# `import PIL` alone does not load the Image submodule; import it explicitly
# so PIL.Image is available regardless of what other libraries imported.
import PIL.Image

# Decode each file once and keep an in-memory copy. The context manager closes
# the file handle right away instead of leaving one open per image.
# Sorting makes the positions in `images` reproducible across runs.
images = []
for image_path in sorted(data_dir.glob('*/*')):
    if not image_path.is_file():
        continue
    with PIL.Image.open(image_path) as opened:
        images.append(opened.copy())

heights = [img.height for img in images]
widths = [img.width for img in images]

# Smallest height and width over the whole subset (not necessarily from the
# same image).
min_height = min(heights)
min_width = min(widths)
print(f'Minimal height {min_height}')
print(f'Minimal width {min_width}')

Visualize several images

In [None]:
# Show two sample images side by side without axes.
for panel, sample_index in enumerate((100, 200), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(images[sample_index])
    plt.axis("off")

plt.show()


# **Data Preprocessing**

.unbatch() Breaks the batches into individual (image, label) pairs.
After this, the dataset yields one image and one label at a time instead of batches

In [None]:
# https://massedcompute.com/faq-answers/?question=What%20is%20the%20optimal%20image%20size%20for%20training%20a%20CNN%20model%20for%20image%20recognition?
min_height = 197
min_width = 300

dataset = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    labels='inferred',                # Automatically infer labels from subdirectory names
    label_mode='int',                 # Labels are returned as integers
    image_size=(min_height, min_width),  # Resize images to 224x224
    batch_size=85,                    # Load images in batches
    shuffle=True                      # Shuffle the dataset
).unbatch()


In [None]:
plt.figure(figsize=(12, 8))

# Preview the first nine samples on a 3x3 grid, each titled with its label.
for slot, (image, label) in enumerate(dataset.take(9), start=1):
    plt.subplot(3, 3, slot)
    # Pixel values are floats after loading; cast to uint8 for display.
    plt.imshow(image.numpy().astype("uint8"))
    plt.title(f"Label: {label}")
    plt.axis("off")

plt.show()

In [None]:
# Carve the sample stream into consecutive ranges: the first 400 samples for
# training, the next 100 for validation, everything after 500 for testing.
# NOTE: take()/skip() only give disjoint splits if `dataset` yields its
# elements in the same order on every iteration.
train_ds = dataset.take(400)
after_train = dataset.skip(400)
val_ds = after_train.take(100)
test_ds = dataset.skip(500)

# Count the elements of each split by folding over it.
print(f'Train: {train_ds.reduce(0, lambda count, _: count + 1)}')
print(f'Val: {val_ds.reduce(0, lambda count, _: count + 1)}')
print(f'Test: {test_ds.reduce(0, lambda count, _: count + 1)}')

# Group every split into batches of 50 samples.
train_ds, val_ds, test_ds = (
    split.batch(50) for split in (train_ds, val_ds, test_ds)
)



# Build Deep Learning Model

Plain 3-layer CNN

In [None]:
def buil_model(input_shape=(197, 300, 3), num_classes=5):
    """Build, compile and summarize the plain 3-layer CNN classifier.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of output classes.

    Returns:
        The compiled Keras model. Its output layer applies softmax, so
        predictions are class probabilities.
    """
    input_tensor = tf.keras.Input(shape=input_shape)

    # Scale pixel values by 1/255 inside the model.
    x = Rescaling(1./255)(input_tensor)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = MaxPooling2D(2, 2)(x)
    x = Conv2D(64, (3, 3), 1, activation='relu')(x)
    x = MaxPooling2D(2, 2)(x)
    x = Conv2D(128, (3, 3), 1, activation='relu')(x)
    x = MaxPooling2D(2, 2)(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    output_tensor = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=input_tensor, outputs=output_tensor)

    # The output layer already applies softmax, so the loss must treat the
    # model output as probabilities, not logits.
    model.compile(
        optimizer='adam',
        loss=SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'],
    )
    model.summary()
    return model



# Build the plain CNN with its default settings and train it for 10 epochs,
# evaluating on the validation split after each epoch.
model = buil_model()
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

In [None]:
def evaluate_model(model, test_data):
    """Run the model over every batch of ``test_data`` and collect the results.

    Args:
        model: Object with a ``predict(images)`` method returning one score
            vector per image.
        test_data: Iterable of ``(images, labels)`` batches; ``labels`` must
            provide ``.numpy()``.

    Returns:
        Tuple of four lists with one entry per sample: true labels, predicted
        class indices, the highest score of each prediction, and the full
        score vectors.
    """
    y_true, y_pred, confidences, score_vectors = [], [], [], []

    for batch_images, batch_labels in test_data:
        batch_scores = model.predict(batch_images)

        y_true += list(batch_labels.numpy())
        # Predicted class = index of the highest score in each row.
        y_pred += list(np.argmax(batch_scores, axis=1))
        confidences += list(np.max(batch_scores, axis=1))
        score_vectors += list(batch_scores)

    return y_true, y_pred, confidences, score_vectors

# ConfusionMatrix
def print_report(true_labels, predicted_labels, predicted_proba):
    """Print the classification report and draw the confusion matrix.

    Args:
        true_labels: Ground-truth class indices.
        predicted_labels: Predicted class indices.
        predicted_proba: Accepted so callers can pass it, but not used.
    """
    print(classification_report(true_labels, predicted_labels))

    matrix = confusion_matrix(true_labels, predicted_labels)
    ConfusionMatrixDisplay(confusion_matrix=matrix).plot()

def learning_curves(history):
    """Plot accuracy per epoch from a Keras training history.

    Training accuracy is always drawn. Validation accuracy is drawn as well
    when the history contains a ``'val_accuracy'`` entry, so the gap between
    the two curves (overfitting) is visible.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names to per-epoch value lists.
    """
    plt.plot(history.history['accuracy'], label='accuracy')

    val_accuracy = history.history.get('val_accuracy')
    if val_accuracy is not None:
        plt.plot(val_accuracy, label='val_accuracy')

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.show()

def plot_multiclass_roc(true_labels, predicted_scores, title='ROC Curves for All Classes'):
    """Plot one-vs-rest ROC curves, one per class, with their AUC values.

    Args:
        true_labels: Sequence of integer class indices, one per sample.
        predicted_scores: Sequence of score vectors, one per sample; column
            ``i`` is the score of class ``i``.
        title: Figure title.
    """
    y_test = np.array(true_labels)
    y_score = np.array(predicted_scores)

    n_classes = y_score.shape[1]

    plt.figure(figsize=(8, 6))
    # pyplot.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib
    # releases no longer provide.
    colors = plt.get_cmap("tab10", n_classes)

    for i in range(n_classes):
        # One-vs-rest target for class i. Comparing directly also works for
        # two classes, where label_binarize would return a single column.
        is_class_i = (y_test == i).astype(int)
        fpr, tpr, _ = roc_curve(is_class_i, y_score[:, i])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=2, label=f'Class {i} (AUC = {roc_auc:.2f})', color=colors(i))

    # Diagonal = chance level.
    plt.plot([0, 1], [0, 1], 'k--', lw=1)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc='lower right')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.preprocessing import label_binarize
import numpy as np
import matplotlib.pyplot as plt

def plot_multiclass_precision_recall(true_labels, predicted_scores, title='Precision-Recall Curves (AUC-PR) for All Classes'):
    """Plot one-vs-rest precision-recall curves with average precision.

    Args:
        true_labels: Sequence of integer class indices, one per sample.
        predicted_scores: Sequence of score vectors, one per sample; column
            ``i`` is the score of class ``i``.
        title: Figure title.
    """
    y_test = np.array(true_labels)
    y_score = np.array(predicted_scores)

    n_classes = y_score.shape[1]

    plt.figure(figsize=(8, 6))
    # pyplot.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib
    # releases no longer provide.
    colors = plt.get_cmap("tab10", n_classes)

    for i in range(n_classes):
        # One-vs-rest target for class i. Comparing directly also works for
        # two classes, where label_binarize would return a single column.
        is_class_i = (y_test == i).astype(int)
        precision, recall, _ = precision_recall_curve(is_class_i, y_score[:, i])
        ap_score = average_precision_score(is_class_i, y_score[:, i])
        plt.plot(recall, precision, lw=2,
                 label=f'Class {i} (AP = {ap_score:.2f})',
                 color=colors(i))

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(title)
    plt.legend(loc='best')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def print_top1_error(true_labels, predicted_scores):
    """Print the top-1 error rate in percent.

    Args:
        true_labels: Sequence of integer class indices, one per sample.
        predicted_scores: Sequence of score vectors, one per sample; the
            predicted class is the index of the highest score.
    """
    predicted_classes = np.argmax(np.array(predicted_scores), axis=1)
    accuracy = accuracy_score(np.array(true_labels), predicted_classes)

    top1_error = (1 - accuracy) * 100
    print(f"Top-1 Error Rate: {top1_error:.2f}%")

**RESULTS for Plain 3-layer**

In [None]:
# Collect the plain CNN's predictions on the test split, then print the
# classification report and draw the confusion matrix.
(true_labels,
 predicted_labels,
 predicted_proba,
 predictions_full) = evaluate_model(model, test_ds)
print_report(true_labels, predicted_labels, predicted_proba)

In [None]:
# Accuracy curve of the plain CNN's training run, followed by per-class ROC
# and precision-recall curves computed from its test predictions.
learning_curves(history)
plot_multiclass_roc(true_labels, predictions_full)
plot_multiclass_precision_recall(true_labels, predictions_full)

In [None]:
# Top-1 error of the plain CNN on the test split.
print_top1_error(true_labels, predictions_full)

# **Frozen pre-trained ResNet as feature extractor**

In [None]:
def preprocess_batch(image, label):
    """Apply the ResNet input preprocessing to the images; labels pass through."""
    return tf.keras.applications.resnet.preprocess_input(image), label


# ResNet-preprocessed views of the three splits.
train_ds_resnet_frozen, val_ds_resnet_frozen, test_ds_resnet_frozen = (
    split.map(preprocess_batch) for split in (train_ds, val_ds, test_ds)
)

input_shape = (197, 300, 3)
num_classes = 5

# ImageNet-pretrained ResNet50 without its classification head. Its weights
# are frozen so that only the layers stacked on top are trained.
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)
base_model.trainable = False

# Frozen backbone -> global average pooling -> small dense classifier.
model_resnet_frozen = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

model_resnet_frozen.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_resnet_frozen.summary()

In [None]:

# Train only the classifier head on top of the frozen backbone for 5 epochs.
history_resnet_frozen = model_resnet_frozen.fit(
    train_ds_resnet_frozen,
    validation_data=val_ds_resnet_frozen,
    epochs=5,
)

In [None]:
# Evaluate the frozen-ResNet model (not the plain CNN) on the
# ResNet-preprocessed test split.
true_labels2, predicted_labels2, predicted_proba2, predictions_full2 = evaluate_model(
    model_resnet_frozen, test_ds_resnet_frozen)
print_report(true_labels2, predicted_labels2, predicted_proba2)

In [None]:
# Accuracy curve of the frozen-ResNet training run, followed by per-class ROC
# and precision-recall curves computed from true_labels2 / predictions_full2.
learning_curves(history_resnet_frozen)
plot_multiclass_roc(true_labels2, predictions_full2)
plot_multiclass_precision_recall(true_labels2, predictions_full2)

In [None]:
# Top-1 error of the frozen-ResNet model: use its own labels and predictions,
# not those of the plain CNN.
print_top1_error(true_labels2, predictions_full2)

# **Fine-tuned pre-trained ResNet**

In [None]:
def preprocess_batch(image, label):
    """Apply the ResNet input preprocessing to the images; labels pass through."""
    return tf.keras.applications.resnet.preprocess_input(image), label


# ResNet-preprocessed views of the three splits.
train_ds_resnet, test_ds_resnet, val_ds_resnet = (
    split.map(preprocess_batch) for split in (train_ds, test_ds, val_ds)
)

input_shape = (197, 300, 3)
num_classes = 5

# Fresh ImageNet-pretrained ResNet50 without its classification head. It
# starts out frozen so the new head is trained first.
base_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)
base_model.trainable = False

# Backbone -> global average pooling -> small dense classifier.
model_resnet = tf.keras.models.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

model_resnet.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_resnet.summary()

In [None]:
# Warm-up phase: train for 3 epochs while the backbone is still frozen.
history_resnet = model_resnet.fit(
    train_ds_resnet,
    validation_data=val_ds_resnet,
    epochs=3,
)

In [None]:
# Print the layer/parameter summary again before the backbone is unfrozen.
model_resnet.summary()

In [None]:
# Unfreeze the backbone for fine-tuning.
base_model.trainable = True

# Keep the BatchNormalization layers frozen. A non-trainable BatchNormalization
# layer runs in inference mode, so its moving statistics are not overwritten by
# the small fine-tuning dataset.
for backbone_layer in base_model.layers:
    if isinstance(backbone_layer, tf.keras.layers.BatchNormalization):
        backbone_layer.trainable = False

# Changes to `trainable` only take effect after recompiling. Use a very low
# learning rate so the pre-trained weights are adjusted gently.
model_resnet.compile(optimizer=tf.keras.optimizers.Adam(1e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# fine-tuning! Keep the history so the fine-tuning epochs can be inspected.
history_resnet_finetune = model_resnet.fit(train_ds_resnet, validation_data=val_ds_resnet, epochs=3)

In [None]:
# Collect the fine-tuned ResNet's predictions on the preprocessed test split,
# then print the classification report and draw the confusion matrix.
(true_labels3,
 predicted_labels3,
 predicted_proba3,
 predictions_full3) = evaluate_model(model_resnet, test_ds_resnet)
print_report(true_labels3, predicted_labels3, predicted_proba3)

In [None]:
# NOTE: history_resnet comes from the fit call made while the backbone was
# still frozen, so this accuracy curve covers only those epochs.
learning_curves(history_resnet)
# Per-class ROC and precision-recall curves from true_labels3 / predictions_full3.
plot_multiclass_roc(true_labels3, predictions_full3)
plot_multiclass_precision_recall(true_labels3, predictions_full3)

# Visualize filters and corresponding feature maps of the first and second Conv2d layers

In [None]:
def print_filters(filters, title, n=6, channel=0):
    """Print the first ``n`` filters of one input channel as rounded matrices.

    Args:
        filters: 4-D weight array indexed as ``[row, col, channel, filter]``.
        title: Heading printed before the filters.
        n: Number of filters to print.
        channel: Input channel whose slice of each filter is shown.
    """
    print(f'\n {title}')
    # Slice lazily so each filter is taken right before it is printed.
    kernels = (filters[:, :, channel, k] for k in range(n))
    for i, kernel in enumerate(kernels):
        print(f"\nFilter #{i}:")
        print(np.round(kernel, decimals=3))

def visualize_filters(filters, title, n=6, channel=0):
    """Show the first ``n`` filters of one input channel as grayscale images.

    Args:
        filters: 4-D weight array indexed as ``[row, col, channel, filter]``.
        title: Figure title.
        n: Number of filters to show.
        channel: Input channel whose slice of each filter is shown.
    """
    # squeeze=False always returns a 2-D array of axes, so n == 1 works too
    # (otherwise a single bare Axes is returned and indexing fails).
    fig, axes = plt.subplots(1, n, figsize=(n * 2, 2), squeeze=False)
    for i, ax in enumerate(axes[0]):
        ax.imshow(filters[:, :, channel, i], cmap='gray')
        ax.axis('off')
    plt.suptitle(title)
    plt.show()

# Collect the Conv2D layers of the plain CNN in model order.
conv_layers = list(
    filter(lambda layer: isinstance(layer, tf.keras.layers.Conv2D), model.layers)
)

# Kernel weights of the 1st and 2nd Conv2D layers (biases are discarded).
filters1, _ = conv_layers[0].get_weights()
filters2, _ = conv_layers[1].get_weights()

# First print the filters numerically, then draw them as grayscale images
# (with the 'gray' colormap, higher values appear brighter).
for show in (print_filters, visualize_filters):
    show(filters1, 'First 6 filters of the 1st Conv2D layer', 6)
    show(filters2, 'First 6 filters of the 2nd Conv2D layer', 6)

**a)**
1st layer) The filters appear to have captured basic visual patterns, such as edges, contrast changes, and texture information. Some filters capture horizontal or vertical lines, while others detect color-specific regions.

2nd layer) These filters extract more complex patterns, such as shapes or combinations of edges. They seem to build upon the features from the 1st layer to identify higher-level abstractions.

**b)**
Yes. In Layer 2, a few filters appear nearly entirely black or uniformly dark. I think I have too little data.

**c)**
Yes, based on the activation maps, some filters clearly activate in background regions (trees, water, sky).

**d)** Yes, the model is controlled by RGB

In [None]:
# Alternative (disabled): load one specific image from disk instead.
# Path to the specific image:
#img_path = "/content/cnn/5_classes/kangaroo/image_0008.jpg"
#img = image.load_img(img_path, target_size=(197, 300))  # adjust target_size to your model
#img_array = image.img_to_array(img)
#img_array = img_array / 255.0

# Take the first sample of the (unbatched) dataset and add a leading batch
# axis so it can be fed to the model.
sample_image, _ = next(iter(dataset.take(1)))
input_image = np.asarray(sample_image)[np.newaxis, ...]

conv_layers = list(
    filter(lambda layer: isinstance(layer, tf.keras.layers.Conv2D), model.layers)
)

# Model that shares the CNN's input but outputs the activations of the first
# two Conv2D layers.
first_conv_output = conv_layers[0].output
second_conv_output = conv_layers[1].output
activation_model = tf.keras.models.Model(
    inputs=model.input,
    outputs=[first_conv_output, second_conv_output],
)

# Feature maps of both layers for the sample image.
feature_maps = activation_model.predict(input_image)


def plot_feature_maps(fmaps, title, n=6):
    """Show the first ``n`` feature maps of the first sample in a batch.

    Args:
        fmaps: Activations indexed as ``[sample, row, col, channel]``.
        title: Figure title.
        n: Number of channels to show.
    """
    fmap = fmaps[0]
    # squeeze=False always returns a 2-D array of axes, so n == 1 works too
    # (otherwise a single bare Axes is returned and indexing fails).
    fig, axes = plt.subplots(1, n, figsize=(n * 2, 2), squeeze=False)
    for i, ax in enumerate(axes[0]):
        ax.imshow(fmap[:, :, i], cmap='viridis')
        ax.axis('off')
    plt.suptitle(title)
    plt.show()

# These figures show activations (feature maps), not the filters themselves,
# so the titles say so.
plot_feature_maps(feature_maps[0], "First 6 feature maps of the 1st Conv2D layer", 6)
plot_feature_maps(feature_maps[1], "First 6 feature maps of the 2nd Conv2D layer", 6)

# Conclusion

The plain 3-layer CNN already demonstrated strong performance, which I believe is due to the small dataset size of only 665 images. When comparing the fine-tuned pre-trained ResNet to the frozen pre-trained ResNet used as a feature extractor, the accuracy increased significantly—from 76% to 99%. The final model correctly classified 164 out of 165 test images.
It can also be observed that both the fine-tuned pre-trained ResNet and the frozen pre-trained ResNet are highly prone to overfitting—even after just 2 epochs.