In [1]:
import os
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, GlobalAveragePooling2D


In [12]:
import shutil
import zipfile
import kagglehub



# 1. Download dataset
path = kagglehub.dataset_download("tongpython/cat-and-dog")
print("Downloaded dataset folder:", path)

# 2. Target directory (relative to notebook)
target_dir = "../data/user1"
os.makedirs(target_dir, exist_ok=True)

# 3. Copy entire dataset directory into target_dir
for item in os.listdir(path):
    src = os.path.join(path, item)
    dst = os.path.join(target_dir, item)

    if os.path.isdir(src):
        if os.path.exists(dst):
            shutil.rmtree(dst)    # remove if exists
        shutil.copytree(src, dst)
    else:
        shutil.copy2(src, dst)

print(f"Dataset copied successfully to: {target_dir}")

Downloaded dataset folder: C:\Users\rainer\.cache\kagglehub\datasets\tongpython\cat-and-dog\versions\1
Dataset copied successfully to: ../data/user1


In [None]:
# --- ML Code: Preprocessing ---
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

print("--- User 1 Data Structure ---")
!dir ..\\data\\user1 /s



--- User 1 Data Structure ---
 Volume in drive C has no label.
 Volume Serial Number is 201B-6403

 Directory of c:\Users\rainer\Documents\Code\collaborative_cnn_team01\notebooks\data\user1

20-11-2025  11:40    <DIR>          .
20-11-2025  11:40    <DIR>          ..
               0 File(s)              0 bytes

     Total Files Listed:
               0 File(s)              0 bytes
               2 Dir(s)  133,664,878,592 bytes free


In [16]:
DATA_DIR_USER1 = '../data/user1/training_set/training_set'

datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator_u1 = datagen.flow_from_directory(
    DATA_DIR_USER1,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary', # Changed to binary for Cat vs Dog
    subset='training'
)
validation_generator_u1 = datagen.flow_from_directory(
    DATA_DIR_USER1,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary', # Changed to binary
    subset='validation'
)

Found 6404 images belonging to 2 classes.
Found 1601 images belonging to 2 classes.


In [7]:
DATA_DIR_USER1 = 'data/user1/training_set/training_set'

datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator_u1 = datagen.flow_from_directory(
    DATA_DIR_USER1,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary', # Changed to binary for Cat vs Dog
    subset='training'
)
validation_generator_u1 = datagen.flow_from_directory(
    DATA_DIR_USER1,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary', # Changed to binary
    subset='validation'
)


Found 6404 images belonging to 2 classes.
Found 1601 images belonging to 2 classes.


In [17]:


# --- ML Code: Train Model V1 ---
model_v1 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    MaxPooling2D(pool_size=(3, 3)),

    # Block 2 (Simulating residual behavior by stacking layers)
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(3, 3)),

    # Block 3
    Conv2D(64, (3, 3), activation='relu'),
    GlobalAveragePooling2D(),

    # Flattened Layer
    Dense(128, activation='relu'),

    Dense(1, activation='sigmoid')  # Binary classification
])
model_v1.compile(optimizer='adam',
                  loss='binary_crossentropy', # Changed for binary
                  metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
model_v1.summary()

In [19]:
history_v1 = model_v1.fit(train_generator_u1, epochs=5, validation_data=validation_generator_u1)


Epoch 1/5
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 759ms/step - accuracy: 0.5283 - loss: 0.6855 - val_accuracy: 0.5971 - val_loss: 0.6577
Epoch 2/5
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 166ms/step - accuracy: 0.5981 - loss: 0.6585 - val_accuracy: 0.6096 - val_loss: 0.6480
Epoch 3/5
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 168ms/step - accuracy: 0.6113 - loss: 0.6459 - val_accuracy: 0.6652 - val_loss: 0.6255
Epoch 4/5
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 167ms/step - accuracy: 0.6416 - loss: 0.6245 - val_accuracy: 0.6777 - val_loss: 0.6002
Epoch 5/5
[1m201/201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 167ms/step - accuracy: 0.6821 - loss: 0.5995 - val_accuracy: 0.6908 - val_loss: 0.5722


In [21]:

# --- Print metrics before saving ---
print("\n===== TRAIN METRICS =====")
if "loss" in history_v1.history:
    print(f"Final Train Loss      : {history_v1.history['loss'][-1]:.4f}")
if "accuracy" in history_v1.history:
    print(f"Final Train Accuracy  : {history_v1.history['accuracy'][-1]:.4f}")

print("\n===== VALIDATION METRICS =====")
if "val_loss" in history_v1.history:
    print(f"Final Val Loss        : {history_v1.history['val_loss'][-1]:.4f}")
if "val_accuracy" in history_v1.history:
    print(f"Final Val Accuracy    : {history_v1.history['val_accuracy'][-1]:.4f}")



===== TRAIN METRICS =====
Final Train Loss      : 0.5995
Final Train Accuracy  : 0.6821

===== VALIDATION METRICS =====
Final Val Loss        : 0.5722
Final Val Accuracy    : 0.6908


In [23]:
# --- Ensure directories exist ---
os.makedirs("../models", exist_ok=True)
os.makedirs("../results", exist_ok=True)

# --- Save model ---
model_v1.save("../models/model_v1.h5")
print("\nSaved model_v1.h5 to models/")

# --- Save ALL metrics (not only last epoch) ---
metrics_v1_data = {}

for key, values in history_v1.history.items():
    # Convert numpy.float32 → Python float
    metrics_v1_data[key] = [float(v) for v in values]

# Save final epoch separately for convenience
metrics_v1_data["final"] = {
    "train_accuracy": float(history_v1.history["accuracy"][-1]) if "accuracy" in history_v1.history else None,
    "train_loss": float(history_v1.history["loss"][-1]) if "loss" in history_v1.history else None
  }

with open("../results/metrics_v1.json", "w") as f:
    json.dump(metrics_v1_data, f, indent=4)

print("Saved metrics_v1.json to results/")




Saved model_v1.h5 to models/
Saved metrics_v1.json to results/
