In [25]:
import shutil
import os
from pathlib import Path

# Remove existing train/test directories if necessary
try:
    shutil.rmtree('data/Project3-split/train')
    shutil.rmtree('data/Project3-split/test')
except:
    pass

# Create train directories for images
Path('data/Project3-split/train/damage').mkdir(parents=True, exist_ok=True)
Path('data/Project3-split/train/no_damage').mkdir(parents=True, exist_ok=True)

# Create test directories for images
Path('data/Project3-split/test/damage').mkdir(parents=True, exist_ok=True)
Path('data/Project3-split/test/no_damage').mkdir(parents=True, exist_ok=True)

# Paths of images for each class (damage and no damage)
all_damage_file_paths = os.listdir('data/Project3/damage')
all_no_damage_file_paths = os.listdir('data/Project3/no_damage')

In [26]:
import random
# Create a random 80/20 training testing split for the damage images
damage_train_file_paths = random.sample(all_damage_file_paths, int(len(all_damage_file_paths) * 0.8))
print(f"Number of damage images in train: {len(damage_train_file_paths)}")
damage_test_file_paths = [path for path in all_damage_file_paths if path not in damage_train_file_paths]
print(f"Number of damage images in test: {len(damage_test_file_paths)}")
overlap = [path for path in damage_train_file_paths if path in damage_test_file_paths]
print(f"Overlap in damage images (should be zero): {len(overlap)}")

# Create a random 80/20 training testing split for the no damage images
no_damage_train_file_paths = random.sample(all_no_damage_file_paths, int(len(all_no_damage_file_paths) * 0.8))
print(f"Number of no damage images in train: {len(no_damage_train_file_paths)}")
no_damage_test_file_paths = [path for path in all_no_damage_file_paths if path not in no_damage_train_file_paths]
print(f"Number of no damage images in test: {len(no_damage_test_file_paths)}")
overlap = [path for path in no_damage_train_file_paths if path in no_damage_test_file_paths]
print(f"Overlap in no damage images (should be zero): {len(overlap)}")

Number of damage images in train: 800
Number of damage images in test: 200
Overlap in damage images (should be zero): 0
Number of no damage images in train: 486
Number of no damage images in test: 122
Overlap in no damage images (should be zero): 0


In [27]:
# Copy the images to the train/test directories
for path in damage_train_file_paths:
    shutil.copyfile(f"data/Project3/damage/{path}", f"data/Project3-split/train/damage/{path}")
for path in damage_test_file_paths:
    shutil.copyfile(f"data/Project3/damage/{path}", f"data/Project3-split/test/damage/{path}")
for path in no_damage_train_file_paths:
    shutil.copyfile(f"data/Project3/no_damage/{path}", f"data/Project3-split/train/no_damage/{path}")
for path in no_damage_test_file_paths:
    shutil.copyfile(f"data/Project3/no_damage/{path}", f"data/Project3-split/test/no_damage/{path}")

# Check counts
print("Files in train/damage: ", len(os.listdir('data/Project3-split/train/damage')))
print("Files in train/no_damage: ", len(os.listdir('data/Project3-split/train/no_damage')))
print("Files in test/damage: ", len(os.listdir('data/Project3-split/test/damage')))
print("Files in test/no_damage: ", len(os.listdir('data/Project3-split/test/no_damage')))

Files in train/damage:  800
Files in train/no_damage:  486
Files in test/damage:  200
Files in test/no_damage:  122


In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Rescaling
from tensorflow.keras.utils import image_dataset_from_directory

# Path to the training data directory
train_data_dir = 'data/Project3-split/train'

# Controls size of "batches" of images streamed
# when accesses the dataset, helps control memory usage
batch_size = 32

# Image size to which all images will be resized
img_height = 150
img_width = 150

# Create a training dataset from the directory
train_ds, val_ds = image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="both",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size
)
rescale = Rescaling(1.0/255)

train_rescale_ds = train_ds.map(lambda x, y: (rescale(x), y))
val_rescale_ds = val_ds.map(lambda x, y: (rescale(x), y))

Found 1286 files belonging to 2 classes.
Using 1029 files for training.
Using 257 files for validation.


In [29]:
# Do the same for the test dataset
test_data_dir = 'data/Project3-split/test'
test_ds = image_dataset_from_directory(
    test_data_dir,
    image_size=(img_height, img_width),
    batch_size=batch_size
)
test_rescale_ds = test_ds.map(lambda x, y: (rescale(x), y))

Found 322 files belonging to 2 classes.


In [30]:
# Print image and label shape
print("Image shape: ", train_rescale_ds.element_spec[0].shape)
print("Label shape: ", train_rescale_ds.element_spec[1].shape)

Image shape:  (None, 128, 128, 3)
Label shape:  (None,)


In [None]:
# Part (a): A dense (i.e., fully connected) ANN
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
model = Sequential()

# Flatten input data
model.add(Flatten(input_shape=(150,150,3)))

# Input layer
model.add(Dense(150, activation='relu'))

# Hidden layer
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))

# Sigmoid activation selected for binary classification
model.add(Dense(1, activation='sigmoid'))

  super().__init__(**kwargs)


In [32]:
# Model training
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.summary()
history = model.fit(
    train_rescale_ds,
    batch_size=32,
    epochs=20,
    validation_data=val_rescale_ds
)

Epoch 1/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 99ms/step - accuracy: 0.5339 - loss: 2.1025 - val_accuracy: 0.6770 - val_loss: 0.5975
Epoch 2/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 82ms/step - accuracy: 0.6625 - loss: 0.5958 - val_accuracy: 0.8054 - val_loss: 0.4310
Epoch 3/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.7781 - loss: 0.4855 - val_accuracy: 0.8482 - val_loss: 0.4547
Epoch 4/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 81ms/step - accuracy: 0.7993 - loss: 0.4412 - val_accuracy: 0.8171 - val_loss: 0.3803
Epoch 5/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 79ms/step - accuracy: 0.8269 - loss: 0.3911 - val_accuracy: 0.8171 - val_loss: 0.4078
Epoch 6/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 80ms/step - accuracy: 0.7564 - loss: 0.4481 - val_accuracy: 0.6576 - val_loss: 0.8017
Epoch 7/20
[1m33/33[0m [32m━━━━

In [9]:
# Evaluate model performance
test_loss, test_acc = model.evaluate(test_rescale_ds)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.8316 - loss: 0.3698
Test accuracy: 0.8136646151542664
Test loss: 0.40247786045074463


In [None]:
# Part (b): The Lenet-5 CNN architecture

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, AveragePooling2D 
model = Sequential()

# C1 Convolution layer
model.add(Conv2D(filters=6, kernel_size=(3,3), activation='relu', input_shape=(150, 150, 3)))

# S2 Subsampling layer
model.add(AveragePooling2D(pool_size=(2, 2)))

# C3 Convolution layer
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))

# S4 Subsampling layer
model.add(AveragePooling2D(pool_size=(2, 2)))

# Flatten Output to connect it to the fully connected layer
model.add(Flatten())

# C5 Fully Connected Layer
model.add(Dense(120, activation='relu'))

# FC6 Fully Connected Layers
model.add(Dense(84, activation='relu'))

# Output Layer
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
# Model training
from tensorflow.keras.optimizers import RMSprop
model.compile(
    optimizer=RMSprop(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)
model.summary()
history = model.fit(
    train_rescale_ds,
    batch_size=32,
    epochs=20,
    validation_data=val_rescale_ds
)

Epoch 1/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 72ms/step - accuracy: 0.6365 - loss: 0.6648 - val_accuracy: 0.4553 - val_loss: 0.6879
Epoch 2/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.6564 - loss: 0.6195 - val_accuracy: 0.6809 - val_loss: 0.5891
Epoch 3/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step - accuracy: 0.7455 - loss: 0.5556 - val_accuracy: 0.5875 - val_loss: 0.6428
Epoch 4/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - accuracy: 0.7471 - loss: 0.5363 - val_accuracy: 0.7626 - val_loss: 0.5202
Epoch 5/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.7934 - loss: 0.4924 - val_accuracy: 0.6615 - val_loss: 0.5762
Epoch 6/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step - accuracy: 0.7676 - loss: 0.4906 - val_accuracy: 0.7004 - val_loss: 0.5110
Epoch 7/20
[1m33/33[0m [32m━━━━

In [12]:
# Evaluate model performance
test_loss, test_acc = model.evaluate(test_rescale_ds)
print('Test accuracy:', test_acc)
print('Test loss:', test_loss)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8397 - loss: 0.3582
Test accuracy: 0.8416149020195007
Test loss: 0.3729707598686218


In [None]:
# Part (c): Alternate-Lenet-5 CNN architecture, described in the following
# paper (Table 1, Page 12 of the paper https://arxiv.org/pdf/1807.01688.pdf,
# but note that the dataset is not the same as that analyzed in the paper.)

from tensorflow.keras import layers, models, optimizers
model = models.Sequential()

# Add convolutional and pooling layers according to research paper
model.add(layers.Conv2D(32, (3, 3), activation='relu', \
    input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2), padding='same'))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2), padding='same'))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2), padding='same'))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Add flattening, dropout, and fully connected layers
model.add(layers.Flatten())
model.add(layers.Dropout(0.2))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model using Root Mean Square Propagation, commonly used for
# training deep neural networks. Fit the model to training/validation data.
model.compile(
    optimizer=optimizers.RMSprop(learning_rate=0.0001),
    loss='binary_crossentropy', metrics=['accuracy']
)
model.summary()
history = model.fit(
    train_rescale_ds,
    batch_size=32,
    epochs=20,
    validation_data=val_rescale_ds
)

Epoch 1/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 270ms/step - accuracy: 0.6272 - loss: 0.6457 - val_accuracy: 0.7160 - val_loss: 0.5279
Epoch 2/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 257ms/step - accuracy: 0.7435 - loss: 0.5203 - val_accuracy: 0.7549 - val_loss: 0.5279
Epoch 3/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 231ms/step - accuracy: 0.7571 - loss: 0.4992 - val_accuracy: 0.7549 - val_loss: 0.4648
Epoch 4/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 219ms/step - accuracy: 0.7856 - loss: 0.4584 - val_accuracy: 0.7626 - val_loss: 0.4496
Epoch 5/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 217ms/step - accuracy: 0.7960 - loss: 0.4383 - val_accuracy: 0.7899 - val_loss: 0.4241
Epoch 6/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 239ms/step - accuracy: 0.8254 - loss: 0.4347 - val_accuracy: 0.8054 - val_loss: 0.4189
Epoch 7/20
[1m33/33[0m [

In [15]:
# Evaluate results of the Alternate-Lenet-5 CNN
test_loss, test_acc = model.evaluate(test_rescale_ds, verbose=0)
print(f"Loss on test: {test_loss:.3f}")
print(f"Accuracy on test: {test_acc:.3f}")

Loss on test: 0.242
Accuracy on test: 0.907


In [None]:
# Additional Model: VGG-16 Architecture
from tensorflow.keras.applications.vgg16 import VGG16
vgg_model = VGG16(
    weights='imagenet', include_top=False, input_shape=(150, 150, 3)
)
vgg_model.summary()

# Freeze the layers in the VGG16 model
for layer in vgg_model.layers:
    layer.trainable = False

new_model = models.Sequential()
new_model.add(vgg_model) # Feature extraction layers
new_model.add(layers.Flatten()) # Flatten the output

# Add flattening, dropout, and fully connected layers
new_model.add(layers.Dense(32, activation='relu'))
new_model.add(layers.Dropout(0.5))
new_model.add(layers.Dense(32, activation='relu'))
new_model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model using Adam optimizer. Fit the model to training/
# validation data.
new_model.compile(
    optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']
)
new_model.summary()
history = new_model.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds
)

Epoch 1/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - accuracy: 0.7327 - loss: 0.5303 - val_accuracy: 0.8794 - val_loss: 0.3455
Epoch 2/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 1s/step - accuracy: 0.8909 - loss: 0.2765 - val_accuracy: 0.9339 - val_loss: 0.1777
Epoch 3/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 996ms/step - accuracy: 0.9417 - loss: 0.1702 - val_accuracy: 0.9572 - val_loss: 0.1385
Epoch 4/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 2s/step - accuracy: 0.9505 - loss: 0.1435 - val_accuracy: 0.9689 - val_loss: 0.0968
Epoch 5/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 2s/step - accuracy: 0.9756 - loss: 0.0809 - val_accuracy: 0.9455 - val_loss: 0.1285
Epoch 6/20
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 993ms/step - accuracy: 0.9761 - loss: 0.0804 - val_accuracy: 0.9728 - val_loss: 0.0771
Epoch 7/20
[1m33/33[0m [32m━━━━

In [21]:
# Evaluate results of the VGG-16 CNN
test_loss, test_acc = new_model.evaluate(test_rescale_ds, verbose=0)
print(f"Loss on test: {test_loss:.3f}")
print(f"Accuracy on test: {test_acc:.3f}")

Loss on test: 0.047
Accuracy on test: 0.978


In [23]:
import numpy as np
from sklearn.metrics import classification_report

# Step 1: Collect true labels and predictions from the test dataset
y_true = []
y_pred = []

for images, labels in test_rescale_ds:
    preds = new_model.predict(images)
    y_true.extend(labels.numpy())
    y_pred.extend(np.round(preds).flatten())  # Convert probabilities to 0 or 1

# Step 2: Convert to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Step 3: Generate the classification report (dictionary + string)
report_dict = classification_report(y_true, y_pred, output_dict=True)
report_text = classification_report(y_true, y_pred)

# Step 4: Print full text report
print("Full Classification Report:\n")
print(report_text)

# Step 5: Optionally, extract and print individual metrics
accuracy = report_dict['accuracy']
precision = report_dict['1']['precision']
recall = report_dict['1']['recall']
f1 = report_dict['1']['f1-score']

print("\nKey Metrics:")
print(f"Accuracy : {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall   : {recall:.3f}")
print(f"F1 Score : {f1:.3f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 495ms/step
Full Classification Report:

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       200
           1       0.97      0.98      0.97       122

    accuracy                      

In [19]:
# Keep track of best model (VGG 16)
vgg_16_model = new_model

# Save the model history for plotting later
vgg_16_model.save('vgg_16_model.keras')