# Required Imports #

In [None]:
pip install tensorflow_datasets --user

In [166]:
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd

import keras
import shutil
import os
from pathlib import Path
from keras import layers
from keras import models
from keras import optimizers

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.callbacks import LambdaCallback

# ANN Model Manipulation #

## Data Preprocessing ##

In [132]:
# Make sure your directories are clean for your train and test data!!
try:
    shutil.rmtree("Data/Split_Data/Test")
    shutil.rmtree("Data/Split_Data/Train")
except:
    pass

In [133]:
# Now let's make our train/test directories!!

Path("Data/Split_Data/Test/No_Damage").mkdir(parents=True, exist_ok=True)
Path("Data/Split_Data/Test/Damaged").mkdir(parents=True, exist_ok=True)

Path("Data/Split_Data/Train/No_Damage").mkdir(parents=True, exist_ok=True)
Path("Data/Split_Data/Train/Damaged").mkdir(parents=True, exist_ok=True)

# Now We have to make sure we have a path for all the files.
All_NoDmg = os.listdir('Data/OG_Data/no_damage')
All_Dmg = os.listdir('Data/OG_Data/damage')

In [134]:
# Time to split our data into the train and test folders!! Using 80/20!!

Tr_NoDmg = random.sample(All_NoDmg, int(len(All_NoDmg)*0.8))
print("Train No_Damage image count: ", len(Tr_NoDmg))
Ts_NoDmg = [ p for p in All_NoDmg if p not in Tr_NoDmg]
print("Test No_Damage image count: ", len(Ts_NoDmg))
# ensure no overlap:
overlap = [p for p in Tr_NoDmg if p in Ts_NoDmg]
print("Length of overlap: ", len(overlap))

print("\n")

Tr_Dmg = random.sample(All_Dmg, int(len(All_Dmg)*0.8))
print("Train No Damage image count: ", len(Tr_Dmg))
Ts_Dmg = [ p for p in All_Dmg if p not in Tr_Dmg]
print("Test No Damage image count: ", len(Ts_Dmg))
# ensure no overlap:
overlap = [p for p in Tr_Dmg if p in Ts_Dmg]
print("Length of overlap: ", len(overlap))

Train No_Damage image count:  5721
Test No_Damage image count:  1431
Length of overlap:  0


Train No Damage image count:  11336
Test No Damage image count:  2834
Length of overlap:  0


In [135]:
# Make sure to actually put the data into the folders hehe

for p in Ts_NoDmg:
    shutil.copyfile(os.path.join('Data/OG_Data/no_damage', p), os.path.join('Data/Split_Data/Test/No_Damage', p) )
for p in Tr_NoDmg:
    shutil.copyfile(os.path.join('Data/OG_Data/no_damage', p), os.path.join('Data/Split_Data/Train/No_Damage', p) )

for p in Ts_Dmg:
    shutil.copyfile(os.path.join('Data/OG_Data/damage', p), os.path.join('Data/Split_Data/Test/Damaged', p) )
for p in Tr_Dmg:
    shutil.copyfile(os.path.join('Data/OG_Data/damage', p), os.path.join('Data/Split_Data/Train/Damaged', p) )

# check counts:
cpd_TrNoDmg = len(os.listdir("Data/Split_Data/Train/No_Damage"))
cpd_TsNoDmg = len(os.listdir("Data/Split_Data/Test/No_Damage"))
cpd_TrDmg = len(os.listdir("Data/Split_Data/Train/Damaged"))
cpd_TsDmg = len(os.listdir("Data/Split_Data/Test/Damaged"))

print("Files in Train/No_Damage: ", cpd_TrNoDmg)
print("Files in Test/No_Damage: ", cpd_TsNoDmg)
print("\n")
print("Files in Train/Damaged: ", cpd_TrDmg)
print("Files in Test/Damaged: ", cpd_TsDmg)

Files in Train/No_Damage:  5721
Files in Test/No_Damage:  1431


Files in Train/Damaged:  11336
Files in Test/Damaged:  2834


In [136]:
# We need them to be 1D arrays!!
print("Training data for no damaged shape:", TrNoDmg.shape, "& training damaged shape:", TrDmg.shape)
print("Testing data for no damaged shape:", TsNoDmg.shape, "& testing damaged shape:", TsDmg.shape)

Training data for no damaged shape: (5721,) & training damaged shape: (11336,)
Testing data for no damaged shape: (1431,) & testing damaged shape: (2834,)


## ANN Model ##

For ANN Models we need to flatten the images, and with a bit of external investigation, I saw that the majority of the file I checked had a size of 128x128 pixels. So I am going to use that as my base!!

In [144]:
train_data_dir = 'Data/Split_Data/Train'

batch_size = 32
# target image size
img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates which dataset is returned
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
train_data_dir,
validation_split=0.2,
subset="both",
seed=224,
image_size=(img_height, img_width),
batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_rescale_ds = val_ds.map(lambda image,label:(rescale(image),label))

Found 17057 files belonging to 2 classes.
Using 13646 files for training.
Using 3411 files for validation.


In [145]:
test_data_dir = 'Data/Split_Data/Test'

batch_size = 32

img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates what is returned
test_ds = tf.keras.utils.image_dataset_from_directory(
test_data_dir,
seed=224,
image_size=(img_height, img_width),
)

rescale = Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image,label:(rescale(image),label))

Found 4265 files belonging to 2 classes.


In [152]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [153]:
model.fit(train_rescale_ds,batch_size=32,epochs=20,validation_data=val_rescale_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7fce97466590>

In [154]:
test_loss, test_accuracy = model.evaluate(test_rescale_ds, verbose=0)
test_accuracy

0.6644783020019531

# CNN Model Manipulation #

## Data Preprocessing ##

In [167]:
# Make sure your directories are clean for your train and test data!!
try:
    shutil.rmtree("Data/Split_Data/Test")
    shutil.rmtree("Data/Split_Data/Train")
except:
    pass

In [168]:
# Now let's make our train/test directories!!

Path("Data/Split_Data/Test/No_Damage").mkdir(parents=True, exist_ok=True)
Path("Data/Split_Data/Test/Damaged").mkdir(parents=True, exist_ok=True)

Path("Data/Split_Data/Train/No_Damage").mkdir(parents=True, exist_ok=True)
Path("Data/Split_Data/Train/Damaged").mkdir(parents=True, exist_ok=True)

# Now We have to make sure we have a path for all the files.
All_NoDmg = os.listdir('Data/OG_Data/no_damage')
All_Dmg = os.listdir('Data/OG_Data/damage')

In [169]:
# Time to split our data into the train and test folders!! Using 80/20!!

Tr_NoDmg = random.sample(All_NoDmg, int(len(All_NoDmg)*0.8))
print("Train No_Damage image count: ", len(Tr_NoDmg))
Ts_NoDmg = [ p for p in All_NoDmg if p not in Tr_NoDmg]
print("Test No_Damage image count: ", len(Ts_NoDmg))
# ensure no overlap:
overlap = [p for p in Tr_NoDmg if p in Ts_NoDmg]
print("Length of overlap: ", len(overlap))

print("\n")

Tr_Dmg = random.sample(All_Dmg, int(len(All_Dmg)*0.8))
print("Train No Damage image count: ", len(Tr_Dmg))
Ts_Dmg = [ p for p in All_Dmg if p not in Tr_Dmg]
print("Test No Damage image count: ", len(Ts_Dmg))
# ensure no overlap:
overlap = [p for p in Tr_Dmg if p in Ts_Dmg]
print("Length of overlap: ", len(overlap))

Train No_Damage image count:  5721
Test No_Damage image count:  1431
Length of overlap:  0


Train No Damage image count:  11336
Test No Damage image count:  2834
Length of overlap:  0


In [170]:
# Make sure to actually put the data into the folders hehe

for p in Ts_NoDmg:
    shutil.copyfile(os.path.join('Data/OG_Data/no_damage', p), os.path.join('Data/Split_Data/Test/No_Damage', p) )
for p in Tr_NoDmg:
    shutil.copyfile(os.path.join('Data/OG_Data/no_damage', p), os.path.join('Data/Split_Data/Train/No_Damage', p) )

for p in Ts_Dmg:
    shutil.copyfile(os.path.join('Data/OG_Data/damage', p), os.path.join('Data/Split_Data/Test/Damaged', p) )
for p in Tr_Dmg:
    shutil.copyfile(os.path.join('Data/OG_Data/damage', p), os.path.join('Data/Split_Data/Train/Damaged', p) )

# check counts:
cpd_TrNoDmg = len(os.listdir("Data/Split_Data/Train/No_Damage"))
cpd_TsNoDmg = len(os.listdir("Data/Split_Data/Test/No_Damage"))
cpd_TrDmg = len(os.listdir("Data/Split_Data/Train/Damaged"))
cpd_TsDmg = len(os.listdir("Data/Split_Data/Test/Damaged"))

print("Files in Train/No_Damage: ", cpd_TrNoDmg)
print("Files in Test/No_Damage: ", cpd_TsNoDmg)
print("\n")
print("Files in Train/Damaged: ", cpd_TrDmg)
print("Files in Test/Damaged: ", cpd_TsDmg)

Files in Train/No_Damage:  5721
Files in Test/No_Damage:  1431


Files in Train/Damaged:  11336
Files in Test/Damaged:  2834


## Lenet-5 CNN Model ##

In [171]:
train_data_dir = 'Data/Split_Data/Train'

batch_size = 32
# target image size
img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates which dataset is returned
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
train_data_dir,
validation_split=0.2,
subset="both",
seed=224,
image_size=(img_height, img_width),
batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_rescale_ds = val_ds.map(lambda image,label:(rescale(image),label))

Found 17057 files belonging to 2 classes.
Using 13646 files for training.
Using 3411 files for validation.


In [172]:
test_data_dir = 'Data/Split_Data/Test'

batch_size = 2

img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates what is returned
test_ds = tf.keras.utils.image_dataset_from_directory(
test_data_dir,
seed=224,
image_size=(img_height, img_width),
)

# approach 1: manually rescale data --
rescale = Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image,label:(rescale(image),label))

Found 4265 files belonging to 2 classes.


In [174]:
image_size=128*128

model_lenet5 = models.Sequential()

# Layer 1: Convolutional layer with 6 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(6, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))
model_lenet5.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same", input_shape=(128,128,3)))

# Layer 2: Convolutional layer with 16 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu'))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Flatten the feature maps to feed into fully connected layers
model_lenet5.add(layers.Flatten())

# Layer 3: Fully connected layer with 120 neurons
model_lenet5.add(layers.Dense(120, activation='relu'))

# Layer 4: Fully connected layer with 84 neurons
model_lenet5.add(layers.Dense(84, activation='relu'))

# Output layer: Fully connected layer with num_classes neurons (e.g., 3 )
model_lenet5.add(layers.Dense(3, activation='softmax'))

# Compile model
model_lenet5.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_lenet5.summary()

Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 126, 126, 6)       168       
                                                                 
 average_pooling2d_1 (Avera  (None, 63, 63, 6)         0         
 gePooling2D)                                                    
                                                                 
 conv2d_2 (Conv2D)           (None, 63, 63, 64)        3520      
                                                                 
 conv2d_3 (Conv2D)           (None, 61, 61, 16)        9232      
                                                                 
 average_pooling2d_2 (Avera  (None, 30, 30, 16)        0         
 gePooling2D)                                                    
                                                                 
 flatten_12 (Flatten)        (None, 14400)           

In [175]:
#fit the model from image generator
history = model_lenet5.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [177]:
test_loss, test_accuracy = model_lenet5.evaluate(test_rescale_ds, verbose=0)
test_accuracy

0.9470105767250061

## Alternate Lenet-5 CNN Model ##

In [178]:
image_size=128*128

model_altL5 = models.Sequential()

# Layer 1: Convolutional layer with 32 filters of size 3x3, followed by average pooling
model_altL5.add(layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_altL5.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Layer 2: Convolutional layer with 64 filters of size 3x3, followed by average pooling
model_altL5.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
model_altL5.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Layer 3: Convolutional layer with 128 filters of size 3x3, followed by average pooling
model_altL5.add(layers.Conv2D(128, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_altL5.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Layer 4: Convolutional layer with 32 filters of size 3x3, followed by average pooling
model_altL5.add(layers.Conv2D(128, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_altL5.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps to feed into fully connected layers
model_altL5.add(layers.Flatten())

# Adding dropout prevents overfitting
model_altL5.add(layers.Dropout(0.2))

# Layer 4: Fully connected layer with 84 neurons
model_altL5.add(layers.Dense(512, activation='relu'))

# Output layer: Fully connected layer with num_classes neurons (e.g., 3 )
model_altL5.add(layers.Dense(2, activation='softmax'))

# Compile model
model_altL5.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_altL5.summary()

Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 63, 32)        0         
 D)                                                              
                                                                 
 conv2d_5 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_6 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 128)     

In [179]:
#fit the model from image generator
history = model_altL5.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [180]:
test_loss, test_accuracy = model_altL5.evaluate(test_rescale_ds, verbose=0)
test_accuracy

0.96858149766922