# Project 3 - Hurricane Harvey Image Classification Using Neural Networks

## Part 1

In [1]:
import shutil
# Start from a clean slate so files from a previous split cannot leak into the
# new one. Each tree is removed on its own: with a single try/except around both
# calls, a missing train/ directory would raise before test/ was ever touched,
# leaving stale test images behind.
for split_dir in ("./data/split/train", "./data/split/test"):
    try:
        shutil.rmtree(split_dir)
    except FileNotFoundError:
        # Nothing to clean up (e.g. first run). Any other error (permissions,
        # files in use, ...) is allowed to surface instead of being swallowed.
        pass

In [2]:
import os
from pathlib import Path

# Lay out the split tree: one folder per split, each with one sub-folder per
# class label. Existing folders are left as they are (exist_ok=True).
for split_name in ("train", "test"):
    for class_name in ("damage", "no_damage"):
        Path("./data/split", split_name, class_name).mkdir(parents=True, exist_ok=True)

In [3]:
# os.listdir() returns entries in arbitrary, filesystem-dependent order.
# Sorting gives a deterministic listing, which is a prerequisite for a
# reproducible random train/test split.
all_damage_file_paths = sorted(os.listdir('./data/damage'))
all_no_damage_file_paths = sorted(os.listdir('./data/no_damage'))

In [4]:
import random

SPLIT_SEED = 42       # fixed seed so the train/test split is reproducible across runs
TRAIN_FRACTION = 0.8  # share of each class that goes into the training split


def _split_train_test(paths, train_fraction, rng):
    """Randomly partition ``paths`` into disjoint train and test lists.

    Args:
        paths: file names belonging to a single class.
        train_fraction: fraction of ``paths`` to place in the training list;
            the count is truncated with ``int()``.
        rng: ``random.Random`` instance used for sampling.

    Returns:
        ``(train, test)`` where ``train`` is the random sample and ``test``
        holds every remaining name.
    """
    # Sort first so the result depends only on the seed, not on listing order.
    ordered = sorted(paths)
    train = rng.sample(ordered, int(len(ordered) * train_fraction))
    # Set lookup keeps the complement O(n) instead of O(n^2) list scans.
    train_lookup = set(train)
    test = [p for p in ordered if p not in train_lookup]
    return train, test


# A private generator avoids reseeding the global `random` module state.
_split_rng = random.Random(SPLIT_SEED)

train_damage_paths, test_damage_paths = _split_train_test(
    all_damage_file_paths, TRAIN_FRACTION, _split_rng
)
print("train damage image count: ", len(train_damage_paths))
print("test damage image count: ", len(test_damage_paths))
# ensure no overlap:
overlap = sorted(set(train_damage_paths) & set(test_damage_paths))
print("len of overlap: ", len(overlap))

train_no_damage_paths, test_no_damage_paths = _split_train_test(
    all_no_damage_file_paths, TRAIN_FRACTION, _split_rng
)
print("train no_damage image count: ", len(train_no_damage_paths))
print("test no_damage image count: ", len(test_no_damage_paths))
# ensure no overlap:
overlap = sorted(set(train_no_damage_paths) & set(test_no_damage_paths))
print("len of overlap: ", len(overlap))

train damage image count:  11336
test damage image count:  2834
len of overlap:  0
train no_damage image count:  5721
test no_damage image count:  1431
len of overlap:  0


In [5]:
import shutil
# Copy every image into its split/class folder. Source files stay in place.
# Each job is (file names, source folder, destination folder).
copy_jobs = [
    (train_damage_paths, './data/damage', './data/split/train/damage'),
    (test_damage_paths, './data/damage', './data/split/test/damage'),
    (train_no_damage_paths, './data/no_damage', './data/split/train/no_damage'),
    (test_no_damage_paths, './data/no_damage', './data/split/test/no_damage'),
]
for file_names, source_dir, target_dir in copy_jobs:
    for file_name in file_names:
        shutil.copyfile(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))

# check counts:
count_checks = [
    ("Files in train/damage: ", "./data/split/train/damage"),
    ("Files in train/no_damage: ", "./data/split/train/no_damage"),
    ("Files in test/damage: ", "./data/split/test/damage"),
    ("Files in test/no_damage: ", "./data/split/test/no_damage"),
]
for message, folder in count_checks:
    print(message, len(os.listdir(folder)))

Files in train/damage:  11336
Files in train/no_damage:  5721
Files in test/damage:  2834
Files in test/no_damage:  1431


In [6]:
import tensorflow as tf
# Rescaling is imported from the stable layers namespace; the
# `layers.experimental.preprocessing` path is deprecated and is not available
# in newer Keras releases.
from tensorflow.keras.layers import Rescaling
train_data_dir = 'data/split/train/'

batch_size = 32
# target image size
img_height = 128
img_width = 128

# subset="both" makes the loader return the (training, validation) pair in a
# single call; validation_split=0.2 holds out 20% of the training folder and
# the fixed seed keeps that hold-out the same between runs.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="both",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# Multiply every pixel value by 1/255 before it reaches the model; labels pass
# through unchanged.
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image, label: (rescale(image), label))
val_rescale_ds = val_ds.map(lambda image, label: (rescale(image), label))

2024-04-11 04:01:32.560057: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-11 04:01:32.603503: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-11 04:01:32.603565: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-11 04:01:32.605299: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-11 04:01:32.613775: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-11 04:01:32.615666: I tensorflow/core/platform/cpu_feature_guard.cc:1

Found 17057 files belonging to 2 classes.
Using 13646 files for training.
Using 3411 files for validation.


In [7]:
test_data_dir = 'data/split/test/'

# The batch size is now passed to the loader explicitly. Previously this
# variable was set to 2 but never handed to image_dataset_from_directory, so
# the loader's documented default of 32 was what actually applied; 32 keeps the
# effective behaviour and matches the training pipeline.
batch_size = 32

# same target image size as the training/validation datasets
img_height = 128
img_width = 128

# No validation_split/subset here: the whole folder is the test set.
# shuffle=False keeps the files in a fixed order; evaluation metrics do not
# depend on order, and a stable order lets predictions be lined up with files.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    shuffle=False,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# Apply the same 1/255 pixel scaling that the training data receives.
rescale = tf.keras.layers.Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image, label: (rescale(image), label))

Found 4265 files belonging to 2 classes.


## Part 2

### Artificial Neural Network (ANN)

In [8]:
from keras.models import Sequential
from keras.layers import Dense, Flatten

# Fully connected baseline: flatten each 128x128x3 image into a vector and feed
# it through a stack of progressively narrower ReLU layers.
model_ann = Sequential()

# Input layer: flattening only, no trainable parameters
model_ann.add(Flatten(input_shape=(128,128, 3)))

# Hidden layers, widest first
for hidden_units in (512, 256, 128, 128, 64, 32):
    model_ann.add(Dense(hidden_units, activation='relu'))

# Binary problem (damage vs. no damage), so a single sigmoid unit is the output
model_ann.add(Dense(1, activation='sigmoid'))

# Compile, then print the layer/parameter summary
model_ann.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_ann.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 49152)             0         
                                                                 
 dense (Dense)               (None, 512)               25166336  
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 128)               16512     
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 32)                2

In [9]:
# Train the ANN for 20 epochs, validating after each one.
# `batch_size` is intentionally not passed: the input is a tf.data.Dataset that
# was already batched (32 per batch) when it was created, and the Keras
# documentation says not to specify batch_size for dataset inputs.
history = model_ann.fit(
    train_rescale_ds,
    epochs=20,
    validation_data=val_rescale_ds,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
# Score the trained ANN on the held-out test dataset (verbose=0 suppresses the
# progress output) and report both compiled metrics.
test_loss, test_accuracy = model_ann.evaluate(test_rescale_ds, verbose=0)
for metric_label, metric_value in (('Test Loss:', test_loss), ('Test Accuracy:', test_accuracy)):
    print(metric_label, metric_value)

Test Loss: 0.5654383897781372
Test Accuracy: 0.7064478397369385


In [11]:
# Create the output folder first so the save also works on a fresh checkout
# where ./models does not exist yet.
os.makedirs('./models', exist_ok=True)
model_ann.save('./models/ann.keras')

### LeNet-5 CNN Architecture

In [12]:
from keras import layers, models, optimizers
import pandas as pd

# LeNet-5 style CNN: two conv + average-pool stages followed by two dense
# layers and a single sigmoid output for the binary damage / no-damage label.
model_lenet5 = models.Sequential()

# Layer 1: Convolutional layer with 6 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(6, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Layer 2: Convolutional layer with 16 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu'))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Flatten the feature maps to feed into fully connected layers
model_lenet5.add(layers.Flatten())

# Layer 3: Fully connected layer with 120 neurons
model_lenet5.add(layers.Dense(120, activation='relu'))

# Layer 4: Fully connected layer with 84 neurons
model_lenet5.add(layers.Dense(84, activation='relu'))

# Output layer: Fully connected layer with 1 neuron.
# Uses layers.Dense like every other layer in this cell, so the cell relies
# only on its own `from keras import layers, models, optimizers` import rather
# than on a bare `Dense` name imported by a different cell.
model_lenet5.add(layers.Dense(1, activation='sigmoid'))

# Compile model
model_lenet5.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_lenet5.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 6)       168       
                                                                 
 average_pooling2d (Average  (None, 63, 63, 6)         0         
 Pooling2D)                                                      
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 16)        880       
                                                                 
 average_pooling2d_1 (Avera  (None, 30, 30, 16)        0         
 gePooling2D)                                                    
                                                                 
 flatten_1 (Flatten)         (None, 14400)             0         
                                                                 
 dense_7 (Dense)             (None, 120)              

In [13]:
# Train the LeNet-5 model for 20 epochs, validating after each one.
# `batch_size` is intentionally not passed: the input is a tf.data.Dataset that
# was already batched (32 per batch) when it was created, and the Keras
# documentation says not to specify batch_size for dataset inputs.
history = model_lenet5.fit(
    train_rescale_ds,
    epochs=20,
    validation_data=val_rescale_ds,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# Score the trained LeNet-5 model on the held-out test dataset (verbose=0
# suppresses the progress output) and report both compiled metrics.
test_loss, test_accuracy = model_lenet5.evaluate(test_rescale_ds, verbose=0)
for metric_label, metric_value in (('Test Loss:', test_loss), ('Test Accuracy:', test_accuracy)):
    print(metric_label, metric_value)

Test Loss: 0.291168749332428
Test Accuracy: 0.8874560594558716


In [15]:
# Create the output folder first so the save also works on a fresh checkout
# where ./models does not exist yet.
os.makedirs('./models', exist_ok=True)
model_lenet5.save('./models/lenet5.keras')

### Alternate LeNet-5 CNN Architecture

In [16]:
from tensorflow.keras.layers import MaxPooling2D, Dropout, Conv2D
from keras import optimizers

# Deeper CNN variant: four conv + max-pool stages, dropout, one wide dense
# layer and a single sigmoid output.
model_altlenet5 = Sequential()

# First stage also declares the input shape
model_altlenet5.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128,128,3)))
model_altlenet5.add(MaxPooling2D((2, 2)))

# Remaining stages differ only in their number of filters
for n_filters in (64, 128, 128):
    model_altlenet5.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model_altlenet5.add(MaxPooling2D((2, 2)))

# Flatten the feature maps, then apply dropout before the dense head
model_altlenet5.add(Flatten())
model_altlenet5.add(Dropout(0.5))

# Dense head: one hidden layer, then the binary sigmoid output
model_altlenet5.add(Dense(512, activation='relu'))
model_altlenet5.add(Dense(1, activation='sigmoid'))

# Compile, then print the layer/parameter summary
rmsprop = optimizers.RMSprop(learning_rate=1e-4)
model_altlenet5.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])
model_altlenet5.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 63, 32)        0         
 D)                                                              
                                                                 
 conv2d_3 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 128)      

In [17]:
# Train the alternate LeNet-5 model for 20 epochs, validating after each one.
# `batch_size` is intentionally not passed: the input is a tf.data.Dataset that
# was already batched (32 per batch) when it was created, and the Keras
# documentation says not to specify batch_size for dataset inputs.
history = model_altlenet5.fit(
    train_rescale_ds,
    epochs=20,
    validation_data=val_rescale_ds,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
# Score the trained alternate LeNet-5 model on the held-out test dataset
# (verbose=0 suppresses the progress output) and report both compiled metrics.
test_loss, test_accuracy = model_altlenet5.evaluate(test_rescale_ds, verbose=0)
for metric_label, metric_value in (('Test Loss:', test_loss), ('Test Accuracy:', test_accuracy)):
    print(metric_label, metric_value)

Test Loss: 0.08642759919166565
Test Accuracy: 0.9645955562591553


In [19]:
# Create the output folder first so the save also works on a fresh checkout
# where ./models does not exist yet.
os.makedirs('./models', exist_ok=True)
model_altlenet5.save('./models/altlenet5.keras')