# Project 2 Model Classification
### Serena Shah, Osvaldo Salinas
## Part 1

### Loading the Data

In [2]:
import os
from pathlib import Path

Path("data/train/damage").mkdir(parents=True, exist_ok=True)
Path("data/train/no_damage").mkdir(parents=True, exist_ok=True)

Path("data/test/damage").mkdir(parents=True, exist_ok=True)
Path("data/test/no_damage").mkdir(parents=True, exist_ok=True)

In [3]:
# we need paths of images for individual classes so we can copy them in the new directories that we created above

damage_all_paths = os.listdir('data_all_modified/damage')
no_damage_all_paths = os.listdir('data_all_modified/no_damage')

In [4]:
# split the image paths into train and test by randomly selecting 80% of the images in train and 20% in test.
import random

print("...............")
train_damage_paths = random.sample(damage_all_paths, int(len(damage_all_paths)*0.8))
print("train damage image count: ", len(train_damage_paths))
test_damage_paths = [ p for p in damage_all_paths if p not in train_damage_paths]
print("test damage image count: ", len(test_damage_paths))
# ensure no overlap:
overlap = [p for p in train_damage_paths if p in test_damage_paths]
print("len of overlap: ", len(overlap))
print("...............\n")
print("...............")
train_no_damage_paths = random.sample(no_damage_all_paths, int(len(no_damage_all_paths)*0.8))
print("train no damage image count: ", len(train_no_damage_paths))
test_no_damage_paths = [ p for p in no_damage_all_paths if p not in train_no_damage_paths]
print("test no damage image count: ", len(test_no_damage_paths))
# ensure no overlap:
overlap = [p for p in train_no_damage_paths if p in test_no_damage_paths]
print("len of overlap: ", len(overlap))
print("...............\n")

...............
train damage image count:  11336
test damage image count:  2834
len of overlap:  0
...............

...............
train no damage image count:  5721
test no damage image count:  1431
len of overlap:  0
...............



In [5]:
# copying of files in the train and test directories
import shutil

root_dir = 'data_all_modified'
split_root_dir = 'data'

# Copy damaged images to train and test directories
for p in train_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'damage', p), os.path.join(split_root_dir, 'train/damage', p))

for p in test_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'damage', p), os.path.join(split_root_dir, 'test/damage', p))

# Copy no damage images to train and test directories
for p in train_no_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'no_damage', p), os.path.join(split_root_dir, 'train/no_damage', p))

for p in test_no_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'no_damage', p), os.path.join(split_root_dir, 'test/no_damage', p))

# Check counts to ensure files are copied correctly
print("Files in train/damage: ", len(os.listdir(os.path.join(split_root_dir, "train/damage"))))
print("Files in train/no_damage: ", len(os.listdir(os.path.join(split_root_dir, "train/no_damage"))))
print("Files in test/damage: ", len(os.listdir(os.path.join(split_root_dir, "test/damage"))))
print("Files in test/no_damage: ", len(os.listdir(os.path.join(split_root_dir, "test/no_damage"))))

Files in train/damage:  13626
Files in train/no_damage:  6869
Files in test/damage:  5124
Files in test/no_damage:  2579


### Data preprocessing

In [6]:
import tensorflow as tf
from tensorflow.keras.layers import Rescaling
train_data_dir = 'data/train/'

batch_size = 32
# target image size
img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates which dataset is returned
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
train_data_dir,
validation_split=0.2,
subset="both",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_rescale_ds = val_ds.map(lambda image,label:(rescale(image),label))

2024-04-10 21:58:17.326966: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-10 21:58:17.346917: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-10 21:58:17.530265: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-10 21:58:17.530356: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-10 21:58:17.557042: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

Found 20495 files belonging to 2 classes.
Using 16396 files for training.
Using 4099 files for validation.


In [7]:
test_data_dir = 'data/test/'

batch_size = 2
# target image size
img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates what is returned
test_ds = tf.keras.utils.image_dataset_from_directory(
test_data_dir,
seed=123,
image_size=(img_height, img_width),
)

# approach 1: manually rescale data --
rescale = Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image,label:(rescale(image),label))

Found 7703 files belonging to 2 classes.


## Part 2
### ANN

In [34]:
from keras.models import Sequential
from keras.layers import Dense, Flatten

model_ann = Sequential()

# Flatten
model_ann.add(Flatten(input_shape=(img_height, img_width, 3)))

# Our input layer can have any number of perceptrons, we chose 2, however,
# the input dimension must match the number of features in the independent variable -- therefore, we set
# it to 4
model_ann.add(Dense(120, input_dim=2, activation='relu'))

# we can add any number of hidden layers with any number of perceptrons; here we choose 1 layer with 128 perceptrons. The
# hidden layers should all use RELU
model_ann.add(Dense(128, activation='relu'))

# softmax activation function is selected for multi-label classification problems; there are 3 perceptrons in this
# last layer because there are 2 target labels to predict (it matches the shape of y)
model_ann.add(Dense(2, activation='softmax'))

# compile model
model_ann.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# generating summary of model
model_ann.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 49152)             0         
                                                                 
 dense_15 (Dense)            (None, 4)                 196612    
                                                                 
 dense_16 (Dense)            (None, 128)               640       
                                                                 
 dense_17 (Dense)            (None, 2)                 258       
                                                                 
Total params: 197510 (771.52 KB)
Trainable params: 197510 (771.52 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [35]:
#fit the model from image generator
history_ann = model_ann.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds
)

Epoch 1/20


ValueError: in user code:

    File "/usr/local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1151, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/usr/local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1209, in compute_loss
        return self.compiled_loss(
    File "/usr/local/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/usr/local/lib/python3.11/site-packages/keras/src/losses.py", line 143, in __call__
        losses = call_fn(y_true, y_pred)
    File "/usr/local/lib/python3.11/site-packages/keras/src/losses.py", line 270, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/usr/local/lib/python3.11/site-packages/keras/src/losses.py", line 2221, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/usr/local/lib/python3.11/site-packages/keras/src/backend.py", line 5573, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 2) are incompatible


In [None]:
test_loss_ann, test_accuracy_ann = model_ann.evaluate(test_rescale_ds, verbose=0)
test_accuracy_ann

In [None]:
model_ann.save("models/ann.keras")

### LeNet-5

In [10]:
from keras import layers
from keras import models
import pandas as pd

model_lenet5 = models.Sequential()

# Layer 1: Convolutional layer with 6 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(6, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Layer 2: Convolutional layer with 16 filters of size 3x3, followed by average pooling
model_lenet5.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu'))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Flatten the feature maps to feed into fully connected layers
model_lenet5.add(layers.Flatten())

# Layer 3: Fully connected layer with 120 neurons
model_lenet5.add(layers.Dense(120, activation='relu'))

# Layer 4: Fully connected layer with 84 neurons
model_lenet5.add(layers.Dense(84, activation='relu'))

# Output layer: Fully connected layer with num_classes neurons (e.g., 3 )
model_lenet5.add(layers.Dense(3, activation='softmax'))

# Compile model
model_lenet5.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_lenet5.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 126, 126, 6)       168       
                                                                 
 average_pooling2d (Average  (None, 63, 63, 6)         0         
 Pooling2D)                                                      
                                                                 
 conv2d_4 (Conv2D)           (None, 61, 61, 16)        880       
                                                                 
 average_pooling2d_1 (Avera  (None, 30, 30, 16)        0         
 gePooling2D)                                                    
                                                                 
 flatten_1 (Flatten)         (None, 14400)             0         
                                                                 
 dense_3 (Dense)             (None, 120)              

In [11]:
#fit the model from image generator
history = model_lenet5.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


The Lenet-5 model accuracy is shown below.

In [12]:
test_loss_lenet5, test_accuracy_lenet5 = model_lenet5.evaluate(test_rescale_ds, verbose=0)
test_accuracy_lenet5

0.9524860382080078

In [13]:
model_lenet5.save("models/lenet5.keras")

### Alt LeNet-5

In [18]:
from keras import layers
from keras import models
import pandas as pd

model_altlenet = models.Sequential()

# Layer 1: Convolutional layer with 32 filters of size 3x3, followed by max pooling
model_altlenet.add(layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)))
model_altlenet.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Layer 2: Convolutional layer with 64 filters of size 3x3, followed by max pooling
model_altlenet.add(layers.Conv2D(64, kernel_size=(3, 3), activation='relu'))
model_altlenet.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Layer 3: Convolutional layer with 128 filters of size 3x3, followed by max pooling
model_altlenet.add(layers.Conv2D(128, kernel_size=(3, 3), activation='relu'))
model_altlenet.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Layer 4: Convolutional layer with 128 filters of size 3x3, followed by max pooling
model_altlenet.add(layers.Conv2D(128, kernel_size=(3, 3), activation='relu'))
model_altlenet.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps to feed into fully connected layers
model_altlenet.add(layers.Flatten())

# Adding dropout prevents overfitting
model_altlenet.add(layers.Dropout(0.2))

# Layer 5: Fully connected layer with 120 neurons
model_altlenet.add(layers.Dense(120, activation='relu'))

# Layer 6: Fully connected layer with 84 neurons
model_altlenet.add(layers.Dense(84, activation='relu'))

# Output layer: Fully connected layer with num_classes neurons (e.g., 3 )
model_altlenet.add(layers.Dense(3, activation='softmax'))

# Compile model
model_altlenet.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Generating the summary of the model
model_altlenet.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_5 (Conv2D)           (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 63, 63, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_6 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 14, 14, 128)      

In [20]:
#fit the model from image generator
history_altlenet = model_altlenet.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [21]:
test_loss_altlenet, test_accuracy_altlenet = model_altlenet.evaluate(test_rescale_ds, verbose=0)
test_accuracy_altlenet

0.9787095785140991

In [22]:
model_lenet5.save("models/altlenet.keras")

## Part 3

In [None]:
import requests

# grab an entry from X_test -- here, we grab the first one
l = X_test[0].tolist()

# make the POST request passing the single test case as the `image` field:
rsp = requests.post("http://172.17.0.1:5000/models/", json={"image": l})

# print the json response
rsp.json()

{'result': [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]]}