# Project 2 Model Classification
### Serena Shah, Osvaldo Salinas
## Part 1

### Loading the Data

In [7]:
import os
# Collect the image file names of each class from the source folders
# (first the damaged images, then the undamaged ones).
damage_all_paths, no_damage_all_paths = (
    os.listdir(f'data_all_modified/{class_dir}')
    for class_dir in ('damage', 'no_damage')
)

In [13]:
import random

# Fixed seed so that "Restart Kernel -> Run All" always produces the same split.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8


def split_train_test(all_paths, rng, train_fraction=TRAIN_FRACTION):
    """Randomly split a list of file names into train and test lists.

    Args:
        all_paths: file names of one class.
        rng: a ``random.Random`` instance used to draw the training sample.
        train_fraction: share of the files that goes to the training list.

    Returns:
        ``(train_paths, test_paths)``; together they contain every unique
        name in ``all_paths`` exactly once.
    """
    # os.listdir() returns names in arbitrary order; sorting first makes the
    # seeded sample independent of the directory listing order.
    ordered = sorted(all_paths)
    train_paths = rng.sample(ordered, int(len(ordered) * train_fraction))
    # Set membership keeps the complement computation linear instead of O(n^2).
    train_lookup = set(train_paths)
    test_paths = [p for p in ordered if p not in train_lookup]
    return train_paths, test_paths


split_rng = random.Random(SPLIT_SEED)

print("...............")
train_damage_paths, test_damage_paths = split_train_test(damage_all_paths, split_rng)
print("train damage image count: ", len(train_damage_paths))
print("test damage image count: ", len(test_damage_paths))
# ensure no overlap:
overlap = sorted(set(train_damage_paths) & set(test_damage_paths))
print("len of overlap: ", len(overlap))
print("...............\n")
print("...............")
train_no_damage_paths, test_no_damage_paths = split_train_test(no_damage_all_paths, split_rng)
print("train no damage image count: ", len(train_no_damage_paths))
print("test no damage image count: ", len(test_no_damage_paths))
# ensure no overlap:
overlap = sorted(set(train_no_damage_paths) & set(test_no_damage_paths))
print("len of overlap: ", len(overlap))
print("...............\n")

...............
train damage image count:  11336
test damage image count:  2834
len of overlap:  0
...............

...............
train no damage image count:  5721
test no damage image count:  1431
len of overlap:  0
...............


In [16]:
from pathlib import Path

# Build the data/<split>/<class> directory tree that will hold the split images.
# Existing directories are left untouched.
for split_name in ("train", "test"):
    for class_name in ("damage", "no_damage"):
        target_dir = Path("data") / split_name / class_name
        target_dir.mkdir(parents=True, exist_ok=True)

In [17]:
import shutil

root_dir = 'data_all_modified'
split_root_dir = 'data'


def copy_split_files(class_name, split_name, file_names):
    """Copy one class's files for one split into the split directory tree.

    Files already present in the destination that are not part of
    ``file_names`` are removed first. Without this, re-running the notebook
    with a different split would leave old files behind and the train and
    test folders could end up sharing images.

    Args:
        class_name: class sub-folder name ('damage' or 'no_damage').
        split_name: split sub-folder name ('train' or 'test').
        file_names: names of the files (inside the class folder) to copy.
    """
    src_dir = os.path.join(root_dir, class_name)
    dst_dir = os.path.join(split_root_dir, split_name, class_name)

    wanted = set(file_names)
    for existing in os.listdir(dst_dir):
        existing_path = os.path.join(dst_dir, existing)
        if existing not in wanted and os.path.isfile(existing_path):
            os.remove(existing_path)

    for name in file_names:
        shutil.copyfile(os.path.join(src_dir, name), os.path.join(dst_dir, name))


# Copy damaged images to train and test directories
copy_split_files('damage', 'train', train_damage_paths)
copy_split_files('damage', 'test', test_damage_paths)

# Copy no damage images to train and test directories
copy_split_files('no_damage', 'train', train_no_damage_paths)
copy_split_files('no_damage', 'test', test_no_damage_paths)

# Check counts to ensure files are copied correctly
print("Files in train/damage: ", len(os.listdir(os.path.join(split_root_dir, "train/damage"))))
print("Files in train/no_damage: ", len(os.listdir(os.path.join(split_root_dir, "train/no_damage"))))
print("Files in test/damage: ", len(os.listdir(os.path.join(split_root_dir, "test/damage"))))
print("Files in test/no_damage: ", len(os.listdir(os.path.join(split_root_dir, "test/no_damage"))))

Files in train/damage:  11336
Files in train/no_damage:  5721
Files in test/damage:  2834
Files in test/no_damage:  1431


### Data preprocessing

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Rescaling

train_data_dir = 'data/train/'
test_data_dir = 'data/test/'

# Every image is resized to 128x128 when it is loaded; the models defined
# later must use the same input size.
img_height = 128
img_width = 128

# Batch size used for the training and validation datasets
batch_size_train = 32
# Batch size used for the test dataset (kept equal to the training one)
batch_size_test = 32

# Load the training folder and carve out 20% of it for validation.
# subset="both" returns the (training, validation) pair in one call; the fixed
# seed keeps that partition identical between runs.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="both",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size_train
)

# Scale pixel values by 1/255 before they are fed to the network.
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image, label: (rescale(image), label))
val_rescale_ds = val_ds.map(lambda image, label: (rescale(image), label))

# Load the held-out test folder.
# shuffle=False keeps the sample order fixed, so labels collected by iterating
# the dataset line up with model.predict() output. With the default
# shuffle=True the order changes on every pass over the dataset.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    shuffle=False,
    image_size=(img_height, img_width),
    batch_size=batch_size_test
)

test_rescale_ds = test_ds.map(lambda image, label: (rescale(image), label))

Found 17057 files belonging to 2 classes.
Using 13646 files for training.
Using 3411 files for validation.
Found 4265 files belonging to 2 classes.


## Part 2
#### LeNet-5

In [5]:
from tensorflow.keras import layers, models, optimizers

# The dataset has two classes (damage / no_damage), so the softmax output
# layer needs exactly two units.
num_classes = 2

# LeNet-5 style network: two convolution + average-pooling stages followed by
# three fully connected layers.
model_lenet5 = models.Sequential()

# The input shape is taken from the preprocessing settings so that the model
# always matches the size the images were resized to.
model_lenet5.add(layers.Conv2D(6, kernel_size=(3, 3), activation='relu',
                               input_shape=(img_height, img_width, 3)))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

model_lenet5.add(layers.Conv2D(16, kernel_size=(3, 3), activation='relu'))
model_lenet5.add(layers.AveragePooling2D(pool_size=(2, 2)))

# Flatten the feature maps before feeding into fully connected layers
model_lenet5.add(layers.Flatten())

model_lenet5.add(layers.Dense(120, activation='relu'))
model_lenet5.add(layers.Dense(84, activation='relu'))
model_lenet5.add(layers.Dense(num_classes, activation='softmax'))

# The datasets yield integer class labels, hence the sparse categorical loss.
model_lenet5.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4),
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])

model_lenet5.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 126, 126, 6)       168       
                                                                 
 average_pooling2d_2 (Avera  (None, 63, 63, 6)         0         
 gePooling2D)                                                    
                                                                 
 conv2d_3 (Conv2D)           (None, 61, 61, 16)        880       
                                                                 
 average_pooling2d_3 (Avera  (None, 30, 30, 16)        0         
 gePooling2D)                                                    
                                                                 
 flatten_1 (Flatten)         (None, 14400)             0         
                                                                 
 dense_3 (Dense)             (None, 120)              

In [None]:
# Train LeNet-5 for 20 epochs, validating on the held-out validation split
# after each epoch.
# batch_size is deliberately not passed: train_rescale_ds is a tf.data.Dataset
# that is already batched, and Keras expects batch_size to be left unset for
# dataset inputs.
history = model_lenet5.fit(
    train_rescale_ds,
    epochs=20,
    validation_data=val_rescale_ds
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
 86/427 [=====>........................] - ETA: 2:43 - loss: 0.3593 - accuracy: 0.8543