# Project 2 Model Classification
### Serena Shah, Osvaldo Salinas
## Part 1

### Loading the Data

In [7]:
import os
# List the entries of each class folder in the source dataset.
# os.listdir returns bare names (no directory prefix), one list per class.
damage_all_paths, no_damage_all_paths = (
    os.listdir(class_dir)
    for class_dir in ('data_all_modified/damage', 'data_all_modified/no_damage')
)

In [13]:
import random

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same split.
SPLIT_SEED = 42
# Fraction of each class that goes to the training set; the rest is the test set.
TRAIN_FRACTION = 0.8


def split_train_test(all_paths, train_fraction=TRAIN_FRACTION, seed=SPLIT_SEED):
    """Split a list of file names into reproducible train and test lists.

    Args:
        all_paths: file names belonging to a single class.
        train_fraction: share of ``all_paths`` sampled into the training list.
        seed: seed of the private random generator used for sampling.

    Returns:
        ``(train_paths, test_paths)``; the two lists are disjoint and together
        contain every element of ``all_paths``.
    """
    # os.listdir returns names in arbitrary order, so sort first: otherwise the
    # same seed could still select different files on another machine.
    ordered = sorted(all_paths)
    train_count = int(len(ordered) * train_fraction)
    train_paths = random.Random(seed).sample(ordered, train_count)
    # Set membership keeps the complement O(n) instead of O(n^2) on a list.
    train_lookup = set(train_paths)
    test_paths = [p for p in ordered if p not in train_lookup]
    return train_paths, test_paths


print("...............")
train_damage_paths, test_damage_paths = split_train_test(damage_all_paths)
print("train damage image count: ", len(train_damage_paths))
print("test damage image count: ", len(test_damage_paths))
# ensure no overlap:
overlap = sorted(set(train_damage_paths) & set(test_damage_paths))
print("len of overlap: ", len(overlap))
print("...............\n")
print("...............")
train_no_damage_paths, test_no_damage_paths = split_train_test(no_damage_all_paths)
print("train no damage image count: ", len(train_no_damage_paths))
print("test no damage image count: ", len(test_no_damage_paths))
# ensure no overlap:
overlap = sorted(set(train_no_damage_paths) & set(test_no_damage_paths))
print("len of overlap: ", len(overlap))
print("...............\n")

...............
train damage image count:  11336
test damage image count:  2834
len of overlap:  0
...............

...............
train no damage image count:  5721
test no damage image count:  1431
len of overlap:  0
...............


In [16]:
from pathlib import Path

# Create the output tree data/<split>/<class>; existing folders are left alone.
for split_name in ("train", "test"):
    for class_name in ("damage", "no_damage"):
        Path(f"data/{split_name}/{class_name}").mkdir(parents=True, exist_ok=True)

In [17]:
import shutil

root_dir = 'data_all_modified'
split_root_dir = 'data'


def sync_class_split(class_name, split_name, file_names):
    """Make ``<split_root_dir>/<split_name>/<class_name>`` hold exactly ``file_names``.

    Files left over from an earlier run with a different split are removed
    first. Without this, re-running the notebook can leave the same image in
    both the train and the test folder (train/test leakage).

    Args:
        class_name: class sub-folder name, e.g. ``'damage'``.
        split_name: ``'train'`` or ``'test'``.
        file_names: file names (relative to the class folder) for this split.
    """
    src_dir = os.path.join(root_dir, class_name)
    dst_dir = os.path.join(split_root_dir, f'{split_name}/{class_name}')

    wanted = set(file_names)
    # Only plain files are removed; sub-directories are not touched.
    stale = [
        name for name in os.listdir(dst_dir)
        if name not in wanted and os.path.isfile(os.path.join(dst_dir, name))
    ]
    for name in stale:
        os.remove(os.path.join(dst_dir, name))
    if stale:
        print(f"Removed {len(stale)} stale files from {dst_dir}")

    for name in file_names:
        shutil.copyfile(os.path.join(src_dir, name), os.path.join(dst_dir, name))


# Copy damaged images to train and test directories
sync_class_split('damage', 'train', train_damage_paths)
sync_class_split('damage', 'test', test_damage_paths)

# Copy no damage images to train and test directories
sync_class_split('no_damage', 'train', train_no_damage_paths)
sync_class_split('no_damage', 'test', test_no_damage_paths)

# Check counts to ensure files are copied correctly
for sub_dir in ("train/damage", "train/no_damage", "test/damage", "test/no_damage"):
    print(f"Files in {sub_dir}: ", len(os.listdir(os.path.join(split_root_dir, sub_dir))))

Files in train/damage:  11336
Files in train/no_damage:  5721
Files in test/damage:  2834
Files in test/no_damage:  1431


### Data preprocessing

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Rescaling

# Root folders of the split created above.
train_data_dir = 'data/train/'
test_data_dir = 'data/test/'

# Every image is resized to 128x128 pixels when it is loaded.
img_height = img_width = 128

# Training and test batches use the same batch size.
batch_size_train = batch_size_test = 32

target_size = (img_height, img_width)

# subset="both" yields the training and validation datasets in one call;
# 20% of the files under the training folder are held out for validation.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="both",
    seed=123,
    image_size=target_size,
    batch_size=batch_size_train,
)

# One shared layer that maps pixel values from [0, 255] to [0, 1].
rescale = Rescaling(scale=1.0/255)


def rescale_batch(image, label):
    """Rescale the images of a batch and pass the labels through unchanged."""
    return rescale(image), label


train_rescale_ds = train_ds.map(rescale_batch)
val_rescale_ds = val_ds.map(rescale_batch)

# The held-out test folder is loaded in full (no validation split).
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    seed=123,
    image_size=target_size,
    batch_size=batch_size_test,
)

test_rescale_ds = test_ds.map(rescale_batch)

Found 17057 files belonging to 2 classes.
Using 13646 files for training.
Using 3411 files for validation.
Found 4265 files belonging to 2 classes.


## Part 2
#### Alternative LeNet-5 (as described in the research paper https://arxiv.org/pdf/1807.01688.pdf)

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Rescaling
from tensorflow.keras.optimizers import Adam

# Dataset locations and loader settings for the Part 2 model.
train_data_dir, test_data_dir = 'data/train/', 'data/test/'

# Images are resized to 128x128 and served in batches of 32.
img_height = img_width = 128
batch_size_train = 32

# Define model architecture
def build_model(input_shape):
    """Build the uncompiled CNN used for binary damage classification.

    The network rescales pixels to [0, 1] and then applies three
    Conv2D -> BatchNormalization -> MaxPooling2D stages with 32, 64 and 128
    filters. A 512-unit dense layer with 50% dropout follows, and a single
    sigmoid unit produces the output.

    Args:
        input_shape: shape of one input image, passed to the first layer.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = Sequential()

    # Rescaling lives inside the model, so it is fed raw (unscaled) images.
    model.add(Rescaling(scale=1.0/255, input_shape=input_shape))

    # Convolutional feature extractor: the filter count doubles at each stage.
    for n_filters in (32, 64, 128):
        model.add(Conv2D(n_filters, (3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2)))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    return model

# Load and preprocess datasets
# Settings shared by the training/validation loader and the test loader.
loader_options = {
    "seed": 123,
    "image_size": (img_height, img_width),
    "batch_size": batch_size_train,
}

# subset="both" returns the training and validation datasets together,
# holding out 20% of the training folder for validation.
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="both",
    **loader_options,
)

# The test folder is loaded in full, using the training batch size.
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_dir,
    **loader_options,
)

Found 17057 files belonging to 2 classes.
Using 13646 files for training.
Using 3411 files for validation.
Found 4265 files belonging to 2 classes.


In [6]:
# Model compilation
# Inputs are colour images: (height, width, 3 channels).
input_shape = (img_height, img_width, 3)
model = build_model(input_shape)

# Binary cross-entropy matches the model's single sigmoid output unit;
# accuracy and AUC are tracked alongside the loss.
compile_options = {
    "optimizer": Adam(learning_rate=0.001),
    "loss": 'binary_crossentropy',
    "metrics": ['accuracy', tf.keras.metrics.AUC()],
}
model.compile(**compile_options)

In [7]:
# Implement learning rate scheduling
# Multiply the learning rate by 0.2 when val_loss has not improved for 5 epochs.
# min_lr must be below the optimizer's initial rate (0.001): with
# min_lr=0.001 the callback could never lower the rate and was a no-op.
lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

In [None]:
# Model training
# train_ds and val_ds are already batched by image_dataset_from_directory, so
# batch_size is not passed to fit(): Keras documents that it must not be
# specified when the input is a dataset.
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=30,  # Adjust based on the convergence of your model
                    callbacks=[lr_schedule])

Epoch 1/30

In [None]:
# Model evaluation
# evaluate() returns the loss followed by the compiled metrics (accuracy, AUC).
# test_ds is deliberately not reassigned: the original chained assignment
# replaced the dataset with the metrics list, so re-running the cell failed.
test_loss, test_acc, test_auc = model.evaluate(test_ds)
print(f"Test Accuracy: {test_acc}, Test AUC: {test_auc}")

In [None]:
# Implement learning rate scheduling
# NOTE(review): this cell repeats the ReduceLROnPlateau definition that appears
# before training; it only matters if model.fit is run again afterwards.
# min_lr must be below the optimizer's initial rate (0.001), otherwise the
# callback can never reduce the learning rate.
lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)