# Project 2 Model Classification
### Serena Shah, Osvaldo Salinas
## Part 1

### Loading the Data

In [40]:
import os
from pathlib import Path

Path("data/train/damage").mkdir(parents=True, exist_ok=True)
Path("data/train/no_damage").mkdir(parents=True, exist_ok=True)

Path("data/test/damage").mkdir(parents=True, exist_ok=True)
Path("data/test/no_damage").mkdir(parents=True, exist_ok=True)

In [41]:
# we need paths of images for individual classes so we can copy them in the new directories that we created above

damage_all_paths = os.listdir('data_all_modified/damage')
no_damage_all_paths = os.listdir('data_all_modified/no_damage')

In [42]:
# split the image paths into train and test by randomly selecting 80% of the images in train and 20% in test.
import random

print("...............")
train_damage_paths = random.sample(damage_all_paths, int(len(damage_all_paths)*0.8))
print("train damage image count: ", len(train_damage_paths))
test_damage_paths = [ p for p in damage_all_paths if p not in train_damage_paths]
print("test damage image count: ", len(test_damage_paths))
# ensure no overlap:
overlap = [p for p in train_damage_paths if p in test_damage_paths]
print("len of overlap: ", len(overlap))
print("...............\n")
print("...............")
train_no_damage_paths = random.sample(no_damage_all_paths, int(len(no_damage_all_paths)*0.8))
print("train no damage image count: ", len(train_no_damage_paths))
test_no_damage_paths = [ p for p in no_damage_all_paths if p not in train_no_damage_paths]
print("test no damage image count: ", len(test_no_damage_paths))
# ensure no overlap:
overlap = [p for p in train_no_damage_paths if p in test_no_damage_paths]
print("len of overlap: ", len(overlap))
print("...............\n")

...............
train damage image count:  11336
test damage image count:  2834
len of overlap:  0
...............

...............
train no damage image count:  5721
test no damage image count:  1431
len of overlap:  0
...............



In [43]:
# copying of files in the train and test directories
import shutil

root_dir = 'data_all_modified'
split_root_dir = 'data'

# Copy damaged images to train and test directories
for p in train_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'damage', p), os.path.join(split_root_dir, 'train/damage', p))

for p in test_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'damage', p), os.path.join(split_root_dir, 'test/damage', p))

# Copy no damage images to train and test directories
for p in train_no_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'no_damage', p), os.path.join(split_root_dir, 'train/no_damage', p))

for p in test_no_damage_paths:
    shutil.copyfile(os.path.join(root_dir, 'no_damage', p), os.path.join(split_root_dir, 'test/no_damage', p))

# Check counts to ensure files are copied correctly
print("Files in train/damage: ", len(os.listdir(os.path.join(split_root_dir, "train/damage"))))
print("Files in train/no_damage: ", len(os.listdir(os.path.join(split_root_dir, "train/no_damage"))))
print("Files in test/damage: ", len(os.listdir(os.path.join(split_root_dir, "test/damage"))))
print("Files in test/no_damage: ", len(os.listdir(os.path.join(split_root_dir, "test/no_damage"))))

Files in train/damage:  14149
Files in train/no_damage:  7145
Files in test/damage:  8331
Files in test/no_damage:  4235


### Data preprocessing

In [44]:
import tensorflow as tf
from tensorflow.keras.layers import Rescaling
train_data_dir = 'data/train/'

batch_size = 32
# target image size
img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates which dataset is returned
train_ds, val_ds = tf.keras.utils.image_dataset_from_directory(
train_data_dir,
validation_split=0.2,
subset="both",
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size
)
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image,label:(rescale(image),label))
val_rescale_ds = val_ds.map(lambda image,label:(rescale(image),label))

Found 21294 files belonging to 2 classes.
Using 17036 files for training.
Using 4258 files for validation.


In [45]:
test_data_dir = 'data/test/'

batch_size = 2
# target image size
img_height = 128
img_width = 128

# note that subset="training", "validation", "both", and dictates what is returned
test_ds = tf.keras.utils.image_dataset_from_directory(
test_data_dir,
seed=123,
image_size=(img_height, img_width),
)

# approach 1: manually rescale data --
rescale = Rescaling(scale=1.0/255)
test_rescale_ds = test_ds.map(lambda image,label:(rescale(image),label))

Found 12566 files belonging to 2 classes.


## Part 2
#### ANN

### VGG16

In [None]:
from keras import layers
from keras import models
import pandas as pd

# Import VGG16 model from Keras applications
from keras.applications.vgg16 import VGG16

#Load the pre-trained VGG16 model with weights trained on ImageNet
vgg_model = VGG16(weights='imagenet', include_top = False, input_shape = (150,150,3))
vgg_model.summary()

# Making all the layers of the VGG model non-trainable. i.e. freezing them
for layer in vgg_model.layers:
    layer.trainable = False

# Initializing the model
new_model = models.Sequential()

# Adding the convolutional part of the VGG16 model from above
new_model.add(vgg_model)

# Flattening the output of the VGG16 model because it is from a convolutional layer
new_model.add(layers.Flatten())

# Adding a dense input layer
new_model.add(layers.Dense(32, activation='relu'))

# Adding dropout prevents overfitting
new_model.add(layers.Dropout(0.2))

# Adding second input layer
new_model.add(layers.Dense(32, activation='relu'))

# Adding output layer
new_model.add(layers.Dense(3, activation='softmax'))

# Compiling the model
new_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Summary of the model
new_model.summary()

#fit the model from image generator
history = new_model.fit(
            train_rescale_ds,
            batch_size=32,
            epochs=20,
            validation_data=val_rescale_ds,
)

test_loss, test_accuracy = new_model.evaluate(test_rescale_ds, verbose=0)

In [46]:
import tensorflow as tf
from tensorflow.keras.layers import Rescaling

train_data_dir = 'data/train'
test_data_dir = 'data/test'  

batch_size = 32
# target image size
img_height = 128
img_width = 128

# Load training dataset
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    label_mode='binary'  
)

# Load validation dataset
val_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    label_mode='binary'  
)

# Rescale pixel values from [0, 255] to [0, 1]
rescale = Rescaling(scale=1.0/255)
train_rescale_ds = train_ds.map(lambda image, label: (rescale(image), label))
val_rescale_ds = val_ds.map(lambda image, label: (rescale(image), label))

Found 21294 files belonging to 2 classes.
Using 17036 files for training.
Found 21294 files belonging to 2 classes.
Using 4258 files for validation.
