In [None]:
import shutil
import os
from pathlib import Path

# Start from a clean slate: remove any previous train/test split so files left
# over from an earlier (different) random split cannot leak between the sets.
# Each directory is removed independently, so a missing train/ directory does
# not prevent test/ from being cleared. Only "directory does not exist" is
# tolerated; any other failure (e.g. a permission error) is raised.
for split_dir in ('data/Project3-split/train', 'data/Project3-split/test'):
    try:
        shutil.rmtree(split_dir)
    except FileNotFoundError:
        pass  # nothing to clean up, e.g. on the very first run

# Create one sub-directory per class under both train/ and test/
for split_name in ('train', 'test'):
    for class_name in ('damage', 'no_damage'):
        Path(f'data/Project3-split/{split_name}/{class_name}').mkdir(parents=True, exist_ok=True)

# File names (not full paths) of the images for each class (damage and no damage).
# os.listdir returns entries in arbitrary order, so sort them to give the
# downstream steps a stable, reproducible starting order.
all_damage_file_paths = sorted(os.listdir('data/Project3/damage'))
all_no_damage_file_paths = sorted(os.listdir('data/Project3/no_damage'))

In [None]:
import random
# Seed and train fraction for the split. A dedicated, seeded generator makes the
# 80/20 split identical on every run without touching the global random state.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8


def _split_train_test(file_names, rng, train_fraction=TRAIN_FRACTION):
    """Randomly partition ``file_names`` into disjoint train and test lists.

    Args:
        file_names: Iterable of file names to split.
        rng: ``random.Random`` instance used to draw the training sample.
        train_fraction: Fraction of the names assigned to the training list;
            the count is truncated with ``int()``.

    Returns:
        ``(train_names, test_names)``. ``train_names`` is in sampled order;
        ``test_names`` holds every remaining name in sorted order.
    """
    # Sort first so the result depends only on the seed, not on input order
    ordered_names = sorted(file_names)
    train_names = rng.sample(ordered_names, int(len(ordered_names) * train_fraction))
    # Set membership keeps this linear instead of scanning a list per name
    train_lookup = set(train_names)
    test_names = [name for name in ordered_names if name not in train_lookup]
    return train_names, test_names


split_rng = random.Random(SPLIT_SEED)

# Create a random 80/20 training testing split for the damage images
damage_train_file_paths, damage_test_file_paths = _split_train_test(all_damage_file_paths, split_rng)
print(f"Number of damage images in train: {len(damage_train_file_paths)}")
print(f"Number of damage images in test: {len(damage_test_file_paths)}")
overlap = sorted(set(damage_train_file_paths) & set(damage_test_file_paths))
print(f"Overlap in damage images (should be zero): {len(overlap)}")

# Create a random 80/20 training testing split for the no damage images
no_damage_train_file_paths, no_damage_test_file_paths = _split_train_test(all_no_damage_file_paths, split_rng)
print(f"Number of no damage images in train: {len(no_damage_train_file_paths)}")
print(f"Number of no damage images in test: {len(no_damage_test_file_paths)}")
overlap = sorted(set(no_damage_train_file_paths) & set(no_damage_test_file_paths))
print(f"Overlap in no damage images (should be zero): {len(overlap)}")

In [None]:
# Copy the images to the train/test directories. The four (split, class)
# combinations are listed once and handled by a single loop.
copy_plan = [
    ('train', 'damage', damage_train_file_paths),
    ('train', 'no_damage', no_damage_train_file_paths),
    ('test', 'damage', damage_test_file_paths),
    ('test', 'no_damage', no_damage_test_file_paths),
]
for split_name, class_name, file_names in copy_plan:
    for file_name in file_names:
        shutil.copyfile(
            f"data/Project3/{class_name}/{file_name}",
            f"data/Project3-split/{split_name}/{class_name}/{file_name}",
        )

# Check counts: each destination directory must contain exactly the files
# assigned to it. A mismatch means stale files from an earlier run are present
# (or a copy went missing), which would contaminate the split.
for split_name, class_name, file_names in copy_plan:
    copied_count = len(os.listdir(f'data/Project3-split/{split_name}/{class_name}'))
    print(f"Files in {split_name}/{class_name}: ", copied_count)
    assert copied_count == len(file_names), (
        f"{split_name}/{class_name} holds {copied_count} files, expected {len(file_names)}"
    )

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Rescaling
from tensorflow.keras.utils import image_dataset_from_directory

# Directory holding the training images
train_data_dir = 'data/Project3-split/train'

# Number of images per batch streamed from the dataset;
# keeping it modest helps control memory usage
batch_size = 32

# Target height and width that every image is resized to
img_height = 150
img_width = 150

# Options that carve a validation subset out of the training directory;
# subset="both" makes the loader return the training and validation datasets together
split_options = dict(validation_split=0.2, subset="both", seed=123)
target_size = (img_height, img_width)

train_ds, val_ds = image_dataset_from_directory(
    train_data_dir,
    image_size=target_size,
    batch_size=batch_size,
    **split_options
)

# Layer that multiplies pixel values by 1/255
rescale = Rescaling(1.0/255)


def _rescale_images(images, labels):
    """Apply the ``rescale`` layer to the images and pass the labels through unchanged."""
    return rescale(images), labels


train_rescale_ds = train_ds.map(_rescale_images)
val_rescale_ds = val_ds.map(_rescale_images)

In [None]:
# Do the same for the test dataset, using the same image size and batch size.
# shuffle=False keeps the files in a fixed alphanumeric order so evaluation is
# repeatable and predictions can be lined up with their labels; the loader's
# default (shuffle=True) would reorder the data.
test_data_dir = 'data/Project3-split/test'
test_ds = image_dataset_from_directory(
    test_data_dir,
    shuffle=False,
    image_size=(img_height, img_width),
    batch_size=batch_size
)
# Apply the same pixel rescaling that the training and validation data receive
test_rescale_ds = test_ds.map(lambda x, y: (rescale(x), y))

In [None]:
# Show the shapes of the image and label components of a dataset element
image_spec, label_spec = train_rescale_ds.element_spec
print("Image shape: ", image_spec.shape)
print("Label shape: ", label_spec.shape)