In [1]:
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Root folder of the image data. It is passed to image_dataset_from_directory
# below with labels='inferred' and follow_links=True.
directory = '../data/symlink_data/all_US_wild_est'

In [3]:
# Loader settings.
# Edge length (pixels) requested from the loader via image_size=(img_size, img_size);
# keep the original size and resize inside the model if a different size is needed.
img_size = 256
# NOTE(review): the loader calls below pass literal batch sizes (16 and 7874)
# rather than this variable -- confirm which value is intended.
batch_size = 32

In [17]:
# Training subset of the images under `directory` (validation_split=0.25,
# subset='training'). The validation loader uses the same seed and split.
X_train = image_dataset_from_directory(
    directory,
    labels='inferred',
    class_names=None,
    color_mode='rgb',
    batch_size=16,  # number of images per batch yielded by this dataset
    image_size=(img_size, img_size),
    shuffle=True,
    seed=42,
    validation_split=0.25,
    subset='training',
    interpolation='bilinear',
    follow_links=True,
)

Found 31498 files belonging to 2 classes.
Using 23624 files for training.


In [5]:
# Validation subset, loaded as one single batch so it can be sliced into
# holdout and test parts afterwards. The batch size of 7874 equals the number
# of validation files reported by the loader.
X_test = image_dataset_from_directory(
    directory,
    labels='inferred',
    class_names=None,
    color_mode='rgb',
    batch_size=7874,
    image_size=(img_size, img_size),
    shuffle=True,
    seed=42,
    validation_split=0.25,
    subset='validation',
    interpolation='bilinear',
    follow_links=True,
)

Found 31498 files belonging to 2 classes.
Using 7874 files for validation.


In [6]:
# Split the validation subset into a holdout set (the first 30% of the batch)
# and a test set (the remainder).
holdout_fraction = 0.3

# X_test is built with a batch size equal to the whole validation subset, so
# its first batch carries every validation image and label as tensors.
X_test_images, X_test_labels = next(iter(X_test))

# Derive the sizes from the tensors actually loaded instead of hard-coding the
# file count, so the split stays correct if the underlying data changes.
n_validation = int(X_test_images.shape[0])
holdout_size = int(holdout_fraction * n_validation)
test_size = n_validation - holdout_size
print(f' holdout size: {holdout_size}, test size: {test_size}')

# The first `holdout_size` examples become the holdout set ...
X_holdout_images = X_test_images[:holdout_size, ...]
X_holdout_labels = X_test_labels[:holdout_size]
# ... and everything after them stays in the test set.
X_test_images = X_test_images[holdout_size:, ...]
X_test_labels = X_test_labels[holdout_size:]

 holdout size: 2362, test size: 5512


In [7]:
# Wrap the split tensors back into tf.data datasets. from_tensor_slices yields
# one (image, label) pair per element, i.e. these datasets are unbatched.
X_test1 = tensorflow.data.Dataset.from_tensor_slices(
    (X_test_images, X_test_labels)
)
X_holdout1 = tensorflow.data.Dataset.from_tensor_slices(
    (X_holdout_images, X_holdout_labels)
)

In [None]:
# # batch them same as X_train
# X_test1 = X_test1.batch(32)
# X_holdout1 = X_holdout1.batch(32)

In [8]:
# Inspect the element structure of the training dataset (batched: leading None dimension).
X_train.element_spec

(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(None,), dtype=tf.int32, name=None))

In [9]:
# Inspect the element structure of the holdout dataset (unbatched: one image and one scalar label per element).
X_holdout1.element_spec

(TensorSpec(shape=(256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(), dtype=tf.int32, name=None))

In [10]:
# Inspect the element structure of the test dataset (unbatched, same layout as the holdout dataset).
X_test1.element_spec

(TensorSpec(shape=(256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(), dtype=tf.int32, name=None))

In [11]:
# Class names inferred by the loader for the validation dataset.
X_test.class_names

['Established Campground', 'Wild Camping']

In [12]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow import data

In [13]:
##### SAVE DATASETS ##### this takes a lot of RAM
from tensorflow.data.experimental import save

In [18]:
# Persist the batched training dataset to disk (GZIP-compressed).
save(
    X_train,
    '../data/datasets/all_US_data/X_train_256px_16batch',
    compression='GZIP',
    shard_func=None,
)

In [15]:
# Persist the unbatched test dataset to disk (GZIP-compressed).
save(
    X_test1,
    '../data/datasets/all_US_data/X_test_256px_unbatched',
    compression='GZIP',
    shard_func=None,
)

In [16]:
# Persist the unbatched holdout dataset to disk (GZIP-compressed).
save(
    X_holdout1,
    '../data/datasets/all_US_data/X_holdout_256px_unbatched',
    compression='GZIP',
    shard_func=None,
)

In [25]:
from tensorflow import TensorSpec, float32, int32
from tensorflow.data.experimental import load

# Image geometry and dense-layer width used by the model cells below.
img_height = 350
img_width = 350
final_dense = 350

# Both saved datasets are declared with the same structure: a float32 image
# batch and an int32 label batch (leading None = variable batch dimension).
_image_batch_spec = TensorSpec(shape=(None, img_height, img_width, 3), dtype=float32, name=None)
_label_batch_spec = TensorSpec(shape=(None,), dtype=int32, name=None)
X_train_elem_spec = (_image_batch_spec, _label_batch_spec)
X_test_elem_spec = (_image_batch_spec, _label_batch_spec)

# Locations of the previously saved datasets.
# NOTE(review): these paths and the 350px size differ from what the save cells
# in this notebook write (256px, '..._256px_16batch' / '..._256px_unbatched');
# presumably they come from an earlier run -- confirm they exist on disk.
X_train_data_path = '../data/datasets/all_US_data/X_train_32batch'
X_test_data_path = '../data/datasets/all_US_data/X_test_32batch'

# Replace X_train / X_test with the datasets read back from disk.
X_train = load(
    X_train_data_path,
    element_spec=X_train_elem_spec,
    compression='GZIP',
    reader_func=None,
)
X_test = load(
    X_test_data_path,
    element_spec=X_test_elem_spec,
    compression='GZIP',
    reader_func=None,
)

In [26]:
# Training hyperparameters.
num_classes = 2
epochs = 1  # number of full passes over the training dataset

# Convolutional architecture hyperparameters.
nb_filters = 32       # base number of convolutional filters; more filters means more parameters
kernel_size = (2, 2)  # convolution window that slides over the image to learn features
pool_size = (2, 2)    # pooling shrinks the feature maps, cutting computation and adding translational invariance

# Input pipeline: cache elements after the first pass and prefetch so data
# loading overlaps with training.
AUTOTUNE = data.experimental.AUTOTUNE
# NOTE(review): the element spec declared for X_train is batched, so
# shuffle(32) presumably shuffles whole batches with a 32-batch buffer rather
# than individual images -- confirm this is the intent.
X_train = (
    X_train
    .cache()
    .shuffle(32)
    .prefetch(buffer_size=AUTOTUNE)
)
X_test = (
    X_test
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [27]:
# Binary image classifier: rescaling + random augmentation, four conv/pool
# stages, then a dense head ending in a single sigmoid unit.
# To try 350 x 350 vs. 256 x 256 inputs, change img_height / img_width above
# (a Resizing layer could be inserted first:
#  layers.experimental.preprocessing.Resizing(img_height, img_width, interpolation='bilinear')).
_input_dims = (img_height, img_width, 3)

_front_layers = [
    # Scale pixel values by 1/255.
    layers.experimental.preprocessing.Rescaling(1./255, input_shape=_input_dims),
    # NOTE(review): input_shape is already given on the first layer; the copy
    # here looks redundant -- confirm before removing.
    layers.experimental.preprocessing.RandomFlip("horizontal", input_shape=_input_dims),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
]

# Four conv + max-pool stages with nb_filters * 1, 2, 3, 4 filters
# (earlier experiments used 16, 32, 64; drop stages for quick initial testing).
_conv_layers = []
for _multiplier in (1, 2, 3, 4):
    _conv_layers.append(
        layers.Conv2D(
            nb_filters * _multiplier,
            (kernel_size[0], kernel_size[1]),
            padding='same',
            activation='relu',
        )
    )
    _conv_layers.append(layers.MaxPooling2D(pool_size=pool_size))

_head_layers = [
    layers.Flatten(),
    layers.Dense(final_dense, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
]

model = Sequential(_front_layers + _conv_layers + _head_layers)

In [28]:
# The model ends in a sigmoid, so the loss receives probabilities
# (from_logits=False), not raw logits.
_binary_loss = keras.losses.BinaryCrossentropy(from_logits=False)
model.compile(
    optimizer='adam',
    loss=_binary_loss,
    metrics=['accuracy'],
)

In [29]:
# Print the layer-by-layer summary (output shapes and parameter counts) to check the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling (Rescaling)        (None, 350, 350, 3)       0         
_________________________________________________________________
random_flip (RandomFlip)     (None, 350, 350, 3)       0         
_________________________________________________________________
random_rotation (RandomRotat (None, 350, 350, 3)       0         
_________________________________________________________________
random_zoom (RandomZoom)     (None, 350, 350, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 350, 350, 32)      416       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 175, 175, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 175, 175, 64)      8

In [31]:
# Train the model, validating on X_test after each epoch.
# The number of epochs comes from the `epochs` setting defined with the other
# hyperparameters, so changing it there actually takes effect here.
# batch_size is deliberately not passed: X_train is a tf.data dataset and
# already yields batches.
history = model.fit(
    X_train,
    validation_data=X_test,
    epochs=epochs,
    verbose=1,
)



In [None]:
# Evaluate on X_test. With the model compiled with metrics=['accuracy'],
# the first entry is the loss and the second is the accuracy.
score = model.evaluate(X_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)