In [138]:
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [38]:
# Root folder of the image dataset. It is passed to image_dataset_from_directory
# below, which infers the class labels from it (labels='inferred').
directory = "../data/sat_images/wild_est_after_exc/"

In [39]:
# batch_size: mini-batch size used when loading the training dataset.
# img_size:   edge length in pixels; images are loaded as (img_size, img_size).
batch_size, img_size = 32, 256

In [126]:
# Training subset of the image folder (the part left after validation_split=0.25).
# seed and validation_split must stay identical to the validation loader's values
# so that both calls partition the files the same way.
X_train = image_dataset_from_directory(
    directory,
    labels='inferred',
    class_names=None,
    color_mode='rgb',
    batch_size=batch_size,  # this is how the data is divided into steps in each epoch below
    image_size=(img_size, img_size),
    shuffle=True,
    seed=42,
    validation_split=0.25,
    subset='training',
    interpolation='bilinear',
    follow_links=True,
)

Found 16220 files belonging to 2 classes.
Using 12165 files for training.


In [127]:
# Validation subset of the same folder (same seed / validation_split as the training
# loader). The batch size is set to the whole subset - 4055 is the number of
# validation files reported by the loader - so that a single batch holds every image
# and can be split into holdout / test tensors afterwards.
X_test = image_dataset_from_directory(
    directory,
    labels='inferred',
    class_names=None,
    color_mode='rgb',
    batch_size=4055,
    image_size=(img_size, img_size),
    shuffle=True,
    seed=42,
    validation_split=0.25,
    subset='validation',
    interpolation='bilinear',
    follow_links=True,
)

Found 16220 files belonging to 2 classes.
Using 4055 files for validation.


In [105]:
# Inspect the structure of one dataset element: an (images, labels) pair with the
# shape and dtype of each component.
X_test.element_spec

(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(None,), dtype=tf.int32, name=None))

In [128]:
# Show the dataset's repr as a quick check of its type, shapes and dtypes.
X_test

<BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int32)>

In [130]:
# Split the validation subset into a holdout part (the first 30% of the images) and
# a test part (the remainder), as in-memory tensors.
HOLDOUT_FRACTION = 0.3

# Pull X and y into tensors. Every batch of X_test is gathered and concatenated, so
# the split no longer depends on the loader's batch size matching the file count
# (reading only the first batch would silently drop images if the dataset grew).
image_batches, label_batches = zip(*X_test)
all_images = tensorflow.concat(image_batches, axis=0)
all_labels = tensorflow.concat(label_batches, axis=0)
# NOTE(review): X_test was created with shuffle=True; if it reshuffles on every pass,
# re-running this cell produces a different holdout/test partition - confirm.

# calc sizes from the data actually loaded instead of a hard-coded file count
n_validation = int(all_labels.shape[0])
holdout_size = int(HOLDOUT_FRACTION * n_validation)
test_size = n_validation - holdout_size
print(f' holdout size: {holdout_size}, test size: {test_size}')

# split the first part into holdout
X_holdout_images = all_images[:holdout_size, ...]
X_holdout_labels = all_labels[:holdout_size]
# put the rest in X_test
X_test_images = all_images[holdout_size:, ...]
X_test_labels = all_labels[holdout_size:]

# drop the references to the unsplit tensors so their memory can be reclaimed
del image_batches, label_batches, all_images, all_labels

 holdout size: 1216, test size: 2839


In [134]:
# Re-display X_test: the split above only created the *_images / *_labels tensors,
# the dataset object itself is untouched.
X_test

<BatchDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.int32)>

In [143]:
# put into datasets
X_test1 = tensorflow.data.Dataset.from_tensors((X_test_images, X_test_labels))
X_holdout1 = tensorflow.data.Dataset.from_tensors((X_holdout_images, X_holdout_labels))

In [142]:
# Show the repacked test dataset's repr to check its shapes and dtypes.
X_test1

<TensorDataset shapes: ((2839, 256, 256, 3), (2839,)), types: (tf.float32, tf.int32)>

In [115]:
# Abandoned alternative, kept for reference only: create the holdout set from X_test
# with Dataset.take()/skip() instead of slicing tensors.
# NOTE: take(n)/skip(n) count dataset *elements*. X_test is batched, so n would count
# batches rather than images; the dataset would have to be unbatched first.
# NOTE(review): X_test is built with shuffle=True; if it reshuffles on every pass,
# separate take()/skip() pipelines could overlap - confirm before reviving this.
# holdout_size = int(0.3 * 4055)
# test_size = 4055 - holdout_size
# print(f' holdout size: {holdout_size}, test size: {test_size}')
# # X_test_final = X_test.skip(holdout_size)
# # X_holdout = X_test.take(holdout_size)
# # X_test_final = X_test.take(test_size)

# X_test_final = X_test.skip(holdout_size)  # everything AFTER the first `holdout_size` elements
# X_holdout = X_test.take(holdout_size)  # only the FIRST `holdout_size` elements

 holdout size: 1216, test size: 2839


In [151]:
# List the inferred class names.
# NOTE: the AttributeError recorded in this cell's output comes from out-of-order
# execution: the config cell further down rebinds X_test to
# X_test1.cache().prefetch(...), a PrefetchDataset, which has no class_names.
# Presumably the attribute exists only on the dataset object returned directly by
# image_dataset_from_directory - run this before X_test is rebound.
X_test.class_names

AttributeError: 'PrefetchDataset' object has no attribute 'class_names'

In [151]:
# X_test.element_spec

(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(None,), dtype=tf.int32, name=None))

In [None]:
# from tensorflow.data.experimental import save
# save(X_train, 'data/X_train', compression='GZIP', shard_func=None)
# save(X_test, 'data/X_test', compression='GZIP', shard_func=None)

In [144]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow import data

In [145]:
# --- training settings ---------------------------------------------------------
num_classes = 2
# number of passes through the entire train dataset before weights are "final"
# (NOTE: the model.fit call below currently hard-codes its own epochs value)
epochs = 10
AUTOTUNE = data.experimental.AUTOTUNE

# --- image geometry ------------------------------------------------------------
img_height = img_width = 256

# --- convolution settings ------------------------------------------------------
# base number of convolutional filters; ideally about one per "feature" expected in
# the images - more filters means more parameters
nb_filters = 32
# pooling decreases image size, reduces computation, adds translational invariance
pool_size = (2, 2)
# convolutional kernel size; the kernel slides over the image to learn features
kernel_size = (2, 2)

# --- input pipelines -----------------------------------------------------------
# Both names are rebound here, so re-running this cell stacks the transformations
# again on X_train. shuffle(32) uses a buffer of 32 dataset elements.
X_test = X_test1.cache().prefetch(buffer_size=AUTOTUNE)
X_train = X_train.cache().shuffle(32).prefetch(buffer_size=AUTOTUNE)

In [146]:
# Binary image classifier: rescaling + random augmentation layers, four
# Conv2D -> MaxPooling2D stages, then a dense head ending in one sigmoid unit.
model = Sequential([
    # Multiply inputs by 1/255 (assumes 0-255 pixel values - TODO confirm).
    # The input shape is declared once, on the first layer, from the config values
    # instead of a second hard-coded 256.
    layers.experimental.preprocessing.Rescaling(
        1./255, input_shape=(img_height, img_width, 3)),
    # Random augmentation.
    layers.experimental.preprocessing.RandomFlip("horizontal"),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
    # Convolutional feature extractor: nb_filters * 1, 2, 3, 4 filters (was 16, 32, 64).
    layers.Conv2D(nb_filters, kernel_size, padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters*2, kernel_size, padding='same', activation='relu'),  # drop layers.. for initial testing
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters*3, kernel_size, padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters*4, kernel_size, padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    # TODO: insert more layers?
    # TODO: insert callback?
    # Classification head.
    layers.Flatten(),
    layers.Dense(256, activation='relu'),  # increase this? add another dense layer?
    layers.Dropout(0.5),
    # Single sigmoid unit for the binary task; a multi-class variant would use
    # layers.Dense(num_classes, ...) instead.
    layers.Dense(1, activation='sigmoid'),
])

In [147]:
# The network ends in a single sigmoid unit, so the loss receives probabilities
# rather than logits (from_logits=False).
# If multiclass, use this instead (the output layer would have to change as well):
#   loss = keras.losses.CategoricalCrossentropy(from_logits=True),
#   metrics = ['accuracy']
model.compile(
    optimizer='adam',
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [148]:
# check model: print every layer with its output shape and parameter count
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_3 (Rescaling)      (None, 256, 256, 3)       0         
_________________________________________________________________
random_flip_3 (RandomFlip)   (None, 256, 256, 3)       0         
_________________________________________________________________
random_rotation_3 (RandomRot (None, 256, 256, 3)       0         
_________________________________________________________________
random_zoom_3 (RandomZoom)   (None, 256, 256, 3)       0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 256, 256, 32)      416       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 128, 128, 64)     

In [152]:
# Run the training loop; X_test is used as the validation data.
# TODO: try more epochs (maybe 1000.. at least 100)
# batch_size is deliberately not passed: X_train was loaded as a batched dataset.
history = model.fit(
    X_train,
    validation_data=X_test,
    epochs=1,
    verbose=1,
)  # weighted_metrics



In [153]:
# Evaluate on the test dataset. With the compile settings used above, `score` holds
# the loss first and the accuracy metric second.
score = model.evaluate(X_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Test score: 0.6400513052940369
Test accuracy: 0.6710109114646912
