In [1]:
from tensorflow.keras.preprocessing import image_dataset_from_directory
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Root folder handed to image_dataset_from_directory below; with labels='inferred'
# the class labels come from its sub-directories (the loader reports 2 classes).
directory = '../data/sat_images/wild_est/'

In [6]:
# Input-pipeline settings shared by the training and validation loaders.
img_size = 256    # images are resized to img_size x img_size when loaded
batch_size = 32   # number of images per batch produced by each dataset

In [19]:
# Training split: the files that remain after holding out validation_split=0.2.
# Labels are inferred from the class sub-directories and label_mode keeps its default.
# NOTE(review): label_mode='binary' is the documented pairing for a binary_crossentropy
# loss -- confirm whether to switch; renaming the directories to 0/1 should not be needed.
X_train = image_dataset_from_directory(
    directory,
    subset='training',
    validation_split=0.2,
    seed=42,
    shuffle=True,
    labels='inferred',
    class_names=None,
    color_mode='rgb',
    image_size=(img_size, img_size),
    interpolation='bilinear',
    batch_size=batch_size,
    follow_links=True,
)
# batch_size decides how many images each step of the training epochs below consumes.

Found 16232 files belonging to 2 classes.
Using 12986 files for training.


In [8]:
# Validation split: the 20% of files held out by validation_split=0.2. The seed and split
# fraction are the same values passed to the training loader.
# NOTE(review): label_mode='binary' is the documented pairing for a binary_crossentropy
# loss -- confirm whether to switch; renaming the directories to 0/1 should not be needed.
# NOTE(review): shuffle=True is probably unnecessary for validation data; it does not change
# evaluate() results but makes it harder to line predictions up with labels -- confirm.
X_test = image_dataset_from_directory(
    directory,
    subset='validation',
    validation_split=0.2,
    seed=42,
    shuffle=True,
    labels='inferred',
    class_names=None,
    color_mode='rgb',
    image_size=(img_size, img_size),
    interpolation='bilinear',
    batch_size=batch_size,
    follow_links=True,
)

Found 16232 files belonging to 2 classes.
Using 3246 files for validation.


In [151]:
# Show the structure of one validation batch: an (images, labels) pair of tensor specs.
X_test.element_spec

(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(None,), dtype=tf.int32, name=None))

In [None]:
from tensorflow.data.experimental import save

# Persist both dataset splits to disk with GZIP compression (default sharding).
# NOTE(review): these output paths are relative to the working directory ('data/...'),
# whereas the images are read from '../data/...' -- confirm the intended location.
for split, out_dir in ((X_train, 'data/X_train'), (X_test, 'data/X_test')):
    save(split, out_dir, compression='GZIP', shard_func=None)

In [9]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow import data

In [75]:
# Hyper-parameters for the CNN and its training run.
epochs = 2        # number of full passes over the training dataset
num_classes = 2   # NOTE(review): only appears in a commented-out layer of the visible model cell
AUTOTUNE = data.experimental.AUTOTUNE  # lets tf.data choose buffer sizes at runtime

# Optional input-pipeline caching / prefetching (currently disabled):
# X_train = X_train.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# X_test = X_test.cache().prefetch(buffer_size=AUTOTUNE)

# Image geometry.
# NOTE(review): the visible model cell does not read img_height / img_width.
img_height = img_width = 256
# input_shape = (img_rows, img_cols, 1)   # 1-channel (grayscale) variant, KEEP for reference

# Convolution / pooling settings.
nb_filters = 16       # number of filters in each Conv2D layer
kernel_size = (4, 4)  # convolution window size; NOTE(review): the visible model cell passes a literal 3 instead
pool_size = (2, 2)    # pooling halves each spatial dimension, cutting computation and adding translational invariance
# batch_size = 1000   # samples per weight update (the active batch_size is set with the loader settings)

# TODO:
# - add a callback that saves the weights
#   https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback
# - reduce the learning rate on plateau
# - decide on padding; padding='same' in the conv layers below pads the input when needed
# - try model.add(Dropout(0.5)) to help prevent overfitting

# Dropout, when applied to a layer, randomly sets a fraction of that layer's output units
# to zero during training. The fraction is the layer's argument (0.1, 0.2, 0.4, ... meaning
# 10%, 20% or 40% of the units are dropped).

In [81]:
# Small CNN for binary classification: three Conv2D + MaxPooling2D stages followed by a
# dense head that ends in a single sigmoid unit.
model = Sequential([
    # Rescale pixel values by 1/255. The input size comes from img_size rather than a
    # hard-coded 256 so it always matches the image_size the datasets are loaded with;
    # the 3 channels correspond to color_mode='rgb' in the loaders.
    layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_size, img_size, 3)),
    # Feature extractor: each stage applies nb_filters 3x3 filters with 'same' padding,
    # then pools by pool_size (author's note on the filter count: "was 16, 32, 64").
    layers.Conv2D(nb_filters, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters, 3, padding='same', activation='relu'),  # candidate to drop for quick tests
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    # TODO: consider adding more layers here.
    # Classifier head.
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    # layers.Dropout(0.3),  # optional regularisation, currently disabled
    # layers.Dense(num_classes, activation='relu')  # earlier multi-unit variant
    # One sigmoid unit, to go with the BinaryCrossentropy(from_logits=False) loss.
    layers.Dense(1, activation='sigmoid'),
])

In [82]:
# Configure training: Adam optimizer, binary cross-entropy on the sigmoid output
# (from_logits=False because the last layer already applies the sigmoid), accuracy metric.
model.compile(
    optimizer='adam',
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
# Multiclass alternative noted by the author:
#   loss = keras.losses.CategoricalCrossentropy(from_logits=True),
#   metrics = ['accuracy']

In [83]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
# NOTE(review): the saved output below lists 32 filters per Conv2D layer while nb_filters
# is set to 16 in the parameter cell, so the output looks stale -- re-run to refresh it.
model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_13 (Rescaling)     (None, 256, 256, 3)       0         
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 256, 256, 32)      896       
_________________________________________________________________
max_pooling2d_36 (MaxPooling (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_37 (Conv2D)           (None, 128, 128, 32)      9248      
_________________________________________________________________
max_pooling2d_37 (MaxPooling (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_38 (MaxPooling (None, 32, 32, 32)      

In [84]:
# Train the model and keep the returned History object.
# TODO: try many more epochs (at least 100, maybe 1000).
# batch_size is not passed here: X_train is already batched by the loader.
history = model.fit(
    X_train,
    epochs=epochs,
    validation_data=X_test,
    verbose=1,
)  # weighted_metrics

Epoch 1/2
Epoch 2/2


In [85]:
# evaluate() returns the loss followed by the compiled metrics; predict() would return the
# model's outputs instead of scores.
# NOTE(review): X_test is the same dataset used as validation_data during fit.
score = model.evaluate(X_test, verbose=1)
loss_value, accuracy_value = score[0], score[1]
print('Test score:', loss_value)
print('Test accuracy:', accuracy_value)  # this is the one we care about

Test score: 0.5555156469345093
Test accuracy: 0.734442412853241
