In [1]:
# generic imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# notebook settings
%config IPCompleter.greedy=True
%load_ext autoreload
%autoreload 2 
# precision and plot settings
num_precision = 3
np.set_printoptions(precision=num_precision, suppress=True)
pd.set_option('display.float_format', lambda x: f'{x:,.{num_precision}f}')
pd.set_option("display.precision", num_precision)
pd.set_option('display.max_columns', None)
plt.rcParams['font.size'] = 16
plt.rcParams['legend.fontsize'] = 'large'
plt.rcParams['figure.titlesize'] = 'medium'
plt.rcParams['lines.linewidth'] = 2

In [2]:
# setup dir and import helper functions
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.path[0]),'src'))
import helper_funcs as my_funcs
from image_dataset_from_dir_return_paths import image_dataset_from_directory_paths

In [3]:
# from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow import data
from tensorflow.keras import callbacks

In [4]:
directory = '../data/symlink_data/wild_est_test'

In [5]:
batch_size = 32
img_size = 256

In [14]:
# make a dataset of 2
X_train, X_train_img_paths = image_dataset_from_directory_paths(
    directory, labels='inferred', class_names=None, 
    color_mode='rgb', batch_size=8, image_size=(img_size, img_size), shuffle=True, seed=42,
    validation_split=0.25, subset='training', interpolation='bilinear', follow_links=True
)

Found 7834 files belonging to 2 classes.
Using 5876 files for training.


In [8]:
X_train_img_paths

['../data/symlink_data/wild_est_test/Established Campground/satimg_ID_264_Established Campground_17_43.460267_-113.560938_rot180.png',
 '../data/symlink_data/wild_est_test/Wild Camping/satimg_ID_11390_Wild Camping_17_47.702711_-117.031512_rot270.png',
 '../data/symlink_data/wild_est_test/Established Campground/satimg_AZ_18_Established Campground_17_31.703691999999997_-111.063476.png',
 '../data/symlink_data/wild_est_test/Wild Camping/satimg_MT_7384_Wild Camping_17_48.782051_-114.283557_rot270.png',
 '../data/symlink_data/wild_est_test/Established Campground/satimg_ID_983_Established Campground_17_42.956692_-115.307018_rot270.png',
 '../data/symlink_data/wild_est_test/Wild Camping/satimg_AZ_17_Wild Camping_17_32.784379_-109.602116.png']

In [15]:
X_test, X_test_img_paths = image_dataset_from_directory_paths(
    directory, labels='inferred', class_names=None, 
    color_mode='rgb', batch_size=batch_size, image_size=(img_size, img_size), shuffle=True, seed=42, 
    validation_split=0.25, subset='validation', interpolation='bilinear', follow_links=True
)

Found 7834 files belonging to 2 classes.
Using 1958 files for validation.


In [10]:
X_test_img_paths

['../data/symlink_data/wild_est_test/Established Campground/satimg_MT_1237_Established Campground_17_46.65782_-111.70844_rot180.png',
 '../data/symlink_data/wild_est_test/Wild Camping/satimg_MT_5076_Wild Camping_17_47.812919_-113.854707_rot180.png']

In [12]:
X_train_img_paths[:2]

['../data/symlink_data/wild_est_test/Established Campground/satimg_ID_264_Established Campground_17_43.460267_-113.560938_rot180.png',
 '../data/symlink_data/wild_est_test/Wild Camping/satimg_ID_11390_Wild Camping_17_47.702711_-117.031512_rot270.png']

In [20]:
def get_class_weights(X_train):
    class_names = X_train.class_names
    labels = np.concatenate([y for x, y in X_train], axis=0)
    if np.ndim(labels) == 1:
        # for binary
        weights = [len(labels) - labels.sum(), labels.sum()]
    else:
        weights = list(np.sum(labels, axis=0))
    class_weights = {}
    # for class_, weight in zip(class_names, weights):
    #     class_weights[class_] = weight
    for i, weight in enumerate(weights):
        class_weights[i] = weight
    return class_names, class_weights

In [13]:
directory = '/Users/pault/Desktop/github/CampsitePredict/data/symlink_data/unique_wild_est_for_aligned_model' 

In [23]:
class_names, class_weights = get_class_weights(X_train)

In [24]:
class_weights

{0: 3, 1: 3}

In [15]:
num_classes = 2
epochs = 1 
AUTOTUNE = data.experimental.AUTOTUNE
img_height = 256
img_width = 256
nb_filters = 32    # number of convolutional filters to use - want 1 for each "feature" you think exists in images if more, more parameters
pool_size = (2, 2)  # pooling decreases image size, reduces computation, adds translational invariance
kernel_size = (2, 2)  # convolutional kernel size, slides over image to learn features
X_train = X_train.cache().shuffle(32).prefetch(buffer_size=AUTOTUNE) 
X_test = X_test.cache().prefetch(buffer_size=AUTOTUNE)

In [16]:
model = Sequential([
    layers.experimental.preprocessing.Rescaling(1./255, input_shape=(256, 256, 3)),
    layers.experimental.preprocessing.RandomFlip("horizontal", 
                                                 input_shape=(img_height, 
                                                              img_width,
                                                              3)),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
    layers.Conv2D(nb_filters, (kernel_size[0], kernel_size[1]), padding='same', activation='relu'), # was 16, 32, 64
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters*2, (kernel_size[0], kernel_size[1]), padding='same', activation='relu'), # drop layers.. for initial testing
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters*3, (kernel_size[0], kernel_size[1]), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Conv2D(nb_filters*4, (kernel_size[0], kernel_size[1]), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Flatten(),
    layers.Dense(256, activation='relu'), # increase this? add another dense layer?
    layers.Dropout(0.5),
    #layers.Dense(num_classes, activation='relu') # or sigmoid for binary? relu?
    layers.Dense(1, activation='sigmoid') # or sigmoid for binary? relu?
])

In [17]:
model.compile(optimizer='adam',
              loss=keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling (Rescaling)        (None, 256, 256, 3)       0         
_________________________________________________________________
random_flip (RandomFlip)     (None, 256, 256, 3)       0         
_________________________________________________________________
random_rotation (RandomRotat (None, 256, 256, 3)       0         
_________________________________________________________________
random_zoom (RandomZoom)     (None, 256, 256, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 256, 256, 32)      416       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 64)      8

In [19]:
# run it
history = model.fit(
            X_train,
            validation_data = X_test,
            epochs = 5,
            #batch_size=batch_size,
            verbose = 1,
#             callbacks=my_callbacks
) # weighted_metrics

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# testing this plot function
model_name = 'test'
fig, axs = plt.subplots(1, 2, figsize=(10, 8))
my_funcs.plot_train_val_acc(history, 5, model_name, axs)