# Setup

In [None]:
import tensorflow as tf
import os

In [None]:
# Used to avoid Out of Memory Errors: let TensorFlow grow its GPU memory
# allocation on demand for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth has to be configured before the GPUs are initialised, so
    # re-running this cell later in the session raises RuntimeError. Report it
    # instead of aborting the cell with a traceback.
    print(f"Could not enable GPU memory growth: {err}")

In [None]:
import cv2
import imghdr
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

In [None]:
# Root folder of the image dataset. Sub-folders are named "<lat>_<lon>"
# (e.g. '25_-80'); convert_label parses those names back into coordinates.
data_dir = 'data'

In [None]:
# Sanity check: load one sample image and display its array shape.
sample_path = os.path.join(data_dir, '25_-80', '25.00283_-80.532153.jpg')
image = cv2.imread(sample_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an opaque AttributeError.
    raise FileNotFoundError(f"Could not read image: {sample_path}")
image.shape

In [None]:
# Sanity check: load a second sample image and display its array shape.
sample_path = os.path.join(data_dir, '27_-80', '27.002005_-80.622323_270.jpg')
image = cv2.imread(sample_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an opaque AttributeError.
    raise FileNotFoundError(f"Could not read image: {sample_path}")
image.shape


# Loading & Preprocessing Data


In [None]:
# Build a batched image dataset from the class sub-folders of data_dir.
# Use the data_dir constant (not a second 'data' literal) so the loader and
# convert_label always read the same folder.
data = tf.keras.utils.image_dataset_from_directory(data_dir)

In [None]:
def convert_label(label, class_dir=None):
    """Translate a batch of integer class labels into [lat, lon] coordinates.

    Each class index is mapped to the name of its class folder, which is
    expected to look like "<lat>_<lon>" (e.g. "25_-80"), and the two parts are
    parsed as floats.

    Args:
        label: Object exposing ``.numpy()`` that yields a 1-D sequence of
            integer class indices (e.g. the label tensor passed in by
            ``tf.py_function``).
        class_dir: Folder whose sub-folders define the classes. Defaults to
            the module-level ``data_dir``.

    Returns:
        A float32 NumPy array of shape (batch, 2) holding [lat, lon] rows.

    Raises:
        IndexError: If a label is outside the range of class folders.
        ValueError: If a folder name is not two floats joined by "_".
    """
    root = data_dir if class_dir is None else class_dir
    # os.listdir returns entries in arbitrary order, whereas the dataset loader
    # numbers classes by sorted sub-folder name. Sort (and keep only visible
    # directories) so an index resolves to the same folder the loader used.
    # NOTE(review): hidden-folder handling mirrors current Keras behaviour;
    # confirm against the loader's class_names if the Keras version is old.
    class_names = sorted(
        name for name in os.listdir(root)
        if not name.startswith('.') and os.path.isdir(os.path.join(root, name))
    )

    coords = []
    for index in label.numpy():
        lat, lon = map(float, class_names[index].split('_'))
        coords.append([lat, lon])
    # reshape keeps the (batch, 2) layout even when the batch is empty.
    return np.array(coords, dtype=np.float32).reshape(-1, 2)

In [None]:
# Divide every pixel value in each image batch by 255; the labels pass through unchanged
data = data.map(lambda images, labels: (images / 255, labels))

In [None]:
def _with_coordinates(images, labels):
    """Replace a batch of class-index labels with [lat, lon] coordinates."""
    coords = tf.py_function(func=convert_label, inp=[labels], Tout=tf.float32)
    # tf.py_function returns a tensor with unknown static shape; declare it as
    # (batch, 2) so Keras can build the loss and metrics against it.
    coords.set_shape([None, 2])
    return images, coords


data = data.map(_with_coordinates)

In [None]:
scaled_iterator = data.as_numpy_iterator()

In [None]:
batch = scaled_iterator.next()

In [None]:
#### 2 parts to each batch -- Images & Labels
# Images --> batch[0], e.g. shape (32, 256, 256, 3): 32 images of size 256 by 256 with 3 layers (RGB)
# Labels --> batch[1], one [lat, lon] pair per image (class indices have already been converted to coordinates)
# A notebook cell only displays its last expression, so print the intermediate checks explicitly.
print(batch[0].min(), batch[0].max())  # lowest / highest pixel value after scaling
print(batch[1][0])  # coordinates of the first image in the batch
batch[1]

In [None]:
# Show the first four images of the batch side by side, each titled with its "lat, lon" label
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
for axis, img, coords in zip(ax, batch[0][:4], batch[1]):
    axis.imshow(img)
    axis.title.set_text(f"{coords[0]}, {coords[1]}")

## Splitting Data

In [None]:
# Split sizes are counted in batches (len(data) is the number of batches).
n_batches = len(data)
train_size = int(n_batches * .7)
validation_size = int(n_batches * .15)
# The test split receives every batch left over after train and validation,
# so derive its size from the remainder; int(n_batches * .15) under-reports it
# whenever the truncations above drop a batch.
test_size = n_batches - train_size - validation_size
train_size, validation_size, test_size

In [None]:
# The first train_size batches are for training; what remains is divided
# into validation (next validation_size batches) and test (everything after).
# NOTE(review): if the loader reshuffles on every pass, take/skip will not give
# fixed partitions across epochs -- confirm the loader's shuffle behaviour.
train_ds = data.take(train_size)
held_out = data.skip(train_size)
val_ds = held_out.take(validation_size)
test_ds = held_out.skip(validation_size)

# Building the Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from tensorflow.keras.preprocessing import image

In [None]:
model = Sequential()

In [None]:
model.add(Conv2D(32, (3,3), 1, activation='relu', input_shape = (256,256,3)))
model.add(MaxPooling2D())

model.add(Conv2D(64, (3,3), 1, activation='relu'))
model.add(MaxPooling2D())

model.add(Conv2D(128, (3,3), 1, activation='relu'))
model.add(MaxPooling2D())

model.add(Flatten())

model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='linear'))

In [None]:
model.compile('adam', loss= 'mse', metrics = ['mae'])

In [None]:
model.summary()

In [None]:
logdir = 'logs'

In [None]:
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
print('hi')

In [None]:
history = model.fit(train_ds, epochs=20, validation_data = val_ds, callbacks = [tensorboard_callback])

In [None]:
history.history

In [None]:
from tensorflow.keras.models import load_model

In [None]:
model.save(os.path.join('models', 'geoCNNmodel.h5'))

In [None]:
new_model = load_model(os.path.join('models', 'geoCNNmodel.h5'))

In [None]:
new_model

In [None]:
# Plot training vs. validation loss.
# The curves are read from the History object returned by model.fit; a model
# restored with load_model carries no training history to index into.
fig = plt.figure()
plt.plot(history.history['loss'], color = 'aqua', label = 'loss')
plt.plot(history.history['val_loss'], color = 'orange', label = 'val_loss')
fig.suptitle('Loss')
plt.legend(loc='upper left')
# plt.show must be called; the bare attribute reference renders nothing.
plt.show()