In [None]:
"""
Hello y'all see this?
"""
from google.colab import drive
import random
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.image import imread
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from sklearn.preprocessing import LabelEncoder

In [None]:
# Make the Google Drive contents visible to this Colab runtime.
# `force_remount=True` is passed so the mount is redone even if one exists.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT, force_remount=True)

In [None]:
# Unzip training data, run once
#!unzip drive/MyDrive/data/train.zip -d drive/MyDrive/data/

In [None]:
# Unzip validation data, run once
#!unzip drive/MyDrive/data/validation.zip -d drive/MyDrive/data/

In [None]:
# Collect the file name and label of every training/validation image and put
# them in separate dataframes (columns: 'path', 'label').
#
# 'path' holds the bare file names returned by `os.listdir`, i.e. names
# relative to `troot` / `vroot`, not absolute paths.
troot = 'drive/MyDrive/data/train-cropped'
vroot = 'drive/MyDrive/data/validation-cropped'

# The label is read from a fixed position counted from the end of the name.
# NOTE(review): assumes every file name carries its two-character label at
# positions [-10:-8] -- confirm against the data set's naming scheme.
train_paths = os.listdir(troot)
train_labels = [name[-10:-8] for name in train_paths]

validation_paths = os.listdir(vroot)
validation_labels = [name[-10:-8] for name in validation_paths]

train_images = pd.DataFrame({'path': train_paths, 'label': train_labels})
validation_images = pd.DataFrame({'path': validation_paths, 'label': validation_labels})

In [None]:
def get_subset(n, data, seed=123):
  """
  Draw a reproducible random subset of rows from a dataframe.

  input: number of samples `n`, dataframe `data` to be sampled from, and the
         `seed` used to re-seed Python's global `random` generator

  output: new dataframe with `n` distinct rows of `data`, in sampled order
          (the original index labels are kept)

  raises: ValueError if `n` is negative or exceeds the number of rows
  """
  random.seed(seed)
  n_rows = data.shape[0]
  if n < 0 or n > n_rows:
    raise ValueError(
        "cannot sample {} rows from a dataframe with {} rows".format(n, n_rows))
  # Valid positional indices are 0 .. n_rows - 1; sampling from
  # range(n_rows + 1) could pick n_rows and make `iloc` fail.
  sub = random.sample(range(n_rows), n)
  # Return an independent copy so callers can add columns to the subset
  # without pandas flagging it as a slice of `data`.
  return data.iloc[sub, :].copy()

In [None]:
# Input geometry and number of target classes
imageWidth = imageHeight = 128
channel = 1
imageSize = imageWidth, imageHeight
nCategories = 5

# Create model: two convolutional stages followed by a small dense classifier.
model = Sequential([
    # Convolutional stage 1
    Conv2D(64, 2,
           activation='relu',
           input_shape=(imageWidth, imageHeight, channel)),
    BatchNormalization(),
    MaxPooling2D(pool_size=2),
    Dropout(0.25),

    # Convolutional stage 2
    Conv2D(16, 2, activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=2),
    Dropout(0.25),

    # Disabled third convolutional stage, kept for reference:
    # Conv2D(filters = 128,
    #        kernel_size=2,
    #        activation='relu'),
    # BatchNormalization(),
    # MaxPooling2D(pool_size=2),
    # Dropout(0.25),

    Flatten(),

    # Dense classifier head
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # One softmax output per category
    Dense(nCategories, activation='softmax'),
])

# No optimizer is passed, so `compile` falls back to its default.
model.compile(metrics=['accuracy'], loss='categorical_crossentropy')
model.summary()

In [None]:
# Prepare training data generator

train_subset = get_subset(1000, train_images)

# Every pixel value is multiplied by 1/255 before being fed to the model.
train_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_subset,
    directory=troot,           # file names in the 'path' column are looked up here
    x_col='path',
    y_col='label',
    target_size=imageSize,
    color_mode="grayscale",
    class_mode='categorical',  # target is 2-D array of one-hot encoded labels
    shuffle=False,
)

# Mapping from label string to the class index used by the generator
label_map = train_generator.class_indices

In [None]:
## Model Training:
# Fit on the training generator for a single epoch; the returned object is
# kept in `history`.
n_epochs = 1
history = model.fit(train_generator, epochs=n_epochs)

In [None]:
# Prepare validation data generator

validation_subset = get_subset(1000, validation_images)

# Same 1/255 pixel rescaling as for the training images.
validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validation_subset,
    directory=vroot,           # file names in the 'path' column are looked up here
    x_col='path',
    target_size=imageSize,
    color_mode="grayscale",
    class_mode=None,           # no label column is handed to the generator
    shuffle=False,             # images are not shuffled by the generator
)

In [None]:
## Make categorical prediction:
print(" --- Predicting on validation data ---")
# One row of predicted class probabilities per validation image
phat = model.predict(validation_generator)
print("Predicted probability array shape:", phat.shape)

# Show only the first few rows instead of dumping the whole array
n_preview = 5
print("Example:\n", phat[:n_preview])

In [None]:
## Convert labels to categories:
# Index of the most probable class for every predicted row
predicted_index = np.argmax(phat, axis=1)

# Predictions are attached to the rows of `validation_subset` by position, so
# both must have the same length; otherwise labels and predictions would be
# compared for different images.
if len(predicted_index) != len(validation_subset):
    raise ValueError(
        "got {} predictions for {} validation rows".format(
            len(predicted_index), len(validation_subset)))

# `assign` builds a new dataframe instead of writing a column into the frame
# returned by `iloc`, which avoids pandas' SettingWithCopyWarning.
validation_subset = validation_subset.assign(
    predicted=pd.Series(predicted_index, index=validation_subset.index))
print(validation_subset.head())

# Invert the generator's label -> index mapping to translate indices back
labelMap = {v: k for k, v in label_map.items()}
validation_subset = validation_subset.assign(
    predicted=validation_subset.predicted.replace(labelMap))

print("confusion matrix (validation)")
print(pd.crosstab(validation_subset.label, validation_subset.predicted))
print("Validation accuracy", np.mean(validation_subset.label == validation_subset.predicted))

In [None]:
## Print and plot misclassified results
def _show_examples(results, rows, first_slot):
    """Draw the images of the given rows into consecutive slots of a 4x2 grid.

    results:    dataframe with 'path' (file name) and 'predicted' columns
    rows:       index labels of `results` to draw
    first_slot: 1-based subplot position of the first image
    """
    for slot, row in enumerate(rows, start=first_slot):
        filename = results.loc[row, 'path']
        predicted = results.loc[row, 'predicted']
        # 'path' holds bare file names, so they are joined with the validation
        # image directory `vroot` (the original used an undefined `vpath`).
        img = load_img(os.path.join(vroot, filename), target_size=imageSize)
        plt.subplot(4, 2, slot)
        plt.imshow(img)
        plt.xlabel(filename + " ({})".format(predicted))


wrongResults = validation_subset[validation_subset.predicted != validation_subset.label]
rows = np.random.choice(wrongResults.index, min(4, wrongResults.shape[0]), replace=False)
print("Example wrong results (validation data)")
print(wrongResults.sample(min(10, wrongResults.shape[0])))

## Plot up to 4 wrong (slots 1-4) and up to 4 correct (slots 5-8) results
plt.figure(figsize=(12, 12))
_show_examples(wrongResults, rows, first_slot=1)

# now show correct results
correctResults = validation_subset[validation_subset.predicted == validation_subset.label]
rows = np.random.choice(correctResults.index,
                        min(4, correctResults.shape[0]), replace=False)
_show_examples(correctResults, rows, first_slot=5)

plt.tight_layout()
plt.show()