In [None]:
# Mount Google Drive so that the paths under /content/drive used later in this
# notebook (TFRecord shards, saved model directory) are reachable.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, losses, Model
from typing import Tuple
# Explicitly request eager execution through the TF1 compatibility API.
# NOTE(review): TF 2.x executes eagerly by default, so this call is presumably
# redundant on the 2.x runtime this notebook prints below — confirm before removing.
tf.compat.v1.enable_eager_execution()


# Restrict CUDA to the first GPU. The variable name must be upper-case:
# environment variable names are case-sensitive on Linux, so a lower-case key
# would be silently ignored by the CUDA runtime.
# NOTE(review): this runs after `import tensorflow`; it presumably still takes
# effect as long as no GPU has been initialised yet — confirm, or move it above
# the TensorFlow import.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from glob import glob
import io
from PIL import Image

import math, re, os
import numpy as np

from collections import Counter

# Record the TensorFlow version this notebook was executed with.
print(f"Tensorflow version {tf.__version__}")

Tensorflow version 2.8.0


In [None]:
# Report the default GPU, if TensorFlow can see one; print nothing otherwise.
gpu_name = tf.test.gpu_device_name()
if gpu_name:
    print('Default GPU Device:{}'.format(gpu_name))

In [None]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""PREPROCESSING"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

'PREPROCESSING'

In [None]:
# --- Image geometry and dataset location -------------------------------------
IMAGE_SIZE = [224, 224]  # first two dimensions of every decoded image
PATH = '/content/drive/MyDrive/Colab_Notebooks/combined_data_splitted'

# --- tf.data tuning ------------------------------------------------------------
AUTO = tf.data.experimental.AUTOTUNE  # let tf.data pick parallelism / buffer sizes
BATCH_SIZE = 16

# --- TFRecord shard lists, one per split ---------------------------------------
TRAIN_PATH = tf.io.gfile.glob(f'{PATH}/train/*.tfrecord')
VAL_PATH = tf.io.gfile.glob(f'{PATH}/validation/*.tfrecord')
TEST_PATH = tf.io.gfile.glob(f'{PATH}/test/*.tfrecord')

In [None]:
# Human-readable class names, one per class the classifier predicts.
# NOTE(review): the list index is presumably the integer "class" id stored in
# the TFRecords — confirm against the dataset before relying on the order.
# Names carry no leading/trailing whitespace so they can be displayed and
# compared directly.
CLASSES = [
    'pink primrose',
    'wild geranium',
    'spear thistle',
    'yellow iris',
    'wallflower',
    'buttercup',
    'daisy',
    'common dandelion',
    'petunia',
    'sunflower',
    'iris',
    'windflower',
    'water lily',
    'rose',
    'morning glory',
    'frangipani',
    'cyclamen',
    'foxglove',
    'common tulip',
    'wild rose',
]


In [None]:

def decode_image(image_data):
    """Decode JPEG bytes into a float32 RGB tensor with values in [0, 1].

    Args:
        image_data: Scalar string tensor holding the JPEG-encoded image.

    Returns:
        A float32 tensor reshaped to ``[*IMAGE_SIZE, 3]``.
    """
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    # Cast to float and rescale to the [0, 1] range.
    scaled = tf.cast(pixels, tf.float32) / 255.0
    # Pin the static shape explicitly; the decoder alone does not provide it.
    target_shape = [*IMAGE_SIZE, 3]
    return tf.reshape(scaled, target_shape)

def read_labeled_tfrecord(example):
    """Parse one serialized TFRecord example into an ``(image, label)`` pair.

    Args:
        example: Serialized example containing an "image" byte-string feature
            and an int64 "class" feature.

    Returns:
        Tuple ``(image, label)``: the image decoded by ``decode_image`` and the
        class id cast to ``tf.int32``.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),  # encoded image bytes
        "class": tf.io.FixedLenFeature([], tf.int64),   # shape [] -> single scalar
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), tf.cast(parsed['class'], tf.int32)


def load_dataset(filenames, labeled=True, ordered=False):
    """Create a dataset of ``(image, label)`` pairs from TFRecord files.

    Args:
        filenames: TFRecord file path(s) to read.
        labeled: Accepted for interface compatibility; not used by this
            implementation (records are always parsed as labeled).
        ordered: When False (default), deterministic ordering is switched off
            so elements are yielded as soon as they are available.

    Returns:
        A ``tf.data.Dataset`` yielding the output of ``read_labeled_tfrecord``.
    """
    read_options = tf.data.Options()
    if not ordered:
        # Trade element order for throughput.
        read_options.experimental_deterministic = False

    return (
        tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)  # parallel file reads
        .with_options(read_options)
        .map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    )

In [None]:
def duplicate_label(image, label):
    """Return ``image`` together with a 3-tuple repeating the same ``label``."""
    return image, (label,) * 3

def data_augment(image, label):
    """Apply random training-time augmentation to one ``(image, label)`` pair.

    The image is randomly flipped left/right and then given a random
    saturation factor drawn from the range [0, 2]; the label passes through
    unchanged.
    """
    flipped = tf.image.random_flip_left_right(image)
    augmented = tf.image.random_saturation(flipped, lower=0, upper=2)
    return augmented, label


def get_training_dataset(path = TRAIN_PATH, labeled = True):
    """Build the training pipeline: load, augment, shuffle, batch, prefetch.

    Args:
        path: TFRecord file list to read; defaults to ``TRAIN_PATH``.
        labeled: Accepted but not used by this function.

    Returns:
        A batched ``tf.data.Dataset`` of augmented ``(image, label)`` pairs.
    """
    return (
        load_dataset(path)
        .map(data_augment, num_parallel_calls=AUTO)
        # NOTE(review): 6536 is presumably the number of training images — confirm.
        .shuffle(6536)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)  # overlap input preparation with training
    )

def get_validation_dataset(path = VAL_PATH):
    """Build the validation pipeline: load, batch, cache, prefetch.

    Args:
        path: TFRecord file list to read; defaults to ``VAL_PATH``.

    Returns:
        A batched and cached ``tf.data.Dataset`` of ``(image, label)`` pairs.
    """
    return (
        load_dataset(path)
        .batch(BATCH_SIZE)
        .cache()  # cache the already-batched elements
        .prefetch(AUTO)
    )

def get_test_dataset(path = TEST_PATH):
    """Build the test pipeline: load, batch, prefetch.

    Args:
        path: TFRecord file list to read; defaults to ``TEST_PATH``.

    Returns:
        A batched ``tf.data.Dataset`` of ``(image, label)`` pairs.
    """
    return load_dataset(path).batch(BATCH_SIZE).prefetch(AUTO)


In [None]:
# Instantiate the three input pipelines from their default file lists.
ds_train, ds_val, ds_test = (
    get_training_dataset(),
    get_validation_dataset(),
    get_test_dataset(),
)

In [None]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""NETWORK DESIGN"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

'NETWORK DESIGN'

In [None]:
  # Small CNN classifier: four Conv2D(3x3, "same" padding) + 2x2 max-pool stages,
  # then two 32-unit dense layers and a softmax output.
  # The input shape (batch dimension excluded) and the number of output units are
  # derived from IMAGE_SIZE and CLASSES so the network always matches what the
  # input pipeline produces and the declared label set.
  model = models.Sequential([
      # convolution + max-pooling feature extractor
      layers.Conv2D(16, (3, 3), activation='relu', padding="same",
                    input_shape=(*IMAGE_SIZE, 3), data_format="channels_last"),
      layers.MaxPooling2D((2, 2)),
      layers.Conv2D(32, (3, 3), activation='relu', padding="same"),
      layers.MaxPooling2D((2, 2)),
      layers.Conv2D(32, (3, 3), activation='relu', padding="same"),
      layers.MaxPooling2D((2, 2)),
      layers.Conv2D(32, (3, 3), activation='relu', padding="same"),
      layers.MaxPooling2D((2, 2)),
      # fully-connected classification head
      layers.Flatten(),
      layers.Dense(32, activation='relu'),
      layers.Dense(32, activation='relu'),
      layers.Dense(len(CLASSES), activation='softmax'),
  ])

In [None]:
# Integer class ids + softmax outputs -> sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss=losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 224, 224, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 16)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 32)      4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 56, 56, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 56, 56, 32)        9248      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 28, 28, 32)       0

In [None]:
# Early stopping on validation accuracy: halt after 15 epochs without an
# improvement and roll back to the best weights seen.
# NOTE(review): min_delta=0.05 requires a 0.05 jump in val_accuracy for an epoch
# to count as an improvement, which looks large — confirm this is intended.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode="max",
    min_delta=0.05,
    patience=15,
    restore_best_weights=True,
)
callbacks = [early_stop]

# Train for at most 120 epochs, validating on ds_val after each one.
history = model.fit(
    ds_train,
    epochs=120,
    validation_data=ds_val,
    callbacks=callbacks,
)


Epoch 1/120
Epoch 2/120
 47/409 [==>...........................] - ETA: 3:56 - loss: 2.3956 - accuracy: 0.2500

KeyboardInterrupt: ignored

In [None]:
def _draw_history_panel(ax, metric, title, ylabel):
    """Plot the train and validation curves of `metric` from `history` on `ax`.

    Returns the legend object created for the panel.
    """
    ax.plot(history.history[metric])
    ax.plot(history.history[f'val_{metric}'])
    ax.title.set_text(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(ylabel)
    return ax.legend(['Train', 'Val'])


# Two stacked panels: loss on top, accuracy below.
fig, axs = plt.subplots(2, 1, figsize=(15,15))
_draw_history_panel(axs[0], 'loss', 'Training Loss vs Validation Loss', 'Loss')
_draw_history_panel(axs[1], 'accuracy', 'Training Accuracy vs Validation Accuracy', 'Accuracy')

In [None]:
# Persist the trained model to Google Drive.
path_to_file = "/content/drive/MyDrive/Colab_Notebooks/Models/RandomModel"
model.save(filepath=path_to_file)