In [1]:
import numpy as np
import os

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
from PIL import Image

In [4]:
def get_data_path():
    """Return the root directory that holds the datasets and checkpoints.

    The location can be overridden without editing the notebook by setting
    the ``LEAF_DATA_DIR`` environment variable. When the variable is unset
    or empty, the original hard-coded location is returned.

    Returns:
        str: path of the data root directory.
    """
    # `or` (rather than a .get default) also covers an empty-string override.
    return os.environ.get('LEAF_DATA_DIR') or '/Users/thibaultdesfontaines/data'

In [5]:
# Record the TensorFlow release this notebook was executed with.
print(tf.__version__)

2.1.0


In [6]:
# Root of the leaf-classification dataset, plus one sub-directory per split.
data_dir = os.path.join(get_data_path(), "leaf-classification")
train_dir, validation_dir, test_dir = (
    os.path.join(data_dir, split_subdir)
    for split_subdir in ('train_images', 'validation_images', 'test_images')
)

In [7]:
! tree {validation_dir}

[01;34m/Users/thibaultdesfontaines/data/leaf-classification/validation_images[00m
├── [01;34mAcer_Capillipes[00m
│   ├── [01;32m201.jpg[00m
│   └── [01;32m227.jpg[00m
├── [01;34mAcer_Circinatum[00m
│   ├── [01;32m160.jpg[00m
│   └── [01;32m164.jpg[00m
├── [01;34mAcer_Mono[00m
│   ├── [01;32m216.jpg[00m
│   └── [01;32m288.jpg[00m
├── [01;34mAcer_Opalus[00m
│   ├── [01;32m1.jpg[00m
│   └── [01;32m317.jpg[00m
├── [01;34mAcer_Palmatum[00m
│   ├── [01;32m118.jpg[00m
│   └── [01;32m27.jpg[00m
├── [01;34mAcer_Pictum[00m
│   ├── [01;32m146.jpg[00m
│   └── [01;32m311.jpg[00m
├── [01;34mAcer_Platanoids[00m
│   ├── [01;32m322.jpg[00m
│   └── [01;32m38.jpg[00m
├── [01;34mAcer_Rubrum[00m
│   ├── [01;32m238.jpg[00m
│   └── [01;32m698.jpg[00m
├── [01;34mAcer_Rufinerve[00m
│   ├── [01;32m148.jpg[00m
│   └── [01;32m364.jpg[00m
├── [01;34mAcer_Saccharinum[00m
│   ├── [01;32m167.jpg[00m
│   └── [01;32m78.jpg[00m


In [8]:
# Class names are the non-hidden entries of the training directory, sorted
# alphabetically.
visible_entries = (
    entry for entry in os.listdir(train_dir) if not entry.startswith('.')
)
CLASS_NAMES = np.array(sorted(visible_entries))
print("Number of classes: {}".format(CLASS_NAMES.size))

Number of classes: 99


In [9]:
# Checkpoints live next to the datasets, under the shared data root. The
# trailing "" keeps the trailing separator of the original path.
checkpoint_dir = os.path.join(get_data_path(), "training_1", "")
latest = tf.train.latest_checkpoint(checkpoint_dir)
# latest_checkpoint returns None when the directory holds no checkpoint;
# fail here with a clear message instead of later inside load_weights.
if latest is None:
    raise FileNotFoundError(
        "No checkpoint found in {}".format(checkpoint_dir))
print("{}".format(latest))

/Users/thibaultdesfontaines/data/training_1/cp.ckpt


In [10]:
# Square input resolution used by the model's first layer and by decode_img.
IMG_HEIGHT = IMG_WIDTH = 224

In [11]:
def create_model(num_classes=99):
    """Build and compile the convolutional leaf classifier.

    The network is three Conv2D/MaxPooling2D stages (16, 32 and 64 filters,
    with Dropout after the first and third stage), followed by a 512-unit
    dense layer and a final dense layer that outputs one raw logit per class.

    Args:
        num_classes: width of the output layer. Defaults to 99, the value
            that was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model taking inputs of shape
        ``(IMG_HEIGHT, IMG_WIDTH, 3)``.
    """

    model = Sequential([
        Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        MaxPooling2D(),
        Dropout(0.2),
        Conv2D(32, 3, padding='same', activation='relu'),
        MaxPooling2D(),
        Conv2D(64, 3, padding='same', activation='relu'),
        MaxPooling2D(),
        Dropout(0.2),
        Flatten(),
        Dense(512, activation='relu'),
        # No activation: the loss below is configured with from_logits=True.
        Dense(num_classes)
    ])

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    return model

In [12]:
# Rebuild the architecture, print its layer summary, then restore the weights
# from the checkpoint path stored in `latest`.
model = create_model()

model.summary()

# Last expression of the cell: the object returned by load_weights is what the
# notebook displays as this cell's result.
model.load_weights(latest)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 16)      0         
_________________________________________________________________
dropout (Dropout)            (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 32)      4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 28, 28, 64)        0

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x13a59ff90>

In [14]:
# Directory of the validation images for the class "Acer_Capillipes", derived
# from validation_dir instead of repeating the absolute path. The trailing ""
# keeps the trailing separator of the original value.
class_0_path = os.path.join(validation_dir, "Acer_Capillipes", "")

In [17]:
# Dataset of file paths: every entry directly inside the class directory.
class_0_pattern = os.path.join(class_0_path, '*')
list_ds = tf.data.Dataset.list_files(class_0_pattern)

In [18]:
# Peek at (at most) the first five file paths of the dataset.
for path_tensor in list_ds.take(5):
    print(path_tensor.numpy())

/Users/thibaultdesfontaines/data/leaf-classification/validation_images/Acer_Capillipes/227.jpg
/Users/thibaultdesfontaines/data/leaf-classification/validation_images/Acer_Capillipes/201.jpg


In [19]:
def decode_img(img):
    """Turn raw JPEG bytes into a float image of the model's input size.

    Args:
        img: the encoded JPEG content, as read from the file.

    Returns:
        The decoded 3-channel image, converted to float32 and resized to
        ``[IMG_HEIGHT, IMG_WIDTH]``.
    """
    # Decode to a 3-channel uint8 tensor.
    pixels_uint8 = tf.image.decode_jpeg(img, channels=3)
    # convert_image_dtype rescales the values to floats in the [0,1] range.
    pixels_float = tf.image.convert_image_dtype(pixels_uint8, tf.float32)
    target_size = [IMG_HEIGHT, IMG_WIDTH]
    return tf.image.resize(pixels_float, target_size)

In [20]:
def get_name(file_path):
    """Return the last component of ``file_path``, i.e. the file name.

    Args:
        file_path: string tensor holding a path whose components are
            separated by ``os.path.sep``.

    Returns:
        String tensor with the final path component (e.g. ``227.jpg``).
    """
    # Split the path into its components on the platform separator.
    parts = tf.strings.split(file_path, os.path.sep)
    # The last component is the file name; the class directory would be the
    # second to last one (parts[-2]).
    return parts[-1]

In [21]:
def process_path(file_path):
    """Load one image file and pair it with its file name.

    Args:
        file_path: string tensor with the path of a JPEG file.

    Returns:
        Tuple ``(img, name)``: the image produced by ``decode_img`` and the
        file name produced by ``get_name``.
    """
    # Read the file content as a raw byte string, then decode it.
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes), get_name(file_path)

In [22]:
# Shorthand for the tf.data autotuning constant; used below as the
# `num_parallel_calls` of `map` and as the `buffer_size` of `prefetch`.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [23]:
def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000, batch_size=None):
    """Cache, shuffle, repeat, batch and prefetch a dataset.

    Args:
        ds: dataset to prepare; must provide ``cache``, ``shuffle``,
            ``repeat``, ``batch`` and ``prefetch``.
        cache: falsy to skip caching; a ``str`` is passed to ``ds.cache`` as
            the cache file name (for datasets that don't fit in memory); any
            other truthy value caches with ``ds.cache()``.
        shuffle_buffer_size: buffer size handed to ``ds.shuffle``.
        batch_size: number of elements per batch. ``None`` (the default)
            falls back to the notebook-level ``BATCH_SIZE`` constant, which
            is looked up when the function is called.

    Returns:
        The prepared dataset. It repeats forever, so callers must bound the
        number of batches they consume.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    # This is a small dataset, only load it once, and keep it in memory.
    # use `.cache(filename)` to cache preprocessing work for datasets that don't
    # fit in memory.
    if cache:
        if isinstance(cache, str):
            ds = ds.cache(cache)
        else:
            ds = ds.cache()

    ds = ds.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat forever
    ds = ds.repeat()

    ds = ds.batch(batch_size)

    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)

    return ds

In [33]:
# Batch size read by prepare_for_training; 2 matches the two files listed
# under each class directory in the validation tree.
BATCH_SIZE = 2

In [34]:
# Map every file path to an (image, file name) pair; note the second element
# is the file name returned by get_name, not a class label.
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)

In [35]:
# Build the batched pipeline and pull a single batch out of it.
train_ds = prepare_for_training(labeled_ds)

first_batch = next(iter(train_ds))
image_batch, label_batch = first_batch

In [36]:
# Despite the name, these are file names: process_path pairs each image with
# get_name(file_path), not with a class label.
label_batch

<tf.Tensor: shape=(2,), dtype=string, numpy=array(['227.jpg', '201.jpg'], dtype=object)>

In [37]:
# Expecting 0
# `Sequential.predict_classes` is deprecated and removed in later TensorFlow
# releases; for a multi-class output it is the argmax over the last axis of
# `predict`, which is spelled out here.
predictions = np.argmax(model.predict(image_batch), axis=-1)

In [40]:
# Predicted class indices for image_batch, one per image.
predictions

array([94, 11])