In [None]:
ls

In [None]:
from tqdm import tqdm
import requests


def download_file(url, out_file):
    """Stream ``url`` into ``out_file`` while showing a tqdm progress bar.

    The payload is first written to ``out_file + '.part'`` and only renamed to
    ``out_file`` once the transfer has finished, so an interrupted download
    never leaves a truncated file under the final name.

    Args:
        url: Address of the resource to fetch.
        out_file: Path of the local file to create (overwritten if present).

    Raises:
        requests.HTTPError: If the server answers with an error status code.
    """
    import os  # local import: the function must not rely on a later cell

    chunk_size = 1024
    partial_file = out_file + '.part'

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as r:
        # Fail early instead of saving an HTML error page as the archive.
        r.raise_for_status()

        # Content-Length is optional (e.g. chunked transfer encoding);
        # tqdm copes with total=None by showing a counter without a bar.
        content_length = r.headers.get('content-length')
        total_chunks = None
        if content_length is not None:
            # Ceiling division: the last chunk may be shorter than chunk_size.
            total_chunks = -(-int(content_length) // chunk_size)

        with open(partial_file, 'wb') as f:
            for data in tqdm(iterable=r.iter_content(chunk_size=chunk_size),
                             total=total_chunks, unit='KB'):
                f.write(data)

    # Publish the file under its final name only after a complete transfer.
    os.replace(partial_file, out_file)

    print('{} download Complete!'.format(out_file))

In [None]:
import os

# Remote archive, where it is stored locally, and where it gets unpacked.
url = 'https://vision.eng.au.dk/?download=/data/WeedData/NonsegmentedV2.zip'
zip_file = './data.zip'
data_path = './data'

# Download only when the archive is not already on disk.
archive_present = os.path.isfile(zip_file)
if not archive_present:
    download_file(url, zip_file)

In [None]:
ls

In [None]:
from zipfile import ZipFile

# Unpack only once: an existing data directory is treated as "already extracted".
if not os.path.isdir(data_path):
    with ZipFile(zip_file, 'r') as f:
        print('Extracting all the files now ...')
        # Extract into the very directory the guard above checks, so the two
        # cannot drift apart if data_path is ever changed.
        f.extractall(data_path)
        print('Done!')

In [None]:
ls

In [None]:
ls data/

In [None]:
# Every sub-directory of data_path is one class label.
# - sorted(): os.listdir returns entries in arbitrary order, sorting makes the
#   label order reproducible between runs and machines.
# - isdir filter: stray files would otherwise be counted as classes and
#   inflate num_labels.
labels_list = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)
num_labels = len(labels_list)

print('Labels:')

for idx, label in enumerate(labels_list, start=1):
    print('{}. {}'.format(idx, label))

In [None]:
import matplotlib.pyplot as plt
import glob

X = []  # paths of all image files
y = []  # label (folder name) for the path at the same position in X

# Preview grid: four columns and as many rows as the number of labels needs
# (12 labels give the 3x4 layout).
n_cols = 4
n_rows = max(1, (num_labels + n_cols - 1) // n_cols)

plt.figure(1)

print('No of images in:')

for idx, label in enumerate(labels_list):
    label_path = os.path.join(data_path, label)
    # glob order is filesystem dependent; sort for a reproducible file order.
    images_list = sorted(glob.glob(os.path.join(label_path, '*.png')))
    num_images = len(images_list)
    X += images_list
    y += [label] * num_images

    print('{} directory: {}'.format(label, num_images))

    # A folder without PNG files has nothing to preview.
    if not images_list:
        continue

    # Show the first image of the class as its thumbnail.
    img = plt.imread(images_list[0])

    plt.subplot(n_rows, n_cols, idx + 1)
    plt.imshow(img)
    plt.title(label)
    plt.axis('off')

print()
plt.show()

In [None]:
# Class distribution: number of images per label.
fig, ax = plt.subplots()
ax.hist(y, bins=86)
ax.set_xlabel('labels')
ax.set_ylabel('no of images')
plt.show()

In [None]:
import os
import json
import numpy as np

from keras.models import Model
from keras.layers import Dense, Flatten
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.applications.vgg19 import VGG19, preprocess_input
from keras import optimizers

from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle

In [None]:
# Directory that receives everything this notebook writes.
output_path = './output_data'
log_file = os.path.join(output_path, 'log.csv')

# Files for the complete model and for its weights.
model_path = os.path.join(output_path, 'plant_vgg19.h5')
weights_path = os.path.join(output_path, 'plant_vgg19_weights.h5')

# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(output_path, exist_ok=True)

# Number of folds used for cross-validation.
n_splits = 5

# Training hyper-parameters.
batch_size = 16
epochs = 5

In [None]:
# Load every image, resized to 299x299, as a float array of raw pixel values.

print("Loading images...")

temp = []

for filename in X:
    # target_size is (height, width); the channel count is not part of it.
    img = image.load_img(filename, target_size=(299, 299))
    # Keep the raw 0-255 range. The VGG19 preprocess_input call that follows
    # in the conversion cell does its own ImageNet mean subtraction on
    # unscaled pixels, so scaling to [0, 1] here would make the means swamp
    # the image content.
    img = image.img_to_array(img)
    temp.append(img)

print('Done!')

In [None]:
# Stack the per-image arrays into one batch of shape (m, 299, 299, 3) and run
# the VGG19 input preprocessing over the whole batch.
X = preprocess_input(np.array(temp))

# Targets become a numpy array with one label per image.
y = np.array(y)

In [None]:
#y = y.reshape((y.shape[0], 1))

from sklearn.model_selection import train_test_split

# Seed the shuffle as well: without it the input order differs on every run,
# so the random_state on the splits below would not give reproducible
# partitions.
X, y = shuffle(X, y, random_state=0)

# Hold out 10% of all samples for validation (class proportions preserved).
X, X_valid, y, y_valid = train_test_split(X, y, test_size=0.1,
                                          random_state=0, stratify=y)

# From the remaining 90%, hold out 20% as the test set; the rest is training data.
X, X_test, y, y_test = train_test_split(X, y, test_size=0.2,
                                        random_state=0, stratify=y)

print('Train:', len(X), len(y))
print('Valid:', len(X_valid), len(y_valid))
print('Test:', len(X_test), len(y_test))

In [None]:
# Load the VGG19 convolutional base.
# include_top=False drops all the layers after block conv5.
# The input shape is taken from the training tensor itself (everything but the
# batch axis) rather than from a loop variable left over from another cell.
base_model = VGG19(include_top=False, input_shape=X.shape[1:])

# Freeze the convolutional base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Re-add a classification head: two 4096-unit dense layers followed by a
# softmax with one output per label.
x = base_model.output
x = Flatten(name="flatten")(x)
x = Dense(4096, activation="relu", name="fc1")(x)
x = Dense(4096, activation="relu", name="fc2")(x)
x = Dense(num_labels, activation="softmax", name="predictions")(x)

# Final model: frozen base plus trainable head.
model = Model(inputs=base_model.input, outputs=x, name="final_model")

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
from keras import backend as K

def precision_micro(y_true, y_pred):
    """Return tp / (tp + fp + epsilon), summed over every entry of the tensors.

    tp is the sum of ``y_true * y_pred``; fp is the sum of ``y_pred`` over the
    positions where ``y_true`` equals zero.
    """
    true_is_zero = K.cast(K.equal(y_true, K.zeros_like(y_true)), "float32")
    true_pos = K.sum(y_true * y_pred)
    false_pos = K.sum(y_pred * true_is_zero)
    # epsilon keeps the division defined when there are no positive predictions.
    denominator = true_pos + false_pos + K.epsilon()
    return true_pos / denominator


def recall_micro(y_true, y_pred):
    """Return tp / (tp + fn + epsilon), summed over every entry of the tensors.

    tp is the sum of ``y_true * y_pred``; fn is the sum of ``y_true`` over the
    positions where ``y_pred`` equals zero.
    """
    pred_is_zero = K.cast(K.equal(y_pred, K.zeros_like(y_pred)), "float32")
    true_pos = K.sum(y_true * y_pred)
    false_neg = K.sum(y_true * pred_is_zero)
    # epsilon keeps the division defined when there are no positive targets.
    denominator = true_pos + false_neg + K.epsilon()
    return true_pos / denominator


def f1_micro(y_true, y_pred):
    """Micro-averaged F1 score usable as a Keras metric.

    The class scores in ``y_pred`` are turned into hard one-hot predictions
    (arg-max over the last axis) before precision and recall are combined.

    Args:
        y_true: One-hot encoded target tensor.
        y_pred: Tensor of per-class scores whose last axis is the class axis.

    Returns:
        Scalar tensor ``2 * p * r / (p + r + epsilon)``.
    """
    # Take the number of classes from the prediction tensor instead of
    # hard-coding it, so the metric follows the width of the output layer.
    num_classes = K.int_shape(y_pred)[-1]

    y_pred = K.argmax(y_pred, axis=-1)
    y_pred = K.one_hot(y_pred, num_classes)
    pm = precision_micro(y_true, y_pred)
    rm = recall_micro(y_true, y_pred)
    return (2 * pm * rm) / (pm + rm + K.epsilon())

In [None]:
from sklearn.preprocessing import LabelEncoder

# Map the training labels to integer class ids; the fitted encoder is kept so
# the same mapping can be applied elsewhere.
encoder = LabelEncoder()
y = encoder.fit_transform(y).astype(np.int32)

print(y.shape)

In [None]:
# Adam optimizer with a learning rate of 1e-4.
adam = optimizers.Adam(lr=1e-4)

# Compile for multi-class classification on one-hot targets; track accuracy
# and the custom micro-averaged F1 metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy', f1_micro],
)

In [None]:
# Callbacks

from keras.callbacks import TensorBoard
from keras.callbacks import TerminateOnNaN
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.callbacks import CSVLogger

# Abort training as soon as the loss becomes NaN.
terminate_callback = TerminateOnNaN()

# TensorBoard logs (graph and images, no histograms) written to ./Graph.
tb_callback = TensorBoard(
    log_dir='./Graph',
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)

# Model checkpoint: full model saved every 5 epochs, file name carries the
# epoch number and the validation loss.
ckpt_callback = ModelCheckpoint(
    filepath='./weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_weights_only=False,
    mode='auto',
    period=5,
)

# Early stopping (currently disabled)
#stopping_callback = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto')

# Per-epoch metrics appended to a CSV-style log file.
csv_logger = CSVLogger('./training.log')

# Callbacks handed to model.fit.
callbacks = [
    terminate_callback,
    tb_callback,
    ckpt_callback,
    csv_logger,
]

In [None]:
# Stratified K-fold splitter over the training data.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)

# Training
print("Start cross-validation training...")
histories = []

# Snapshot of the weights before any fold is trained. Every fold restarts from
# this state; otherwise the model evaluated on fold k has already been trained
# on those very samples during earlier folds and the validation scores are
# optimistic.
# NOTE(review): only the weights are reset; the optimizer state is presumably
# carried over between folds - recompile per fold if that matters.
initial_weights = model.get_weights()

for train, val in skf.split(X, y):
    model.set_weights(initial_weights)

    # train / val are integer index arrays into the first axis of X and y.
    Xtrain = X[train]
    ytrain = to_categorical(y[train], num_classes=num_labels)
    Xval = X[val]
    yval = to_categorical(y[val], num_classes=num_labels)

    history = model.fit(Xtrain, ytrain, batch_size=batch_size,
                        epochs=epochs, validation_data=(Xval, yval))
    histories.append(history)

In [None]:
# Full training on the complete training split, validated on the hold-out set.

epochs = 10

print("Full training...")

ytrain = to_categorical(y, num_classes=num_labels)

# y_valid still holds the original string labels from the split: map them with
# the fitted encoder and one-hot encode them so they match the model output
# expected by categorical_crossentropy.
yvalid = to_categorical(encoder.transform(y_valid), num_classes=num_labels)

history = model.fit(X, ytrain, batch_size=batch_size, epochs=epochs,
                    callbacks=callbacks, validation_data=(X_valid, yvalid))
histories.append(history)

print("Save whole model...")
model.save(model_path)

print("Save weights of the model")
# save_weights stores the weights only; model.save would write a second copy
# of the complete model instead.
model.save_weights(weights_path)

# History objects are not JSON serializable; dump their per-epoch metric
# dictionaries instead, with values converted to plain Python floats.
serializable_histories = [
    {metric: [float(value) for value in values]
     for metric, values in h.history.items()}
    for h in histories
]

with open(log_file, "w") as f:
    json.dump(serializable_histories, f)

In [None]:
# y_test holds string labels; to_categorical needs integer class ids, so map
# them through the fitted encoder first. The result gets its own name so that
# y_test stays untouched and the cell can be re-run.
y_test_onehot = to_categorical(encoder.transform(y_test), num_classes=num_labels)

# Loss and metrics of the trained model on the held-out test set.
Eval = model.evaluate(X_test, y_test_onehot, verbose=1)
print(Eval)

In [None]:
# Scratch computation: reciprocal of 255, the maximum 8-bit pixel value.
1/255