In [1]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
 
#configure
# sets matplotlib to inline and displays graphs below the corressponding cell.
% matplotlib inline  
style.use('fivethirtyeight')
sns.set(style='whitegrid',color_codes=True)

#model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

#preprocess.
from keras.preprocessing.image import ImageDataGenerator

#dl libraraies
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop
from keras.utils import to_categorical

# specifically for cnn
from keras.layers import Dropout, Flatten,Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
 
import tensorflow as tf
import random as rn

# specifically for manipulating zipped images and getting numpy arrays of pixel values of images.
import cv2                  
import numpy as np  
from tqdm import tqdm
import os                   
from random import shuffle  
from zipfile import ZipFile
from PIL import Image

Using TensorFlow backend.


In [2]:
import numpy as np
import os
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils
from keras import backend as K
from sklearn.model_selection import StratifiedKFold

In [3]:
input_dir = "data/train"  # Input directory
learning_rate = 0.001  # Learning rate
opt = "SGD"  # Optimizer
target_size = (150, 150)  # Target size for data augmentation

# Configure a callback to reduce the learning rate upon plateau
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=50,
                              cooldown=50, min_lr=0.0001, verbose=1)

# Path to pre-trained weights file, if used. Otherwise None.
weights = None

# Configure the validation parameters
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

In [4]:

# Set up a list for keeping validation scores
scores = []

# Read data & labels
data, labels, classes = [], [], {}

for (root, subdirs, files) in os.walk(input_dir):
    # Assign a numerical identifier to each class directory
    for i, class_dir in enumerate(subdirs):
        classes[class_dir] = i

    # Define allowed image extensions
    ext = ['png', 'jpg', 'jpeg']

    # Loop over the files in each directory
    for f in files:
        if f.split('.')[-1] in ext:  # Check file extension
            path = os.path.join(root, f)  # Get image path
            label = path.split('/')[-2]  # Extract class label from path
            numlabel = classes[label]  # Get numerical label from the dict

            print( "*** Now processing {} / {} / {} ...".format(path,
                                                               label,
                                                               numlabel))

            # Load and preprocess image
            image = load_img(path, target_size=target_size)  # Load image
            features = img_to_array(image)  # Convert image to numpy array

            labels.append(numlabel)  # Append label to list
            data.append(features)  # Append features to list



*** Now processing data/train/sunflower/24459750_eb49f6e4cb_m.jpg / sunflower / 0 ...
*** Now processing data/train/sunflower/26254755_1bfc494ef1_n.jpg / sunflower / 0 ...
*** Now processing data/train/sunflower/27466794_57e4fe5656.jpg / sunflower / 0 ...
*** Now processing data/train/sunflower/27465811_9477c9d044.jpg / sunflower / 0 ...
*** Now processing data/train/sunflower/6953297_8576bf4ea3.jpg / sunflower / 0 ...
*** Now processing data/train/sunflower/24459548_27a783feda.jpg / sunflower / 0 ...
*** Now processing data/train/sunflower/29972905_4cc537ff4b_n.jpg / sunflower / 0 ...
*** Now processing data/train/tulip/14957470_6a8c272a87_m.jpg / tulip / 1 ...
*** Now processing data/train/tulip/11746276_de3dec8201.jpg / tulip / 1 ...
*** Now processing data/train/tulip/11746452_5bc1749a36.jpg / tulip / 1 ...
*** Now processing data/train/tulip/11746548_26b3256922_n.jpg / tulip / 1 ...
*** Now processing data/train/tulip/10791227_7168491604.jpg / tulip / 1 ...
*** Now processing data

In [5]:
# Convert data and labels to numpy arrays
data = np.asarray(data, dtype=np.float32)
labels = np.asarray(labels, dtype=np.float32)

In [6]:
data.shape

(35, 150, 150, 3)

In [7]:
labels.shape

(35,)

In [8]:
def create_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(150, 150,3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(5))
    model.add(Activation('softmax'))

    return model

In [9]:
opt= SGD
    for n, (train, test) in enumerate(kfold.split(data, labels)):

        # Select CNN architecture
        model = create_model()
        # If selected, configure SGD optimizer
        if opt == "SGD":
            optimizer = SGD(lr=learning_rate, decay=1e-5,
                            momentum=0.9, nesterov=True)

        # If selected, configure RMSProp optimizer
        if opt == "RMSProp":
            optimizer = RMSprop(lr=learning_rate)

        # Compile the model
        model.compile(loss="categorical_crossentropy", optimizer=optimizer,
                      metrics=['categorical_accuracy'])

        # Split the data into training and testing sets
        traindata, trainlabels = data[train], labels[train]
        testdata, testlabels = data[test], labels[test]

        # Convert integer labels into one-hot encoded vectors
        trainlabels = np_utils.to_categorical(trainlabels, 5)
        testlabels = np_utils.to_categorical(testlabels, 5)

In [10]:

# Set up generator for training data
training_generator = ImageDataGenerator(rescale=1./255,
                                        rotation_range=10,
                                        width_shift_range=0.2,
                                        height_shift_range=0.05,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True,
                                        fill_mode='nearest'
                                        )

# Generate training data
training_data = training_generator.flow(traindata,
                                        trainlabels,
                                        batch_size=256
                                        )


# Set up generator for validation data
validation_generator = ImageDataGenerator(rescale=1./255,
                                          rotation_range=10,
                                          width_shift_range=0.2,
                                          height_shift_range=0.05,
                                          shear_range=0.2,
                                          zoom_range=0.2,
                                          horizontal_flip=True,
                                          fill_mode='nearest'
                                          )

# Generate validation data
validation_data = validation_generator.flow(testdata,
                                            testlabels,
                                            batch_size=32
                                            )

# Start training the model
training = model.fit_generator(training_data,
                           steps_per_epoch=2048,
                           epochs=5,
                           validation_data=validation_data,
                           validation_steps=256,
                           callbacks=[reduce_lr]
                           )

# Evaluate the model
(loss, accuracy) = model.evaluate(testdata,
                              testlabels,
                              batch_size=32,
                              verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
0.6000 (STDEV 0.0000)
Best result for fold 0


In [None]:
scores.append(accuracy)

In [19]:
scores

[0.6000000238418579, 0.6000000238418579]

In [11]:

# Print the scores and the best fold
print( "Best result for fold %s" % np.argmax(scores))

0.6000 (STDEV 0.0000)
Best result for fold 0


In [17]:
print("Mean of scores: %.6f" %  (np.mean(scores)))

Mean of scores: 0.600000


In [18]:
print( "STDEV %.4f" % np.std(scores))

STDEV 0.0000
