## Building a CNN, Predicting, and Evaluating

## 1. Building a CNN

In [30]:
# Import Libraries
import os
import cv2
import pandas as pd
import numpy as np
import keras

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

from mpl_toolkits.axes_grid1 import ImageGrid
from mpl_toolkits.mplot3d import Axes3D

from keras.applications import xception
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler

from random import randint
from tqdm import tqdm

# Seed reused wherever the notebook needs reproducible randomness, e.g. it is passed to
# np.random.seed before the train/validation split and as random_state of the classifier.
seed = 1000

In [28]:
def read_img(filepath, size):
    """Load one image from disk and return it in array form.

    Args:
        filepath: Location of the image file; forwarded to ``image.load_img``.
        size: Passed to ``image.load_img`` as ``target_size`` (the callers in this
            notebook supply an ``(input_size, input_size)`` tuple).

    Returns:
        Whatever ``image.img_to_array`` produces for the loaded image.
    """
    path = os.path.join(filepath)
    loaded = image.load_img(path, target_size=size)
    return image.img_to_array(loaded)

In [3]:
# Set up paths
# Both are relative directory names, so they resolve against the kernel's working directory.
train_path = 'train'  # listed per category below via os.path.join(train_path, category)
test_path = 'test'    # listed directly with os.listdir(test_path)

In [12]:
# Class labels. Each name is also joined onto `train_path` to locate that class's folder,
# and a label's position in this list is used as its integer `category_id`.
categories = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]
num_categories = len(categories)

# Upper bound on the number of images kept per category when the training frame is subsampled.
num_samples = 200

In [5]:
# Report how many entries each category folder under `train_path` contains.
for category in categories:
    class_dir = os.path.join(train_path, category)
    n_images = len(os.listdir(class_dir))
    print('{}: {} images'.format(category, n_images))

Black-grass: 263 images
Charlock: 390 images
Cleavers: 287 images
Common Chickweed: 611 images
Common wheat: 221 images
Fat Hen: 475 images
Loose Silky-bent: 654 images
Maize: 221 images
Scentless Mayweed: 516 images
Shepherds Purse: 231 images
Small-flowered Cranesbill: 496 images
Sugar beet: 385 images


In [7]:
# Build the training index: one row per image holding its path, integer label and class name.
# - The path is formatted from `train_path` instead of a hard-coded 'train/' prefix, so the
#   directory that is listed and the paths that are stored can never disagree.
# - Each listing is sorted because os.listdir returns entries in arbitrary order; without it
#   any later "first N files per class" selection depends on the machine/filesystem.
train = []
for category_id, category in enumerate(categories):
    for file in sorted(os.listdir(os.path.join(train_path, category))):
        train.append(['{}/{}/{}'.format(train_path, category, file), category_id, category])
train = pd.DataFrame(train, columns=['file', 'category_id', 'category'])

In [10]:
# Build the test index: `filepath` is the path used for loading, `file` is the bare file name.
# The path is formatted from `test_path` (not a hard-coded 'test/' prefix) and the listing is
# sorted so the row order is deterministic (os.listdir returns entries in arbitrary order).
test = []
for file in sorted(os.listdir(test_path)):
    test.append(['{}/{}'.format(test_path, file), file])
test = pd.DataFrame(test, columns=['filepath', 'file'])

In [13]:
# Keep at most `num_samples` rows per category, then shuffle the rows.
train = pd.concat([train[train['category'] == c][:num_samples] for c in categories])
# Seed the shuffle explicitly: an unseeded sample() draws from NumPy's global state, which has
# not been seeded at this point, so the row order (and therefore which images land in the
# train/validation split) would differ on every run.
train = train.sample(frac=1, random_state=seed)
# Replace the shuffled original labels with a clean 0..n-1 index.
train.index = np.arange(len(train))

### Validation Set

In [18]:
# Reproducible random split: draw one uniform number per row; rows below 0.8 go to training,
# the remainder to validation.
np.random.seed(seed=seed)
rnd = np.random.random(len(train))
train_idx = rnd < 0.8
valid_idx = ~train_idx  # complement of the training mask, i.e. rnd >= 0.8

# Integer class labels for each side of the split.
labels = train['category_id'].values
ytr, yv = labels[train_idx], labels[valid_idx]
len(ytr), len(yv)

(1903, 497)

### Xception

In [25]:
# Images are loaded as input_size x input_size (see the read_img calls below).
input_size = 299
# Forwarded as the `pooling` argument when the headless Xception model is constructed.
pooling = 'max'

In [31]:

# Load every training image, apply Xception's input preprocessing, and store it in one
# pre-allocated float32 array of shape (n_images, input_size, input_size, 3).
x_train = np.zeros((len(train), input_size, input_size, 3), dtype='float32')
# tqdm wraps the column (which has a length) rather than the enumerate() generator, so the
# progress bar can show the total and an ETA instead of a bare iteration counter.
for i, file in enumerate(tqdm(train['file'])):
    img = read_img(file, (input_size, input_size))
    # `img` is a fresh array owned by this iteration, so it is preprocessed directly; no
    # defensive copy or temporary batch axis is needed before storing it in its slot.
    x = xception.preprocess_input(img)
    x_train[i] = x

2400it [00:39, 61.27it/s]


In [33]:
# Split the preprocessed images with the boolean train/validation masks.
Xtr, Xv = x_train[train_idx], x_train[valid_idx]
print((Xtr.shape, Xv.shape, ytr.shape, yv.shape))

# Xception with ImageNet weights and no classification head; `pooling` selects how the final
# feature maps are pooled into the output that predict() returns.
xception_bottleneck = xception.Xception(include_top=False, weights='imagenet', pooling=pooling)

# Run both splits through the network with identical prediction settings.
predict_kwargs = dict(batch_size=32, verbose=1)
train_x_bf = xception_bottleneck.predict(Xtr, **predict_kwargs)
valid_x_bf = xception_bottleneck.predict(Xv, **predict_kwargs)

((1903, 299, 299, 3), (497, 299, 299, 3), (1903,), (497,))
Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5






ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Richard\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-33-3952c71c3952>", line 5, in <module>
    train_x_bf = xception_bottleneck.predict(Xtr, batch_size=32, verbose=1)
  File "C:\Users\Richard\Anaconda3\lib\site-packages\keras\engine\training.py", line 1169, in predict
    steps=steps)
  File "C:\Users\Richard\Anaconda3\lib\site-packages\keras\engine\training_arrays.py", line 294, in predict_loop
    batch_outs = f(ins_batch)
  File "C:\Users\Richard\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "C:\Users\Richard\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "C:\Users\Richard\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1439

KeyboardInterrupt: 

### Logistic Regression on Xception Bottleneck Features

In [None]:
# Fit a multinomial logistic-regression classifier on the Xception bottleneck features.
# NOTE: LogisticRegression is provided by sklearn.linear_model and has to be imported in the
# notebook's import cell for this cell to run.
logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=seed)
logreg.fit(train_x_bf, ytr)
# Score the validation split: per-class probabilities and the predicted labels.
valid_probs = logreg.predict_proba(valid_x_bf)
valid_preds = logreg.predict(valid_x_bf)