In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import cv2
import os
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.callbacks import ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Class names of the plant seedlings; a species' position in this list is its numeric id.
species = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]

# Dataset locations, relative to the notebook's working directory.
train_dir, test_dir = './train', './test'

In [3]:
# Build one row per training image: [file path, numeric species id, species name].
train_data = []
for species_id, sp in enumerate(species):
    species_dir = os.path.join(train_dir, sp)
    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # (and therefore the later seeded shuffle) reproducible across machines.
    for file in sorted(os.listdir(species_dir)):
        # Derive the path from train_dir instead of a hard-coded 'train/' prefix so
        # the frame stays valid when train_dir is changed. normpath drops the leading
        # './', so with the default train_dir on POSIX the value is still
        # 'train/<species>/<file>'.
        image_path = os.path.normpath(os.path.join(species_dir, file))
        train_data.append([image_path, species_id, sp])

train = pd.DataFrame(train_data, columns=['File', 'SpeciesId', 'Species'])
train.head()

Unnamed: 0,File,SpeciesId,Species
0,train/Black-grass/0050f38b3.png,0,Black-grass
1,train/Black-grass/0183fdf68.png,0,Black-grass
2,train/Black-grass/0260cffa8.png,0,Black-grass
3,train/Black-grass/05eedce4d.png,0,Black-grass
4,train/Black-grass/075d004bc.png,0,Black-grass


In [4]:
# Shuffle all rows with a fixed seed, then relabel the rows 0..n-1 so positional
# and label-based indexing agree after the shuffle.
train = (
    train
    .sample(frac=1, random_state=42)
    .set_index(np.arange(len(train)))
)
train.head()

Unnamed: 0,File,SpeciesId,Species
0,train/Shepherds Purse/cd3e9d61c.png,9,Shepherds Purse
1,train/Common Chickweed/2e5123448.png,3,Common Chickweed
2,train/Charlock/168982d9c.png,1,Charlock
3,train/Fat Hen/994001cab.png,5,Fat Hen
4,train/Common wheat/a86689d83.png,4,Common wheat


In [5]:
# Build one row per test image: [path to the file, bare file name].
# Entries are sorted because os.listdir returns them in arbitrary order, and the
# path is derived from test_dir rather than a hard-coded 'test/' prefix so the
# frame stays valid when test_dir is changed. normpath drops the leading './',
# so with the default test_dir on POSIX the value is still 'test/<file>'.
test_data = [
    [os.path.normpath(os.path.join(test_dir, file)), file]
    for file in sorted(os.listdir(test_dir))
]
test = pd.DataFrame(test_data, columns=['Filepath', 'File'])
test.head()

Unnamed: 0,Filepath,File
0,test/0021e90e4.png,0021e90e4.png
1,test/003d61042.png,003d61042.png
2,test/007b3da8b.png,007b3da8b.png
3,test/0086a6340.png,0086a6340.png
4,test/00c47e980.png,00c47e980.png


In [7]:
# Side length, in pixels, of the square images produced by the resize step below.
IMAGE_SIZE = 64

def read_image(filepath):
    """Load an image from disk with OpenCV.

    Args:
        filepath: Path of the image file to load.

    Returns:
        The decoded image array as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``filepath`` does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    # cv2.imread does not raise on failure; it returns None, which would only
    # surface later as an opaque error inside the colour conversion. Fail here,
    # with the offending path in the message, instead.
    if not os.path.isfile(filepath):
        raise FileNotFoundError('Image file not found: {}'.format(filepath))
    image = cv2.imread(filepath)
    if image is None:
        raise ValueError('Could not decode image: {}'.format(filepath))
    return image

def resize_image(image, image_size):
    """Return a resized copy of ``image``.

    Args:
        image: Image array to resize; it is left unmodified.
        image_size: Target size passed to ``cv2.resize`` as ``dsize``,
            i.e. a ``(width, height)`` tuple.

    Returns:
        A new array holding the resized image, interpolated with
        ``cv2.INTER_AREA``.
    """
    # cv2.resize writes into a freshly allocated destination and never touches
    # its source, so no defensive copy of the input is needed.
    return cv2.resize(image, image_size, interpolation=cv2.INTER_AREA)

In [8]:
def create_mask(image, lower_green=(30, 100, 50), upper_green=(85, 255, 255), kernel_size=15):
    """Build a binary mask selecting the green regions of a BGR image.

    Args:
        image: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).
        lower_green: Inclusive lower HSV bound of the colour range to keep.
        upper_green: Inclusive upper HSV bound of the colour range to keep.
        kernel_size: Side length, in pixels, of the elliptical structuring
            element used for the morphological closing.

    Returns:
        The single-channel mask produced by ``cv2.inRange`` after a
        morphological closing.
    """
    # Convert from BGR to HSV, where a colour can be selected by a hue range.
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Threshold the HSV image to keep only pixels inside the requested range.
    mask = cv2.inRange(image_hsv, np.array(lower_green), np.array(upper_green))
    # Morphological closing fills small holes in the mask; the structuring
    # element determines how large a hole can be closed.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    return mask

def segment_image(image):
    """Return ``image`` with every pixel outside the ``create_mask`` mask zeroed.

    Args:
        image: Image array handed to ``create_mask`` and then masked.

    Returns:
        The result of ``cv2.bitwise_and`` of the image with itself under the mask.
    """
    green_mask = create_mask(image)
    # AND-ing the image with itself leaves pixel values unchanged; the mask
    # argument restricts which pixels are written to the result.
    return cv2.bitwise_and(image, image, mask=green_mask)

In [9]:
# Preallocate the feature tensor: one IMAGE_SIZE x IMAGE_SIZE 3-channel image per row of `train`.
X_train = np.zeros((train.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3))
# Wrap the file array itself (not the enumerate object, which has no len) so
# tqdm knows the total and can render a real progress bar with an ETA.
for i, file in enumerate(tqdm(train['File'].values)):
    image = read_image(file)
    image_segmented = segment_image(image)
    X_train[i] = resize_image(image_segmented, (IMAGE_SIZE, IMAGE_SIZE))
# Normalize pixel values by 255. Dividing in place yields the same values as
# `X_train / 255.` without allocating a second full-size array.
X_train /= 255.
print('Train Shape: {}'.format(X_train.shape))

4750it [01:02, 75.80it/s]


Train Shape: (4750, 64, 64, 3)


In [10]:
# Class labels as a NumPy array of species ids, in the same row order as `train`.
Y_train = train.SpeciesId.values
# One-hot encoding is not applied here; the call below is kept disabled for reference.
#Y_train = to_categorical(Y_train, num_classes = 12)

In [11]:
# Persist the features and labels to "out.npz" (np.savez appends the .npz extension).
# The arrays are passed positionally, so they are stored under numpy's default
# keys "arr_0" (X_train) and "arr_1" (Y_train).
np.savez("out", X_train, Y_train)