In [55]:
import pandas as pd
import numpy as np
import os
import imageio
from tqdm import tqdm
from skimage.transform import resize as imresize
from sklearn.model_selection import train_test_split

In [56]:
def img_reshape(img, output_shape=(51, 51, 3)):
    """Resize an image array to a fixed shape.

    Args:
        img: Image array, passed unchanged to ``imresize``
            (``skimage.transform.resize``).
        output_shape: Target shape handed to the resizer. Defaults to
            ``(51, 51, 3)``, the size this notebook has always used, so
            existing one-argument calls behave as before.

    Returns:
        Whatever ``imresize`` returns for ``img`` at ``output_shape``.
    """
    return imresize(img, output_shape)

def img_label(path):
    """Return the file name (final component) of ``path``.

    The paths handled in this notebook are built with ``os.path.join``, so the
    name is extracted with ``os.path.basename`` rather than by splitting on a
    literal ``'/'``; this keeps the two in agreement on every platform.

    Args:
        path: Path to an image file.

    Returns:
        The last path component, e.g. ``'abc.png'`` for ``'test/abc.png'``.
    """
    return os.path.basename(path)

def img_class(path):
    """Return the name of the directory that directly contains ``path``.

    The paths handled in this notebook are built with ``os.path.join``, so the
    parent directory is extracted with ``os.path`` helpers rather than by
    splitting on a literal ``'/'``.

    Args:
        path: Path to an image file stored inside a per-class directory,
            e.g. ``'train/Maize/abc.png'``.

    Returns:
        The parent directory's name (``'Maize'`` in the example above).
        A bare file name with no directory part yields ``''``.
    """
    return os.path.basename(os.path.dirname(path))

def fill_dict(paths, some_dict):
    """Load every image in ``paths``, resize it, and append it to ``some_dict``.

    The split is inferred once, from the first path: if it contains ``'train'``
    the paths are treated as training data; otherwise, if it contains
    ``'test'``, as test data. That single decision drives both the progress-bar
    caption and which keys are filled.

    Args:
        paths: Sequence of image file paths. May be empty.
        some_dict: Dict whose ``'inputs'`` and ``'targets'`` values are lists;
            for the test split a ``'labels'`` list is required as well.
            It is mutated in place.

    Returns:
        ``some_dict`` (the same object), with one entry appended per path.
    """
    # Nothing to load: return before indexing paths[0], which would raise
    # IndexError on an empty sequence.
    if len(paths) == 0:
        return some_dict

    first_path = paths[0]
    text = ''
    is_test = False
    # NOTE(review): this is a substring match on the whole path, so a parent
    # directory whose name contains 'train' also matches.
    if 'train' in first_path:
        text = 'Start fill train_dict'
    elif 'test' in first_path:
        text = 'Start fill test_dict'
        is_test = True

    for p in tqdm(paths, ascii=True, ncols=85, desc=text):
        some_dict['inputs'].append(img_reshape(imageio.imread(p)))
        if is_test:
            # Test entries get a constant placeholder target (presumably
            # because the test images carry no class) and keep their file
            # name as the label.
            some_dict['targets'].append("Sugar beet")
            some_dict['labels'].append(img_label(p))
        else:
            some_dict['targets'].append(img_class(p))

    return some_dict

def reader(data_dir='../plant-seedlings-classification/'):
    """Collect the image paths under ``data_dir`` and load both splits.

    The tree is walked top-down. Directories that contain sub-directories are
    only reported (their own files are skipped); files in leaf directories are
    assigned to the train or test split depending on whether the directory
    path contains ``'train'`` or ``'test'``.

    Directory and file names are sorted before use so the resulting sample
    order does not depend on the order in which the filesystem lists entries.

    Args:
        data_dir: Root directory of the dataset. Defaults to the location this
            notebook has always used.

    Returns:
        ``(train_dict, test_dict)``. ``train_dict`` has the keys ``'inputs'``
        and ``'targets'``; ``test_dict`` additionally has ``'labels'``.
        Both are filled by ``fill_dict``.
    """
    train_path = []
    test_path = []

    for root, dirs, files in os.walk(data_dir):
        if dirs:
            # Sorting in place also fixes the order in which os.walk descends.
            dirs.sort()
            print('Root:\n'+str(root))
            print('Dirs:\n'+str(dirs))
            continue

        # Leaf directory: route its files by the split named in the path.
        if 'train' in root:
            bucket = train_path
        elif 'test' in root:
            bucket = test_path
        else:
            continue
        bucket.extend(os.path.join(root, f) for f in sorted(files))

    train_dict = {
        'inputs': [],
        'targets': []
    }
    test_dict = {
        'inputs': [],
        'labels': [],
        'targets': []
    }

    train_dict = fill_dict(train_path, train_dict)
    test_dict = fill_dict(test_path, test_dict)
    return train_dict, test_dict

In [57]:
# Walk the dataset directory and load both splits into memory; each result is
# a dict of Python lists (see reader / fill_dict).
train, test = reader()

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
Start fill train_dict:   0%|                        | 6/4750 [00:00<01:22, 57.53it/s]

Root:
../plant-seedlings-classification/
Dirs:
['test', 'train']
Root:
../plant-seedlings-classification/train
Dirs:
['Cleavers', 'Sugar beet', 'Common Chickweed', 'Loose Silky-bent', 'Scentless Mayweed', 'Shepherds Purse', 'Fat Hen', 'Common wheat', 'Black-grass', 'Small-flowered Cranesbill', 'Charlock', 'Maize']


Start fill train_dict: 100%|#####################| 4750/4750 [02:05<00:00, 37.96it/s]
Start fill test_dict: 100%|########################| 794/794 [00:08<00:00, 97.07it/s]


In [58]:
# Hold out as many validation images as there are test images
# (test: 794, train: 4750).
VALID_SIZE = 794
SPLIT_SEED = 63  # fixed seed for the shuffle

# NOTE(review): the split is not stratified by class; consider passing
# stratify=train["targets"] if per-class balance in the validation set matters.
X_train, X_valid, y_train, y_valid = train_test_split(
    train["inputs"], train["targets"], test_size=VALID_SIZE, random_state=SPLIT_SEED
)

# Convert each split piece straight to an ndarray. This gives the same arrays
# as wrapping each piece in a list and reshaping the extra leading axis away,
# without converting every large list twice.
train_set = {
    'inputs': np.asarray(X_train),
    'targets': np.asarray(y_train),
}

valid_set = {
    'inputs': np.asarray(X_valid),
    'targets': np.asarray(y_valid),
}

print(train_set['inputs'].shape)
print(train_set['targets'].shape)
print(valid_set['inputs'].shape)
print(valid_set['targets'].shape)

(3956, 51, 51, 3)
(3956,)
(794, 51, 51, 3)
(794,)


In [59]:
# Persist the three splits as .npz archives, one array per dict key.
# np.savez opens the target file directly and fails if the directory is
# missing, so create it first (no-op when it already exists).
os.makedirs("../data", exist_ok=True)
np.savez("../data/plant-test",**test)
np.savez("../data/plant-train",**train_set)
np.savez("../data/plant-valid",**valid_set)