<a href="https://colab.research.google.com/github/thejourneyofman/keras_medicine_detection/blob/master/01_imageCropping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import cv2, numpy as np, os
import xml.etree.ElementTree as ET
import imutils
import random
import shutil

# Root directory that holds the raw images and their annotation folders.
image_dir = './data/image/'

# One folder name per class: medicine00 ... medicine09. The same names are
# used for the per-class target folders.
folder_names = ['medicine0{}'.format(label) for label in range(10)]

# Source locations, both with a trailing slash: images are read from
# <image_dir><name>/ and XML annotations from <image_dir><name>_xml/.
data_paths = [image_dir + name + '/' for name in folder_names]
xml_paths = [image_dir + name + '_xml/' for name in folder_names]

# Output roots for the cropped training and test images.
TRAIN_DIR = 'train_data'
TEST_DIR = 'test_data'

# Create the output tree: <TRAIN_DIR>/<class>/ and <TEST_DIR>/<class>/.
# os.makedirs(..., exist_ok=True) is a no-op when the folder already exists,
# so there is no gap between an os.path.exists() check and the os.mkdir()
# call in which another process could create the folder and make us fail.
for root_dir in (TRAIN_DIR, TEST_DIR):
    # Create the bare root explicitly so it exists even when folder_names
    # is empty.
    os.makedirs(root_dir, exist_ok=True)
    for name in folder_names:
        os.makedirs(os.path.join(root_dir, name), exist_ok=True)

def _remove_jpgs(directory):
    """Delete every file whose name ends with ``.jpg`` directly in *directory*."""
    for item in os.scandir(directory):
        if item.name.endswith(".jpg"):
            os.unlink(item.path)


def _last_bndbox(xml_file):
    """Return ``(xmin, ymin, xmax, ymax)`` of the last ``<object>`` in *xml_file*.

    When the file annotates several objects the last one wins. Returns
    ``None`` when the file has no ``<object>`` element, so the caller can
    skip the image instead of reusing a box left over from a previous file.
    """
    box = None
    for obj in ET.parse(xml_file).getroot().findall('object'):
        bndbox = obj.find('bndbox')
        box = tuple(int(bndbox.find(tag).text)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
    return box


# For every class: wipe previously generated crops, then write the annotated
# crop of each image plus one extra crop per sufficiently large contour found
# inside it. All crops go to <TRAIN_DIR>/<class>/<running index>.jpg.
for data_path, xml_path, folder_name in zip(data_paths, xml_paths, folder_names):
    train_folder = os.path.join(TRAIN_DIR, folder_name)
    _remove_jpgs(train_folder)
    _remove_jpgs(os.path.join(TEST_DIR, folder_name))
    print(data_path)

    # os.listdir() returns names in arbitrary order, so pairing the two raw
    # listings could attach an annotation to the wrong image. Sorting makes
    # the pairing deterministic.
    # NOTE(review): this assumes an image and its XML share the same base
    # name so that both sorted lists line up - confirm against the data.
    entries = sorted(os.listdir(data_path))
    xmls = sorted(f for f in os.listdir(xml_path) if f.endswith('.xml'))
    if len(entries) != len(xmls):
        print('warning: {} files in {} but {} annotations in {}; '
              'unpaired files are ignored'.format(
                  len(entries), data_path, len(xmls), xml_path))

    idx_cropped = 0
    for entry, xml in zip(entries, xmls):
        box = _last_bndbox(os.path.join(xml_path, xml))
        if box is None:
            print('warning: no <object> in {}; skipping {}'.format(xml, entry))
            continue
        xmin, ymin, xmax, ymax = box

        # cv2.imread() signals an unreadable file by returning None.
        image = cv2.imread(os.path.join(data_path, entry))
        if image is None:
            print('warning: cannot read image {}; skipping'.format(entry))
            continue

        cropped = image[ymin:ymax, xmin:xmax]
        if cropped.size == 0:
            # A box outside the image would make cvtColor() below fail.
            print('warning: empty crop for {}; skipping'.format(entry))
            continue

        # Save the annotated crop itself.
        cv2.imwrite(os.path.join(train_folder, str(idx_cropped) + '.jpg'), cropped)
        idx_cropped += 1

        # Build an edge map of the crop and locate its outer contours.
        gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (3, 3), 0)
        edged = cv2.Canny(gray, 20, 100)
        cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)

        # Save the bounding rectangle of every contour that is not tiny.
        for c in cnts:
            if cv2.contourArea(c) < 50:
                continue
            (x, y, w, h) = cv2.boundingRect(c)
            cv2.imwrite(os.path.join(train_folder, str(idx_cropped) + '.jpg'),
                        cropped[y:y + h, x:x + w])
            idx_cropped += 1

# Number of files per class moved from the training folder to the test folder.
TEST_IMAGES_PER_CLASS = 100

# Hold out a random subset of every class as test data.
for folder_name in folder_names:
    train_folder = os.path.join(TRAIN_DIR, folder_name)
    test_folder = os.path.join(TEST_DIR, folder_name)

    # os.listdir() order is arbitrary; sorting first means the shuffle (and
    # therefore the split) is reproducible whenever `random` is seeded.
    files = sorted(os.listdir(train_folder))
    if len(files) <= TEST_IMAGES_PER_CLASS:
        # The slice below takes every file in this case.
        print('warning: {} has only {} files; all of them are moved to {}'.format(
            train_folder, len(files), test_folder))

    random.shuffle(files)
    for f in files[:TEST_IMAGES_PER_CLASS]:
        shutil.move(os.path.join(train_folder, f), test_folder)

# Close any OpenCV GUI windows (no window is opened by the code in this cell).
cv2.destroyAllWindows()


./data/image/medicine00/
./data/image/medicine01/
./data/image/medicine02/
./data/image/medicine03/
./data/image/medicine04/
./data/image/medicine05/
./data/image/medicine06/
./data/image/medicine07/
./data/image/medicine08/
./data/image/medicine09/
