In [21]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Dense
from sklearn.model_selection import train_test_split

In [28]:
# Root folder under which the 'train' and 'test' image folders are written.
OUTPUT_IMAGES = '../dataset/images'

# Folder that holds one sub-folder of images per arithmetic operator.
OPERATORS_DATASET_PATH = '../dataset/images/operators/'

# Integer class label for each operator folder name. The values start at 10,
# presumably so they follow the MNIST digit labels 0-9 -- confirm.
OPERATORS_ENCODER = {
    'add': 10,
    'sub': 11,
    'mul': 12,
    'div': 13,
}

In [23]:
def _load_local_images():
    """Yield the operator images and their labels, one operator folder at a time.

    Walks the entries of ``OPERATORS_DATASET_PATH`` in sorted order. Only
    directories whose name is a key of ``OPERATORS_ENCODER`` are loaded; any
    other entry (stray file, hidden folder, ...) is skipped instead of
    aborting the whole load with a ``KeyError``.

    Yields:
        tuple: ``(images, labels)``. ``images`` is a float array of shape
        ``(n_files, 28, 28)`` holding the bitwise-inverted grayscale images;
        ``labels`` is an int array of length ``n_files`` filled with the
        folder's ``OPERATORS_ENCODER`` value.

    Raises:
        ValueError: if a file cannot be decoded as an image, or the decoded
            image is not 28x28.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order stable so the seeded train/test split is reproducible.
    for folder in sorted(os.listdir(OPERATORS_DATASET_PATH)):
        operator_path = os.path.join(OPERATORS_DATASET_PATH, folder)
        if folder not in OPERATORS_ENCODER or not os.path.isdir(operator_path):
            continue

        files = sorted(os.listdir(operator_path))
        results = np.zeros((len(files), 28, 28))

        for i, file_name in enumerate(files):
            image_path = os.path.join(operator_path, file_name)

            # cv2.imread signals failure by returning None rather than raising.
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                raise ValueError(f'Could not read image: {image_path}')
            if image.shape != results.shape[1:]:
                raise ValueError(
                    f'Expected a 28x28 image, got {image.shape}: {image_path}'
                )

            results[i, :] = cv2.bitwise_not(image)

        yield results, np.full(len(files), OPERATORS_ENCODER[folder], dtype=int)

In [24]:
def _concat_local_images():
    """Stack every batch produced by ``_load_local_images`` into two arrays.

    Returns:
        tuple: ``(operators_data, classes_data)`` -- the images of all batches
        joined along the first axis, and the matching labels. When no batch
        is produced, empty arrays of shape ``(0, 28, 28)`` and ``(0,)`` are
        returned (rather than plain lists) so callers can still concatenate
        them with other image/label arrays.
    """
    image_batches = []
    class_batches = []

    for images, classes in _load_local_images():
        image_batches.append(images)
        class_batches.append(classes)

    if not image_batches:
        return np.zeros((0, 28, 28)), np.zeros(0, dtype=int)

    # Concatenate once at the end instead of re-copying the growing array on
    # every iteration.
    return np.concatenate(image_batches), np.concatenate(class_batches)

In [25]:
def load_data(mnist_samples=650):
    """Build the combined dataset of MNIST digits and local operator images.

    Args:
        mnist_samples: how many samples to take from the start of each MNIST
            split (train and test). Defaults to 650, the value that used to
            be hard-coded. ``None`` takes every sample of both splits.

    Returns:
        tuple: ``(x, y)`` -- the images and labels, ordered as MNIST train
        slice, MNIST test slice, then the operator images.

    Raises:
        ValueError: if ``mnist_samples`` is negative (a negative slice bound
            would silently drop samples from the end instead).
    """
    if mnist_samples is not None and mnist_samples < 0:
        raise ValueError(f'mnist_samples must be >= 0, got {mnist_samples}')

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    operators_data, classes_data = _concat_local_images()

    x = np.concatenate(
        (x_train[:mnist_samples], x_test[:mnist_samples], operators_data)
    )
    y = np.concatenate(
        (y_train[:mnist_samples], y_test[:mnist_samples], classes_data)
    )

    return x, y

In [26]:
# Build the combined dataset: MNIST digit samples followed by the local
# operator images, with their labels in y.
x, y = load_data()

In [27]:
# Display both shapes to confirm images and labels have the same sample count.
x.shape, y.shape

((3746, 28, 28), (3746,))

In [36]:
# Hold out a quarter of the samples for testing; the fixed seed keeps the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=101,
)

In [37]:
# Export the training split as PNG files named "<4-digit index>-<label>.png".
folder = os.path.join(OUTPUT_IMAGES, 'train')
os.makedirs(folder, exist_ok=True)

for index, (image, target) in enumerate(zip(x_train, y_train), start=1):
    image_path = os.path.join(folder, f'{index:0>4}-{target}.png')
    # cv2.imwrite reports failure through its boolean return value; check it
    # so a failed write does not silently leave the dataset incomplete.
    if not cv2.imwrite(image_path, image):
        raise IOError(f'Could not write image: {image_path}')

In [38]:
# Export the test split as PNG files named "<4-digit index>-<label>.png".
folder = os.path.join(OUTPUT_IMAGES, 'test')
os.makedirs(folder, exist_ok=True)

for index, (image, target) in enumerate(zip(x_test, y_test), start=1):
    image_path = os.path.join(folder, f'{index:0>4}-{target}.png')
    # cv2.imwrite reports failure through its boolean return value; check it
    # so a failed write does not silently leave the dataset incomplete.
    if not cv2.imwrite(image_path, image):
        raise IOError(f'Could not write image: {image_path}')