In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from keras import Input
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Softmax, Flatten, Reshape
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
import tensorflow as tf
import os
import json
import cv2
from sklearn.model_selection import train_test_split

In [2]:
FOLDER_NAME = '../data'
OUTPUT_FILENAME = '../data.json'

with open(OUTPUT_FILENAME, 'r') as json_file:
    data = json.load(json_file)

pictures = []
letters = []
unique_groups = set()
for datum in data:
    img = cv2.imread(
        os.path.join(FOLDER_NAME, datum['pic']),
        cv2.IMREAD_GRAYSCALE
    )
    img = img.astype('float32')
    img /= 255
    pictures.append(img)
    letters.append(datum['group'])
    unique_groups.add(datum['group'])
pictures = np.expand_dims(np.array(pictures), axis=3)
group_to_vec = {gr: i for i, gr in enumerate(unique_groups)}
groups = []
for letter in letters:
    vec = np.zeros(len(unique_groups))
    vec[group_to_vec[letter]] = 1
    groups.append(vec)
groups = np.array(groups)

p_train, p_test, l_train, l_test = train_test_split(pictures, groups, test_size=0.2)

In [3]:
input_shape = (32,32,1)
output_shape = len(unique_groups)
pool_size = (2, 2)
conv_filters1 = 32
conv_filters2 = 64
conv_kernel = 3
print(output_shape)

11


In [4]:
model = Sequential(name='CnnForGroups')

model.add(Input(shape=(32,32,1)))
model.add(Conv2D(filters=conv_filters1, kernel_size=conv_kernel, strides=(1, 1), padding="same"))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Conv2D(filters=conv_filters2, kernel_size=conv_kernel, strides=(1, 1), padding="same"))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(len(unique_groups), activation='softmax'))

model.summary()

opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

Model: "CnnForGroups"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 4096)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               2097664   
_________________________________________________________________
dropout (Dropout)            (None, 512)              

2022-01-17 02:39:07.818868: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
model.fit(p_train, l_train, validation_data=(p_test, l_test), batch_size=32, epochs=20)

2022-01-17 02:39:08.871630: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1730843a0>

In [6]:
model.save('../bin/letter_to_group')

2022-01-17 02:40:04.415381: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ../bin/letter_to_group/assets


In [9]:
print(group_to_vec.keys())

dict_keys(['F', 'Br', 'B', 'O', 'Cl', 'N', 'T', 'S', 'P', 'C', '0'])


In [8]:
print(p_train.shape)

(2800, 32, 32, 1)
