> code example : https://keras.io/examples/vision/captcha_ocr/

# Setup

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# use csv file
import csv

# Data
fetch from train_image

change dir

> images and labels >> characters


In [2]:
images = []
labels = []
front_colors = []

# total color
colors = []

In [3]:
if images is not None:
    images = []
if labels is not None:
    labels = []
if front_colors is not None:
    front_colors = []
if colors is not None:
    colors = []

with open('./data_color_text.csv',newline='') as csvfile:
    _reader = csv.DictReader(csvfile)
    for row in _reader:
        if os.path.isfile(row['path']+'.jpg'):
            image_path = row['path'] + ".jpg"
            images.append(image_path)
            labels.append(row['text_front'])
            front_colors.append(row['front_color'])
            if not(row['front_color'] in colors):
                colors.append(row['front_color'])
        
print(images[1])
print(labels[1])
print(front_colors[1])

./image/198803039.jpg
ZSM
갈색


In [4]:
print("number of images found : ", len(images))
print("number of labels found : ", len(labels))
print("number of colors found : ", len(front_colors))

number of images found :  23607
number of labels found :  23607
number of colors found :  23607


# Parameters

In [5]:
# desired image dimensions
img_width = 224
img_height = 224

LR = 0.0001
EPOCHS = 20
# batch size for training and validation
BATCH_SIZE = 32

# Preprocessing
get data preprocessed

In [6]:
def fetch_data():
    for img_path, label, front_color in zip(images, labels, front_colors):
        try:
            outputs = (label, front_color)
            img = load_img(img_path, target_size=(224, 224))
            img = img_to_array(img)
            img = preprocess_input(img)
            yield img, outputs
            
        except GeneratorExit:
            return

# Create Dataset objects
split into train and valid dataset

In [7]:
dataset = tf.data.Dataset.from_generator(
    fetch_data,
    output_types=(tf.float32, (tf.string, tf.string)),
    output_shapes=(tf.TensorShape([224,224,3]),
                  (tf.TensorShape([]), tf.TensorShape([]))),
)

dataset

<FlatMapDataset shapes: ((224, 224, 3), ((), ())), types: (tf.float32, (tf.string, tf.string))>

In [8]:
train_size = int(0.8 * len(images))
test_size = int(0.2 * len(images))

dataset = dataset.shuffle(buffer_size=2048)

train_ds = dataset.take(train_size)
test_ds = dataset.skip(test_size)

print(train_ds)

<TakeDataset shapes: ((224, 224, 3), ((), ())), types: (tf.float32, (tf.string, tf.string))>


# Visualize the Data
using plt

In [None]:
for image, label in train_ds.take(1):
    

# Model
using class

In [9]:
base_model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224,224,3)
#     input_tensor=tf.keras.layers.Input(shape=(224,224,3))
)

In [10]:
head_model = base_model.output
# x = tf.keras.layers.Reshape(target_shape=(224,224,3))(head_model)
x = tf.keras.layers.GlobalAveragePooling2D()(head_model)

x = tf.keras.layers.Dense(1024)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)

x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)

x = tf.keras.layers.Dense(256)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)

x = tf.keras.layers.Dense(128)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)

front_color_layer = tf.keras.layers.Dense(40, activation='sigmoid', name='front_color_layer')(x)

# model
model = tf.keras.models.Model(inputs=base_model.input, outputs=front_color_layer)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________________

In [11]:
for layer in base_model.layers:
    layer.trainable = False

In [15]:
model.trainable_variablesn

AttributeError: 'Functional' object has no attribute 'trainable_variablesn'

# Training

In [None]:
optimizer = tf.keras.optimizers.Adam(LR)

model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
history = model.fit(
    train_ds.batch(BATCH_SIZE),
    validation_data=test_ds.batch(BATCH_SIZE),
    epochs=EPOCHS
)