In [112]:
import os
import pandas as pd

numbers_path = 'data/숫자인식/'
alphabets_path = 'data/알파벳인식/'
free_patterns_path = 'data/자유패턴/'

number_classes = [str(i) for i in range(10)]
alphabet_classes = [chr(i) for i in range(ord('A'), ord('Z')+1)]

number_folders = {}
for class_name in number_classes:
    number_folders[class_name] = '0' + class_name

data = []

for number, folder_name in number_folders.items():
    n_folder_path = os.path.join(numbers_path, folder_name, 'n'+folder_name)
    r_folder_path = os.path.join(numbers_path, folder_name, 'r_n'+folder_name)
    for file in os.listdir(n_folder_path):
        data.append({
            'image_path': os.path.join(n_folder_path, file),
            'category': 'number',
            'target': number,
            'r': False
        })
    for file in os.listdir(r_folder_path):
        data.append({
            'image_path': os.path.join(r_folder_path, file),
            'category': 'number',
            'target': number,
            'r': True
        })

for letter in alphabet_classes:
    n_folder_path = os.path.join(alphabets_path, letter, letter)
    r_folder_path = os.path.join(alphabets_path, letter, 'r_'+letter)
    for file in os.listdir(n_folder_path):
        data.append({
            'image_path': os.path.join(n_folder_path, file),
            'category': 'alphabet',
            'target': letter,
            'r': False
        })
    for file in os.listdir(r_folder_path):
        data.append({
            'image_path': os.path.join(r_folder_path, file),
            'category': 'alphabet',
            'target': letter,
            'r': True
        })

for root, dirs, files in os.walk(free_patterns_path):
    for file in files:
        data.append({
            'image_path': os.path.join(root, file),
            'category': 'free_pattern',
            'target': None,
            'r': None
        })

df = pd.DataFrame(data)
df.sample(10)

Unnamed: 0,image_path,category,target,r
453,data/숫자인식/04/r_n04/4d_r_n04_03.jpg,number,4,True
1250,data/알파벳인식/C/C/4d_al_c_50.jpg,alphabet,C,False
3847,data/자유패턴/20/20_r/20_r18.jpg,free_pattern,,
3061,data/알파벳인식/U/U/4d_al_U_41.jpg,alphabet,U,False
51,data/숫자인식/00/r_n00/4d_r_n00_01.jpg,number,0,True
2571,data/알파벳인식/P/r_P/4d_al_r_P_17.jpg,alphabet,P,True
1360,data/알파벳인식/D/r_D/4d_al_r_D_09.jpg,alphabet,D,True
3782,data/자유패턴/20/20_g/20_g03.jpg,free_pattern,,
2750,data/알파벳인식/R/R/4d_al_R_30.jpg,alphabet,R,False
1069,data/알파벳인식/A/r_A/4d_al_r_a19.jpg,alphabet,A,True


In [113]:
duplicateRows = df[df.duplicated(['image_path'])]
duplicateRows

Unnamed: 0,image_path,category,target,r


In [114]:
df = df[df['r']==False]

In [115]:
from sklearn.model_selection import train_test_split
from preprocessing import preprocess_img
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout


X = df['image_path']
y = df['category']
y = pd.get_dummies(y)
#y = y[['number', 'alphabet', 'free_pattern']]
y = y[['number', 'alphabet']]

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, random_state=42)

X_train = X_train.apply(lambda x: preprocess_img(x, (32, 32)))
X_val = X_val.apply(lambda x: preprocess_img(x, (32, 32)))

X_train = np.stack(X_train.values)
X_val = np.stack(X_val.values)

In [127]:
test_2 = []
for root, dirs, files in os.walk('data/숫자인식/숫자 추가/'):
    for file in files:
        test_2.append({
            'image_path': os.path.join(root, file),
            'category': 'number',
        })

for root, dirs, files in os.walk('data/알파벳인식/알파벳 추가/'):
    for file in files:
        test_2.append({
            'image_path': os.path.join(root, file),
            'category': 'alphabet',
        })

test_df = pd.DataFrame(test_2)
test_df = test_df[test_df['image_path'].str.contains('_r_')==False]

X_test_2 = test_df['image_path'].apply(lambda x: preprocess_img(x, (32, 32)))
X_test_2 = np.stack(X_test_2.values)

y_test_2 = test_df['category']
y_test_2 = pd.get_dummies(y_test_2)
y_test_2 = y_test_2[['number', 'alphabet']]

In [119]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(2, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f5f66cdb0d0>

In [126]:
y_test.value_counts()

number  alphabet
False   True        327
True    False       126
Name: count, dtype: int64

In [128]:
model.evaluate(X_test_2, y_test_2)



[1.2676557302474976, 0.5588889122009277]

In [124]:
X_test = X_test.apply(lambda x: preprocess_img(x, (32, 32)))
X_test = np.stack(X_test.values)

model.evaluate(X_test, y_test)



[0.14365528523921967, 0.9293597936630249]

In [96]:
alpha_test = []
for root, dirs, files in os.walk('data/알파벳인식/알파벳 추가/'):
    for file in files:
        alpha_test.append(os.path.join(root, file))

X_test_alpha = pd.Series(alpha_test)
X_test_alpha = X_test_alpha.apply(lambda x: preprocess_img(x, (32, 32)))
X_test_alpha = np.stack(X_test_alpha.values)

y_test_alpha = np.zeros((len(X_test_alpha), 3))
y_test_alpha[:, 1] = 1

model.evaluate(X_test_alpha, y_test_alpha)



[8.879897117614746, 0.0]

In [90]:
df9 = df[df['image_path'].str.contains('n09')]
df9.size

0

In [107]:
import tensorflow as tf
model = tf.keras.models.load_model('models/best_number_model.keras')