pip install tensorflow

In [None]:
## 1. 프로젝트 개요
- 숫자(0~9)와 한글 자모(ㄱ~ㅎ)를 분류하는 OCR 딥러닝 모델
- CNN 구조를 사용하며 성능 향상 기법 적용

In [6]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# 📘 1. 프로젝트 개요 (마크다운 셀)
# 숫자(0~9)와 한글 자모(ㄱ~ㅎ)를 분류하는 CNN 기반 손글씨 OCR 모델입니다.
# CNN 기본 모델과 성능 향상 기법(Augmentation, Dropout, BatchNorm 등)을 실험합니다.

# 🧠 2. 라이브러리 불러오기
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# 📂 3. 데이터 불러오기 함수
def load_data(data_dir, img_size=(28, 28)):
    """Load a folder-per-class image dataset as scaled grayscale arrays.

    Every non-hidden subdirectory directly under ``data_dir`` is treated as
    one class. Class indices are assigned in sorted-name order.

    Args:
        data_dir: Directory whose subdirectories each hold one class's images.
        img_size: Target size passed to ``load_img`` for every image.

    Returns:
        A tuple ``(images, one_hot_labels, label_dict)`` where ``images`` is
        the stacked image array with pixel values divided by 255,
        ``one_hot_labels`` has one column per class directory, and
        ``label_dict`` maps class name to integer index.

    Raises:
        ValueError: If ``data_dir`` contains no class subdirectories.
    """
    # Only real directories are classes: stray files (e.g. .DS_Store) and
    # hidden folders (e.g. .ipynb_checkpoints) must not become labels.
    class_names = sorted(
        name
        for name in os.listdir(data_dir)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(data_dir, name))
    )
    if not class_names:
        raise ValueError(f"No class subdirectories found in {data_dir!r}")
    label_dict = {name: i for i, name in enumerate(class_names)}

    images, labels = [], []
    for class_name in class_names:
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir order is arbitrary; sorting keeps sample order
        # deterministic across platforms and runs.
        for fname in sorted(os.listdir(class_dir)):
            path = os.path.join(class_dir, fname)
            # Skip hidden files and nested folders instead of failing on them.
            if fname.startswith('.') or not os.path.isfile(path):
                continue
            img = load_img(path, target_size=img_size, color_mode='grayscale')
            images.append(img_to_array(img) / 255.0)
            labels.append(label_dict[class_name])

    # Pin the one-hot width to the class count so an empty trailing class
    # directory cannot shrink the label matrix.
    one_hot = to_categorical(labels, num_classes=len(class_names))
    return np.array(images), one_hot, label_dict

# 📁 4. Load the dataset and split it into train / test sets
X, y, label_dict = load_data('./handwritten_sample_english/')
# Force the 28x28x1 layout that the model's first layer declares as its input.
X = np.reshape(X, (-1, 28, 28, 1))
# 80% train / 20% test, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 🔄 5. Data augmentation
# Small random rotations, shifts and zooms applied to training images.
augmentation_options = {
    'rotation_range': 10,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': 0.1,
}
aug = ImageDataGenerator(**augmentation_options)
# NOTE(review): no featurewise/ZCA option is enabled above, so this fit call
# is probably not required — confirm against the Keras docs before removing.
aug.fit(X_train)

# 🔔 6. Training callbacks
# Stop after 5 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
# Halve the learning rate after 3 epochs without improvement.
lr_schedule = ReduceLROnPlateau(patience=3, factor=0.5)
callbacks = [early_stopping, lr_schedule]

# 🏗️ 7. Build the model (with the regularisation / normalisation tweaks)
model = Sequential()

# Convolution block 1: 32 filters on the 28x28x1 input.
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=2, strides=2))

# Convolution block 2: 64 filters, followed by light dropout.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Dropout(0.25))

# Classifier head: one softmax unit per entry in label_dict.
model.add(Flatten())
model.add(Dense(units=128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(units=len(label_dict), activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# 🚀 8. Train the model
# Keras does not support `validation_split` when the training input is a
# generator such as aug.flow(...), so hold out 10% of the training data
# explicitly and pass it as `validation_data`. The validation images are not
# augmented. The callbacks monitor validation metrics by default, so a
# validation set must be provided.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)
history = model.fit(
    aug.flow(X_fit, y_fit, batch_size=64),
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
)

# 🧪 9. Evaluate on the test set and print the accuracy
# evaluate() yields the loss followed by the compiled 'accuracy' metric.
evaluation = model.evaluate(x=X_test, y=y_test)
test_loss, test_acc = evaluation
print("최종 테스트 정확도:", test_acc)

# 🖼️ 10. Visualise predictions for the first few test samples
pred = model.predict(X_test)

# label_dict was built with enumerate(), so key order equals class index
# order; build the index -> name list once instead of twice per iteration.
class_names = list(label_dict)
# Show at most 10 samples; a smaller test set would otherwise raise IndexError.
n_show = min(10, len(X_test))

plt.figure(figsize=(15, 3))
for i in range(n_show):
    plt.subplot(1, 10, i + 1)
    plt.imshow(X_test[i].reshape(28, 28), cmap='gray')
    true_label = class_names[np.argmax(y_test[i])]
    pred_label = class_names[np.argmax(pred[i])]
    # T = true label, P = predicted label.
    plt.title(f"T:{true_label}\nP:{pred_label}")
    plt.axis('off')
plt.tight_layout()
plt.show()
