<a href="https://colab.research.google.com/github/ntnukelly/Generative_AI/blob/main/0225%E7%A5%9E%E7%B6%93%E7%B6%B2%E8%B7%AF%E4%BD%9C%E6%A5%AD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**大寫英文字母辨識**

將數字辨識改為大寫英文字母辨識

In [None]:
# Layer widths for the CNN defined later in this notebook:
#   N1, N2 -> filter counts of the first and second Conv2D layers
#   N3     -> unit count of the hidden Dense layer
N1, N2, N3 = 128, 128, 64

In [3]:
!pip install gradio



In [4]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from torchvision import datasets, transforms
import gradio as gr
from PIL import Image
import shutil
import zipfile

# If the EMNIST archive is already on disk but the raw folder has not been
# created yet, unpack the archive next to it.
emnist_path = "./data/EMNIST/gzip.zip"
unzip_path = "./data/EMNIST/raw/"
needs_extraction = os.path.exists(emnist_path) and not os.path.exists(unzip_path)
if needs_extraction:
    with zipfile.ZipFile(emnist_path, 'r') as archive:
        archive.extractall("./data/EMNIST/")
    print("EMNIST 數據集解壓縮完成！")


# Download (when missing) and load the EMNIST "letters" split via torchvision.
transform = transforms.Compose([transforms.ToTensor()])
_emnist_kwargs = dict(root="./data", split="letters", download=True, transform=transform)
train_dataset = datasets.EMNIST(train=True, **_emnist_kwargs)
test_dataset = datasets.EMNIST(train=False, **_emnist_kwargs)


def _prepare_images(dataset):
    """Return the dataset's images as float32 arrays in [0, 1], shaped (-1, 28, 28, 1)."""
    pixels = dataset.data.numpy().astype("float32") / 255.0
    # Reverse both spatial axes (vertical + horizontal flip, i.e. a 180° turn).
    # NOTE(review): torchvision's raw EMNIST arrays are commonly described as
    # transposed, so this may not produce upright glyphs — confirm visually.
    # Any inference-time preprocessing has to reproduce the same orientation.
    pixels = np.flip(pixels, axis=(1, 2))
    # Add the trailing channel axis expected by Conv2D.
    return pixels.reshape(-1, 28, 28, 1)


def _prepare_labels(dataset):
    """Return one-hot labels; the targets are shifted by one so 1..26 maps to 0..25."""
    shifted = dataset.targets.numpy() - 1
    return to_categorical(shifted, num_classes=26)


x_train, y_train = _prepare_images(train_dataset), _prepare_labels(train_dataset)
x_test, y_test = _prepare_images(test_dataset), _prepare_labels(test_dataset)

# Image augmentation: small random rotations, shifts, shear and zoom applied
# on the fly to each training batch.
datagen = ImageDataGenerator(
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.05,
    zoom_range=0.05,
)
# No datagen.fit(x_train) here: fit() only computes dataset statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled, so calling it would merely copy the whole training array.

# CNN architecture: two 3x3 convolutions (each followed by batch
# normalisation), one 2x2 max-pool, dropout, then a dense head that ends in a
# 26-way softmax (one output per letter).
model = Sequential([
    # Declare the input explicitly; passing input_shape= to the first layer is
    # what triggers the Keras UserWarning about Sequential models.
    tf.keras.Input(shape=(28, 28, 1)),
    Conv2D(N1, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(N2, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    Flatten(),
    Dense(N3, activation='relu'),
    Dropout(0.5),
    Dense(26, activation='softmax'),
])

# Adam optimiser with categorical cross-entropy (labels are one-hot encoded).
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0005),
    metrics=['accuracy'],
)
model.summary()

# Train on augmented batches of 512 for 15 epochs; the test split doubles as
# the per-epoch validation set.
train_batches = datagen.flow(x_train, y_train, batch_size=512)
model.fit(train_batches, epochs=15, validation_data=(x_test, y_test))

# Final evaluation on the test split.
test_metrics = model.evaluate(x_test, y_test)
loss, acc = test_metrics
print(f"測試資料正確率: {acc*100:.2f}%")

# Prediction function
def recognize_letter(inp):
    """Classify a hand-drawn capital letter coming from the Gradio sketchpad.

    Args:
        inp: Sketchpad payload; ``inp["layers"][0]`` must hold the drawn layer
            as an image array (assumed to be RGBA with strokes on a
            transparent background — TODO confirm for the installed Gradio
            version; opaque RGB/grayscale layers are handled as well).

    Returns:
        dict mapping each letter "A".."Z" to its predicted probability.

    Raises:
        ValueError: if the payload contains no drawing layer.
    """
    layers = inp["layers"] if inp else None
    if layers is None or len(layers) == 0:
        raise ValueError("No drawing layer received from the sketchpad.")

    # Build a PIL image from the first drawing layer.
    layer = Image.fromarray(np.asarray(layers[0], dtype=np.uint8))

    # Flatten onto an opaque white canvas before going to grayscale.
    # convert("L") ignores the alpha channel, so dark strokes on a transparent
    # background would otherwise collapse into a uniform image.
    canvas = Image.new("RGBA", layer.size, (255, 255, 255, 255))
    flattened = Image.alpha_composite(canvas, layer.convert("RGBA"))
    image_gray = flattened.convert("L")

    # Downscale to the 28x28 resolution the network was trained on.
    img_array = np.array(image_gray.resize((28, 28), resample=Image.LANCZOS))

    # Invert so strokes are bright on a dark background, as in EMNIST.
    img_array = 255 - img_array

    # Reproduce the orientation of the training images: the raw torchvision
    # EMNIST arrays (stored transposed relative to an upright glyph —
    # NOTE(review): confirm) are flipped along both axes during preprocessing,
    # so an upright drawing must be transposed and then flipped on both axes.
    img_array = np.flip(img_array.T, axis=(0, 1))

    # Add batch and channel axes and scale to [0, 1].
    img_array = img_array.reshape(1, 28, 28, 1).astype(np.float32) / 255.0

    # Show the exact tensor fed to the model (in training orientation) so the
    # preprocessing can be checked by eye.
    plt.imshow(img_array.squeeze(), cmap="gray")
    plt.title("Processed Input Image")
    plt.show()

    # Run the prediction and pair every class probability with its letter.
    prediction = model.predict(img_array).flatten()
    labels = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    return {letter: float(prob) for letter, prob in zip(labels, prediction)}

# Gradio interface: a sketchpad for drawing, a label showing the top-3
# predictions, and a button that triggers recognition.
with gr.Blocks() as iface:
    sketchpad = gr.Sketchpad()
    label = gr.Label(num_top_classes=3)

    def predict_and_clear(inp):
        """Return the prediction for the label and ``None`` to reset the canvas."""
        result = recognize_letter(inp)
        # The canvas is reset by returning None for the sketchpad output.
        # sketchpad.clear(...) is the event-listener registration API, not a
        # "wipe the canvas" call, so it must not be invoked from the callback.
        return result, None

    btn = gr.Button("辨識")
    # The sketchpad is both input and output so it is refreshed after each run.
    btn.click(predict_and_clear, inputs=sketchpad, outputs=[label, sketchpad])

iface.launch(share=True, debug=True)




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()


Epoch 1/15
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 192ms/step - accuracy: 0.2325 - loss: 2.7357 - val_accuracy: 0.0614 - val_loss: 20.6241
Epoch 2/15
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 174ms/step - accuracy: 0.5731 - loss: 1.4285 - val_accuracy: 0.1659 - val_loss: 13.2941
Epoch 3/15
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 169ms/step - accuracy: 0.6693 - loss: 1.0754 - val_accuracy: 0.8800 - val_loss: 0.4067
Epoch 4/15
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 166ms/step - accuracy: 0.7191 - loss: 0.9046 - val_accuracy: 0.9025 - val_loss: 0.3095
Epoch 5/15
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 167ms/step - accuracy: 0.7500 - loss: 0.7935 - val_accuracy: 0.9117 - val_loss: 0.2765
Epoch 6/15
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 164ms/step - accuracy: 0.7734 - loss: 0.7176 - val_accuracy: 0.9061 - val_loss: 0.2916
Epoch 7/

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 533ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a7e79d058f304c45f1.gradio.live


