<a href="https://colab.research.google.com/github/junoso/AndroidQuizApp/blob/main/Assignment_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- 1. SETUP -------------------------------------------------
!pip install -q tensorflow opencv-python-headless scikit-learn pandas matplotlib

import os, cv2, numpy as np, json, shutil
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, Markdown

print(f"TF: {tf.__version__} | GPU: {tf.config.list_physical_devices('GPU')}")

# --- 2. DOWNLOAD FULL DATASET ---------------------------------
!git clone https://github.com/chenkenanalytic/handwritting_data_all.git
%cd handwritting_data_all
!cat all_data.zip* > all_data.zip
!unzip -q all_data.zip -d extracted_data
DATA_DIR = "/content/handwritting_data_all/extracted_data/cleaned_data"


TF: 2.19.0 | GPU: []
fatal: destination path 'handwritting_data_all' already exists and is not an empty directory.
/content/handwritting_data_all
replace extracted_data/cleaned_data/10000/ф═_0.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_1.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_10.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_11.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_12.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_13.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_14.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_15.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_data/cleaned_data/10000/ф═_16.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace extracted_d

In [None]:
# --------------------------------------------------------------
# 3. GROUP BY CHARACTER
# --------------------------------------------------------------
# Map each character (the name of the folder that directly contains the
# image) to the list of PNG paths found for it anywhere below DATA_DIR.
char_images = {}
for folder, _subdirs, filenames in os.walk(DATA_DIR):
    png_paths = [os.path.join(folder, name)
                 for name in filenames
                 if name.lower().endswith('.png')]
    if png_paths:
        # Only folders that actually hold PNGs get an entry.
        char_images.setdefault(os.path.basename(folder), []).extend(png_paths)

print(f"Characters: {len(char_images)}")

# --------------------------------------------------------------
# 4. TRAIN / TEST SPLIT (first 40 → train)
# --------------------------------------------------------------
# Per character: sort the paths, take the first 40 for training and the
# next 10 (indices 40..49) for testing. Characters with fewer than 50
# images are left out of both sets.
train_paths, train_labels = [], []
test_paths,  test_labels  = [], []

for char, imgs in char_images.items():
    if len(imgs) < 50:
        continue
    ordered = sorted(imgs)
    train_part = ordered[:40]
    test_part = ordered[40:50]
    train_paths += train_part
    train_labels += [char] * 40
    test_paths += test_part
    test_labels += [char] * len(test_part)

print(f"Train raw: {len(train_paths)} | Test: {len(test_paths)}")

# --------------------------------------------------------------
# 5. AUGMENTATION (OpenCV)
# --------------------------------------------------------------
def augment_image(img, border_value=None):
    """Return five affine-warped variants of a grayscale image.

    The variants are, in order: rotation by +5 degrees, rotation by -5 degrees,
    scaling by 1.1, scaling by 0.9 (all about the image centre) and a
    horizontal shear of 0.1. The original image itself is not included.

    Args:
        img: Image array of shape (H, W) or (H, W, 1).
        border_value: Fill value for pixels that the warp pulls in from
            outside the image. When None (default) a white fill matching the
            image's value range is chosen: 1.0 for floating-point images whose
            maximum is <= 1.0, otherwise 255.

    Returns:
        A list of five arrays with the same shape and dtype as ``img``.
    """
    if border_value is None:
        # A hard-coded 255 is only "white" for 0..255 images; on images
        # normalised to [0, 1] it would paint borders 255x brighter than any
        # real pixel.
        is_unit_float = (np.issubdtype(img.dtype, np.floating)
                         and img.size > 0
                         and float(img.max()) <= 1.0)
        border_value = 1.0 if is_unit_float else 255

    # cv2.warpAffine returns (H, W) for a single-channel (H, W, 1) input, so
    # remember whether the trailing channel axis has to be restored.
    had_channel_axis = img.ndim == 3 and img.shape[2] == 1

    h, w = img.shape[:2]
    center = (w/2, h/2)
    Ms = [
        cv2.getRotationMatrix2D(center,  5, 1.0),
        cv2.getRotationMatrix2D(center, -5, 1.0),
        cv2.getRotationMatrix2D(center,  0, 1.1),
        cv2.getRotationMatrix2D(center,  0, 0.9),
        np.float32([[1, 0.1, 0], [0, 1, 0]])
    ]

    warped_images = []
    for M in Ms:
        warped = cv2.warpAffine(img, M, (w, h),
                                borderMode=cv2.BORDER_CONSTANT,
                                borderValue=border_value)
        if had_channel_axis and warped.ndim == 2:
            warped = warped[..., np.newaxis]
        warped_images.append(warped)
    return warped_images

# --------------------------------------------------------------
# 6. LOAD + RESIZE (64×64)
# --------------------------------------------------------------
IMG_SIZE = 64

def load(path, augment=False):
    """Read one image as grayscale and return it as normalised model input.

    Args:
        path: Path of the image file to read.
        augment: When True, return the variants produced by ``augment_image``
            instead of the image itself.

    Returns:
        A list of float32 arrays of shape (IMG_SIZE, IMG_SIZE, 1) scaled to
        [0, 1]: a single-element list when ``augment`` is False, otherwise one
        entry per augmented variant.

    Raises:
        OSError: If OpenCV cannot read or decode ``path``.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"cv2.imread could not read or decode image: {path!r}")
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Augment while the image is still a 2-D 0..255 array: the white border
    # fill of 255 used by augment_image is only correct on that scale, and
    # normalising / adding the channel axis afterwards guarantees that train
    # and test images end up with the same shape and value range.
    variants = augment_image(img) if augment else [img]
    return [np.expand_dims(v.astype('float32')/255.0, -1) for v in variants]

# TRAIN (with augmentation): every path contributes all images returned by
# load(..., augment=True), each tagged with that path's label.
# NOTE(review): all images are first collected in Python lists and then copied
# into one array by np.array; with the number of training paths reported by
# the split cell this may not fit into memory -- confirm, or stream instead.
X_train, y_train = [], []
for path, label in zip(train_paths, train_labels):
    variants = load(path, augment=True)
    X_train += variants
    y_train += [label] * len(variants)

X_train = np.array(X_train)
y_train = np.array(y_train)

# TEST (no augmentation): exactly one image per path.
X_test = np.array([load(path, augment=False)[0]
                   for path, _ in zip(test_paths, test_labels)])
y_test = np.array([label for _, label in zip(test_paths, test_labels)])

print(f"Train (aug): {X_train.shape} | Test: {X_test.shape}")

# --------------------------------------------------------------
# 7. LABEL ENCODING
# --------------------------------------------------------------
# Fit the encoder on the training labels only, then map both label arrays to
# integer class ids with that same fitted encoder.
le = LabelEncoder()
le.fit(y_train)
y_train_enc = le.transform(y_train)
y_test_enc  = le.transform(y_test)
num_classes = len(le.classes_)
print(f"Classes: {num_classes}")

# Persist the id -> character mapping; ensure_ascii=False keeps the
# characters readable in the file instead of \u escapes.
with open("/content/label_map.json", "w", encoding="utf-8") as map_file:
    json.dump(dict(enumerate(le.classes_)), map_file, ensure_ascii=False, indent=2)

# --------------------------------------------------------------
# 8. VISUALIZE ORIGINAL + AUGMENTED
# --------------------------------------------------------------
# Preview the first training image next to every variant that
# augment_image produces for it.
sample = cv2.imread(train_paths[0], cv2.IMREAD_GRAYSCALE)
if sample is None:
    # cv2.imread returns None instead of raising when a file cannot be read.
    raise OSError(f"cv2.imread could not read or decode image: {train_paths[0]!r}")
# Resize with the same constant the training pipeline uses, so the preview
# stays in sync if IMG_SIZE is changed.
sample = cv2.resize(sample, (IMG_SIZE, IMG_SIZE))
aug = augment_image(sample)

imgs = [sample] + aug
# One title per panel, in the order augment_image returns its variants.
titles = ['Original','+5°','-5°','1.1×','0.9×','Shear 0.1']

fig, axes = plt.subplots(1, len(imgs), figsize=(2*len(imgs), 2))
for ax, im, title in zip(axes, imgs, titles):
    # Fixed vmin/vmax: without them each panel is autoscaled separately and
    # brightness differences between variants are hidden.
    ax.imshow(im, cmap='gray', vmin=0, vmax=255)
    ax.set_title(title)
    ax.axis('off')
fig.suptitle(f"Character: {train_labels[0]}")
plt.show()

# --------------------------------------------------------------
# 9. MODELS
# --------------------------------------------------------------
def baseline_cnn():
    """Build and compile the baseline CNN.

    Architecture: Conv2D(32, 5x5, relu) -> MaxPooling(2x2) -> Dropout(0.2)
    -> Flatten -> Dense(128, relu) -> Dense(num_classes, softmax).

    The input is declared with an explicit ``keras.Input`` sized from
    ``IMG_SIZE`` rather than an ``input_shape`` argument on the first layer,
    so the model follows the image size used by the loading code.

    Returns:
        A ``keras.Sequential`` model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    m = keras.Sequential([
        keras.Input(shape=(IMG_SIZE, IMG_SIZE, 1)),
        keras.layers.Conv2D(32,(5,5),activation='relu'),
        keras.layers.MaxPooling2D((2,2)),
        keras.layers.Dropout(0.2),
        keras.layers.Flatten(),
        keras.layers.Dense(128,activation='relu'),
        keras.layers.Dense(num_classes,activation='softmax')
    ])
    # Sparse loss: labels are integer class ids, not one-hot vectors.
    m.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    return m

def deeper_cnn():
    """Build and compile a CNN with two conv/pool stages and two hidden dense layers.

    Architecture: Conv2D(32, 5x5, relu) -> MaxPooling(2x2)
    -> Conv2D(64, 3x3, relu) -> MaxPooling(2x2) -> Dropout(0.2) -> Flatten
    -> Dense(256, relu) -> Dense(128, relu) -> Dense(num_classes, softmax).

    The input is declared with an explicit ``keras.Input`` sized from
    ``IMG_SIZE`` rather than an ``input_shape`` argument on the first layer,
    so the model follows the image size used by the loading code.

    Returns:
        A ``keras.Sequential`` model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    m = keras.Sequential([
        keras.Input(shape=(IMG_SIZE, IMG_SIZE, 1)),
        keras.layers.Conv2D(32,(5,5),activation='relu'),
        keras.layers.MaxPooling2D((2,2)),
        keras.layers.Conv2D(64,(3,3),activation='relu'),
        keras.layers.MaxPooling2D((2,2)),
        keras.layers.Dropout(0.2),
        keras.layers.Flatten(),
        keras.layers.Dense(256,activation='relu'),
        keras.layers.Dense(128,activation='relu'),
        keras.layers.Dense(num_classes,activation='softmax')
    ])
    # Sparse loss: labels are integer class ids, not one-hot vectors.
    m.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    return m

def larger_cnn():
    """Build and compile the third CNN variant.

    Architecture: Conv2D(30, 5x5, relu) -> MaxPooling(2x2)
    -> Conv2D(15, 3x3, relu) -> MaxPooling(2x2) -> Dropout(0.2) -> Flatten
    -> Dense(128, relu) -> Dense(50, relu) -> Dense(num_classes, softmax).

    Note that, despite its name, this model uses fewer convolution filters
    (30 and 15) and narrower dense layers than ``deeper_cnn``.

    The input is declared with an explicit ``keras.Input`` sized from
    ``IMG_SIZE`` rather than an ``input_shape`` argument on the first layer,
    so the model follows the image size used by the loading code.

    Returns:
        A ``keras.Sequential`` model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    m = keras.Sequential([
        keras.Input(shape=(IMG_SIZE, IMG_SIZE, 1)),
        keras.layers.Conv2D(30,(5,5),activation='relu'),
        keras.layers.MaxPooling2D((2,2)),
        keras.layers.Conv2D(15,(3,3),activation='relu'),
        keras.layers.MaxPooling2D((2,2)),
        keras.layers.Dropout(0.2),
        keras.layers.Flatten(),
        keras.layers.Dense(128,activation='relu'),
        keras.layers.Dense(50,activation='relu'),
        keras.layers.Dense(num_classes,activation='softmax')
    ])
    # Sparse loss: labels are integer class ids, not one-hot vectors.
    m.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    return m

# Registry of (display name, builder function) pairs. The builders are stored
# uncalled so that each model is only constructed when it is about to be trained.
models = list(zip(
    ("Baseline CNN", "Deeper CNN", "Larger CNN"),
    (baseline_cnn, deeper_cnn, larger_cnn),
))

# --------------------------------------------------------------
# 10. TRAIN & EVALUATE
# --------------------------------------------------------------
# Fixed seed, re-applied before every model, so that each architecture starts
# from the same RNG state and a re-run reproduces the comparison.
SEED = 42

results = []
for name, fn in models:
    print(f"\n=== {name} ===")
    # Drop the state left behind by the previously trained model before
    # building the next one, then reseed Python, NumPy and the backend.
    keras.backend.clear_session()
    keras.utils.set_random_seed(SEED)
    model = fn()
    # The test split is passed as validation data for per-epoch monitoring only.
    model.fit(X_train, y_train_enc,
              validation_data=(X_test, y_test_enc),
              epochs=10, batch_size=128, verbose=1)
    loss, acc = model.evaluate(X_test, y_test_enc, verbose=0)
    results.append({"Model":name, "Test Accuracy":f"{acc:.4f}"})
    # File name pattern model_<Name_With_Underscores>.h5 is relied on by the
    # export step that copies /content/model_*.h5.
    model.save(f"/content/model_{name.replace(' ','_')}.h5")
    print(f"{name} → {acc:.4%}")

# --------------------------------------------------------------
# 11. RESULT TABLE
# --------------------------------------------------------------
# One row per trained model, built from the list of result dicts, shown
# under a Markdown heading.
df_results = pd.DataFrame(results)
display(Markdown("### Model Comparison"), df_results)

# --------------------------------------------------------------
# 12. BUILD GITHUB REPO FOLDER
# --------------------------------------------------------------
# Assemble the export folder with os/shutil instead of shell commands: the
# sample image path comes from the dataset, and interpolating it unquoted into
# a `!cp` command line breaks on names containing spaces or shell metacharacters.
REPO_OUT_DIR = "/content/DIT5411-HoYiTik"
SAMPLE_OUT_DIR = os.path.join(REPO_OUT_DIR, "sample_images")
os.makedirs(SAMPLE_OUT_DIR, exist_ok=True)  # also creates REPO_OUT_DIR

# Best effort, like the original `cp ... || true`: copy whatever saved models
# exist (files matching model_*.h5) and do nothing if there are none.
for entry in sorted(os.listdir("/content")):
    model_file = os.path.join("/content", entry)
    if entry.startswith("model_") and entry.endswith(".h5") and os.path.isfile(model_file):
        shutil.copy(model_file, REPO_OUT_DIR)

shutil.copy("/content/label_map.json", REPO_OUT_DIR)
shutil.copy(train_paths[0], os.path.join(SAMPLE_OUT_DIR, "original.png"))



Characters: 13065
Train raw: 444040 | Test: 111010
