# P7: Burmese Handwritten Digit Recognition (Deep Learning/CNN)
# -------------------------------------------------------------------------

# Cell 1: Import Libraries and Setup

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [3]:
# Ensure TensorFlow is using a consistent backend for reproducibility
tf.random.set_seed(42)

print(f"TensorFlow Version: {tf.__version__}")
print("Deep Learning Libraries loaded successfully.")

TensorFlow Version: 2.20.0
Deep Learning Libraries loaded successfully.


# Cell 2: Synthetic Data Generation (Placeholder for Real Burmese Dataset)

In [4]:

IMG_HEIGHT = 28
IMG_WIDTH = 28
NUM_CLASSES = 10 # Assuming digits 0-9

# Create synthetic data: 1000 samples, 28x28 grayscale images
X_synthetic = np.random.rand(1000, IMG_HEIGHT, IMG_WIDTH).astype('float32') * 255
# Create synthetic labels (integers 0 to 9)
y_synthetic_int = np.random.randint(0, NUM_CLASSES, 1000)

print(f"\nSynthetic Data Generated:")
print(f"Feature shape (X): {X_synthetic.shape}")
print(f"Label shape (y): {y_synthetic_int.shape}")

# Split the synthetic data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train_int, y_test_int = train_test_split(
    X_synthetic, y_synthetic_int, test_size=0.2, random_state=42
)



Synthetic Data Generated:
Feature shape (X): (1000, 28, 28)
Label shape (y): (1000,)


# Cell 3: Data Preprocessing

In [5]:
X_train = X_train / 255.0
X_test = X_test / 255.0

# 3.2 Reshaping for CNN
# CNN expects an image depth channel. For grayscale (28x28), this is (28, 28, 1).
# If you use color images, the shape would be (IMG_HEIGHT, IMG_WIDTH, 3).
X_train = X_train.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1)
X_test = X_test.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1)

# 3.3 One-Hot Encoding
# Convert integer labels (e.g., 7) to categorical vectors (e.g., [0,0,0,0,0,0,0,1,0,0])
y_train = to_categorical(y_train_int, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test_int, num_classes=NUM_CLASSES)

print("\nData Preprocessing Complete.")
print(f"X_train shape for CNN: {X_train.shape}")
print(f"y_train shape (one-hot): {y_train.shape}")


Data Preprocessing Complete.
X_train shape for CNN: (800, 28, 28, 1)
y_train shape (one-hot): (800, 10)


# Cell 4: Model Definition (Convolutional Neural Network - CNN)

In [6]:
model = Sequential([
    # Convolutional Layer 1
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 1)),
    MaxPooling2D((2, 2)),

    # Convolutional Layer 2
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25), # Regularization to prevent overfitting

    # Fully Connected Layers
    Flatten(), # Flatten 2D feature maps into a 1D vector
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Output Layer
    Dense(NUM_CLASSES, activation='softmax') # Softmax for multi-class classification
])

print("\nCNN Architecture Summary:")
model.summary()



CNN Architecture Summary:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Cell 5: Model Compilation

In [7]:
# Use 'adam' optimizer and 'categorical_crossentropy' for multi-class problems
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("\nModel Compiled: Optimizer='adam', Loss='categorical_crossentropy'")


# Cell 6: Model Training

# Define training parameters
EPOCHS = 10
BATCH_SIZE = 32

print(f"\nStarting training for {EPOCHS} epochs...")

# Train the model
# Note: For real images, you would likely use Data Augmentation here (e.g., rotation, shift)
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_test, y_test),
    verbose=1
)

print("\nModel Training Finished.")



Model Compiled: Optimizer='adam', Loss='categorical_crossentropy'

Starting training for 10 epochs...
Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1050 - loss: 2.3151 - val_accuracy: 0.1400 - val_loss: 2.3031
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0975 - loss: 2.3054 - val_accuracy: 0.1400 - val_loss: 2.3054
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.1112 - loss: 2.2995 - val_accuracy: 0.1400 - val_loss: 2.3100
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.1338 - loss: 2.2999 - val_accuracy: 0.1400 - val_loss: 2.3154
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.1063 - loss: 2.2985 - val_accuracy: 0.1400 - val_loss: 2.3190
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accur

# Cell 7: Model Evaluation (Using Placeholder Metrics)

In [8]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\n--- Model Evaluation ---")
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

# Since the data is synthetic (random), the accuracy will be close to random chance (1/10 = 0.1)

# Display predictions for the first 5 test samples
y_pred_probs = model.predict(X_test[:5])
y_pred_classes = np.argmax(y_pred_probs, axis=1)

print("\nFirst 5 Test Predictions:")
print(f"True Labels (int): {y_test_int[:5]}")
print(f"Predicted Labels: {y_pred_classes}")


--- Model Evaluation ---
Test Loss: 2.3160
Test Accuracy: 0.1400
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step

First 5 Test Predictions:
True Labels (int): [3 2 9 6 0]
Predicted Labels: [0 0 0 0 0]
