## Handwriting image recognition using ANN in OpenCV

In [1]:
import gzip
import pickle
import cv2
import numpy as np

In [26]:
import gzip
import pickle

def load_data(path='mnist.pkl.gz'):
    """Load the training and test splits from a gzipped MNIST pickle.

    path: location of the gzip-compressed pickle; defaults to
        'mnist.pkl.gz' in the current working directory.

    Returns a ``(train_split, test_split)`` tuple. Each split is returned
    exactly as stored in the pickle (callers in this notebook index them
    as ``split[0]`` for inputs and ``split[1]`` for labels).

    The archive may hold two splits (train, test) or more. When more are
    present the first is treated as training data and the last as test
    data -- NOTE(review): this assumes a (train, validation, test) layout;
    confirm against the archive you actually use.

    Raises ValueError if the pickle contains fewer than two splits.
    """
    print("Loading MNIST database...")
    with gzip.open(path, 'rb') as file:
        # SECURITY: unpickling can execute arbitrary code. Only load
        # archives obtained from a source you trust.
        splits = pickle.load(file, encoding='latin1')

    if len(splits) < 2:
        raise ValueError(
            f"expected at least 2 data splits in {path!r}, found {len(splits)}"
        )
    return (splits[0], splits[-1])

def vectorized_result(j):
    """Return a 10x1 float column that is 1.0 at row `j` and 0.0 elsewhere.

    j: row index to switch on (a digit class 0-9 in this notebook).
    """
    one_hot_column = np.zeros(shape=(10, 1))
    one_hot_column[j] = 1.0
    return one_hot_column

def reformat_data():
    """Pair every input with its target for both the training and test sets.

    Training pairs are ``(input, one_hot_column)`` where the target comes
    from ``vectorized_result``; test pairs are ``(input, raw_label)``.

    Returns ``(training_data, test_data)`` as two lists. Lists are used
    instead of bare ``zip`` objects because a ``zip`` can only be consumed
    once: a second pass (for example evaluating twice) would silently see
    no samples at all.

    NOTE(review): ``load_data_enhanced`` is not defined in the cells
    visible here; it must be defined before this function is called and
    is expected to return ``((train_inputs, train_labels),
    (test_inputs, test_labels))``.
    """
    tr_d, te_d = load_data_enhanced()  # Use enhanced version

    training_inputs, training_labels = tr_d[0], tr_d[1]
    training_results = [vectorized_result(y) for y in training_labels]
    training_data = list(zip(training_inputs, training_results))

    test_inputs, test_labels = te_d[0], te_d[1]
    test_data = list(zip(test_inputs, test_labels))

    return (training_data, test_data)

def train_ann(ann, samples=50000, epochs=10):
    """Train `ann` in a single batch on the leading `samples` training pairs.

    ann: model object; its ``train`` method receives one cv2.ml TrainData
        object holding every selected sample.
    samples: maximum number of training pairs, taken from the front of the
        training set.
    epochs: accepted for signature compatibility but never read here.

    Returns ``(ann, test_data)``, where ``test_data`` is the test split
    exactly as produced by ``reformat_data()``.

    NOTE: a later cell in this notebook defines ``train_ann`` again; the
    definition that was executed most recently is the one that gets called.
    """
    print("Training ANN model...")
    all_pairs, test_data = reformat_data()

    # Materialise the pairs so they can be sliced, then keep the first `samples`.
    selected_pairs = list(all_pairs)[:samples]
    n_selected = len(selected_pairs)

    feature_columns = [np.array(features).reshape(-1, 1) for features, _ in selected_pairs]
    targets = [target for _, target in selected_pairs]

    # Collapse each sample / target to one row so the layout is ROW_SAMPLE.
    feature_matrix = np.array(feature_columns).reshape(n_selected, -1)
    target_matrix = np.array(targets).reshape(n_selected, -1)

    batch = cv2.ml.TrainData_create(
        feature_matrix.astype(np.float32),
        cv2.ml.ROW_SAMPLE,
        target_matrix.astype(np.float32),
    )
    ann.train(batch)

    return ann, test_data

In [None]:
# Alternative method using requests library
import requests
import io

def load_data_from_url_direct(url="http://deeplearning.net/data/mnist/mnist.pkl.gz", timeout=30):
    """Download a gzipped MNIST pickle and load it without touching the disk.

    url: address of the ``mnist.pkl.gz`` archive. The default is the
        address given in this notebook's manual-download instructions.
        (Previously the function read an undefined name ``url``.)
    timeout: seconds to wait for the server before giving up, so that a
        stalled connection fails instead of hanging the notebook.

    Returns ``((train_images, train_labels), (test_images, test_labels))``.
    If the archive holds more than two splits, the first is used as the
    training split and the last as the test split -- NOTE(review): assumes
    a (train, validation, test) layout; confirm for your archive.

    Raises requests.HTTPError on a non-success HTTP status, and ValueError
    if the archive does not contain at least two (images, labels) splits.
    """
    print(f"Downloading and loading MNIST from {url}...")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Load directly from memory
    with gzip.open(io.BytesIO(response.content), 'rb') as file:
        # SECURITY: unpickling downloaded data can execute arbitrary code.
        # Only use URLs you trust.
        splits = pickle.load(file, encoding='latin1')

    if len(splits) < 2:
        raise ValueError(f"expected at least 2 data splits, found {len(splits)}")
    (train_images, train_labels) = splits[0]
    (test_images, test_labels) = splits[-1]

    print("Data loaded successfully!")
    return (train_images, train_labels), (test_images, test_labels)

In [None]:
# Method with progress bar (install tqdm if needed: pip install tqdm)
from tqdm import tqdm

def download_with_progress(url, filename):
    """Download `url` to `filename`, showing a tqdm progress bar.

    url: address to fetch.
    filename: destination path. If it already exists nothing is downloaded.

    The body is streamed into ``<filename>.part`` and only renamed to
    `filename` once the transfer has finished, so an interrupted download
    is never mistaken for a complete file on the next call.

    Returns None. Raises requests.HTTPError on a non-success HTTP status.
    """
    # Imported locally because no visible cell of the notebook imports `os`.
    import os

    if os.path.exists(filename):
        print(f"{filename} already exists.")
        return

    print(f"Downloading {filename}...")
    part_path = f"{filename}.part"
    try:
        # The timeout bounds each wait on the server, not the whole transfer.
        with requests.get(url, stream=True, timeout=30) as response:
            # Fail loudly instead of saving an HTTP error page as the dataset.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            with open(part_path, 'wb') as file, tqdm(
                desc=filename,
                total=total_size,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
                        pbar.update(len(chunk))
        os.replace(part_path, filename)
    finally:
        # After a successful rename the partial file no longer exists;
        # after a failure this removes the incomplete download.
        if os.path.exists(part_path):
            os.remove(part_path)
    print("Download completed!")

In [13]:
# Create and configure the Artificial Neural Network model
def create_ann(hidden_nodes=60):
    """Build and configure an untrained OpenCV multi-layer perceptron.

    hidden_nodes: size of the single hidden layer (default 60).

    The network has 784 inputs (one per pixel of a 28x28 image) and 10
    outputs (one per digit class). It uses the symmetric sigmoid activation
    with parameters 0.6 and 1.0, back-propagation training with both
    method parameters set to 0.1 (documented by OpenCV as the weight-gradient
    and momentum scales), and stops after 100 iterations or once the change
    falls below 1.0, whichever comes first.

    Returns the configured cv2.ml ANN_MLP object.
    """
    layer_sizes = np.array([784, hidden_nodes, 10])
    stop_when = cv2.TERM_CRITERIA_MAX_ITER | cv2.TERM_CRITERIA_EPS

    network = cv2.ml.ANN_MLP_create()
    network.setLayerSizes(layer_sizes)
    network.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM, 0.6, 1.0)
    network.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.1, 0.1)
    network.setTermCriteria((stop_when, 100, 1.0))
    return network

In [16]:
# Create training function and train the ANN model
def train_ann(ann, samples=50000, epochs=10):
    """Train `ann` one sample at a time for `epochs` passes over the data.

    ann: cv2.ml ANN_MLP model to train (modified in place).
    samples: maximum number of training pairs, taken from the front of the
        training set returned by ``reformat_data()``.
    epochs: number of complete passes over the selected samples.

    The first call to ``ann.train`` initialises the weights; every later
    call adds ANN_MLP_UPDATE_WEIGHTS so training continues from the current
    weights instead of starting over. Input/output scaling is disabled on
    every call.

    Returns ``(ann, test_data)``; ``test_data`` is the test split produced
    by ``reformat_data()``.

    NOTE: an earlier cell in this notebook also defines ``train_ann``; the
    definition that was executed most recently is the one that gets called.
    """
    print("Training ANN model...")
    training_data, test_data = reformat_data()

    # Convert iterable to list and limit to 'samples' number of samples
    training_data = list(training_data)[:samples]
    total = len(training_data)

    no_scaling = cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE

    for epoch in range(epochs):
        for counter, (sample, response) in enumerate(training_data):
            if counter % 1000 == 0:
                print("Epoch %d: Trained on %d/%d samples" % (epoch, counter, total))

            # Flatten both arrays to a single explicit row (1 x N) so the
            # ROW_SAMPLE layout is unambiguous even for column-shaped targets.
            data = cv2.ml.TrainData_create(
                np.asarray(sample, np.float32).reshape(1, -1),
                cv2.ml.ROW_SAMPLE,
                np.asarray(response, np.float32).reshape(1, -1),
            )

            flags = no_scaling
            if ann.isTrained():
                flags |= cv2.ml.ANN_MLP_UPDATE_WEIGHTS
            ann.train(data, flags)

        print(" Completed %d/%d epochs" % (epoch + 1, epochs))

    # Return only after every epoch has run (the return used to sit inside
    # the epoch loop, ending training after the first pass).
    print("Training completed.")
    return (ann, test_data)

In [18]:
# Create prediction function to evaluate the ANN model
from random import sample


def predict_ann(ann, test_data):
    """Run `ann` on a single image and return whatever ``ann.predict`` returns.

    ann: trained model exposing ``predict(samples)``.
    test_data: one image. A flat 784-element vector is used as is; any other
        array with exactly 784 elements (for example 28x28) is flattened;
        anything else is first resized to 28x28 with bilinear interpolation.

    The model receives a float32 array of shape (1, 784).
    """
    test_data = np.asarray(test_data)
    if test_data.shape != (784, ):
        if test_data.size != 784:
            # Not a 28x28-sized image: rescale before flattening.
            test_data = cv2.resize(test_data, (28, 28), interpolation=cv2.INTER_LINEAR)
        test_data = test_data.reshape(784, )
    return ann.predict(np.array([test_data], np.float32))

In [23]:
# Create test function to evaluate the ANN model
def test_ann(ann, test_data):
    """Evaluate `ann` on labelled samples and print the accuracy.

    ann: trained model, passed through to ``predict_ann``.
    test_data: iterable of ``(image, label)`` pairs.

    The predicted class is the index of the largest value in the output
    array, which is element ``[1]`` of the prediction result (the same
    element the batch-evaluation cells of this notebook use). Element
    ``[0]`` is a scalar, so taking its argmax would always give 0.

    Prints a message instead of dividing by zero when `test_data` yields
    no samples (for example an already-consumed iterator). Returns None.
    """
    print("Testing ANN model...")
    num_tests = 0
    number_correct = 0

    for image, label in test_data:
        num_tests += 1
        result = predict_ann(ann, image)
        predicted_label = int(np.argmax(result[1]))
        if predicted_label == label:
            number_correct += 1

    if num_tests == 0:
        print("No test samples were provided; accuracy is undefined.")
        return

    accuracy = number_correct / num_tests
    print(f"Accuracy: {accuracy * 100:.2f}% ({number_correct}/{num_tests})")

In [29]:
# The MNIST download was timing out, so this cell exercises the OpenCV ANN
# pipeline end to end on randomly generated stand-in data instead.
print("Creating synthetic data for testing OpenCV ANN...")

# Create synthetic MNIST-like data for testing
def create_synthetic_data(n_samples=1000, seed=42):
    """Create random data laid out like flattened MNIST.

    n_samples: total number of samples to generate.
    seed: seed for the private random generator (default 42, which yields
        the same values as seeding NumPy's global generator with 42).

    A private ``RandomState`` is used so that calling this function does
    not reset NumPy's global random state as a side effect.

    Returns ``(train_images, train_labels), (test_images, test_labels)``.
    Images are float32 arrays with 784 columns holding values in [0, 1);
    labels are integers in 0-9. The first 80% of the samples form the
    training split and the remainder the test split.
    """
    rng = np.random.RandomState(seed)

    # Create random 28x28 images (flattened to 784 features)
    images = rng.rand(n_samples, 784).astype(np.float32)

    # Create random labels (0-9)
    labels = rng.randint(0, 10, n_samples)

    # Split into train/test (80/20)
    split = int(0.8 * n_samples)
    return (images[:split], labels[:split]), (images[split:], labels[split:])

# Generate the synthetic train/test splits (1000 samples in total)
(train_data, train_labels), (test_data, test_labels) = create_synthetic_data(1000)

print(f"Training data shape: {train_data.shape}")
print(f"Training labels shape: {train_labels.shape}")
print(f"Test data shape: {test_data.shape}")
print(f"Test labels shape: {test_labels.shape}")

# Fresh, untrained network with the default hidden-layer size
ann = create_ann()

# One-hot targets: pick row `label` of a 10x10 identity matrix for each sample
train_labels_onehot = np.eye(10, dtype=np.float32)[train_labels]

print("\nTraining OpenCV ANN with synthetic data...")

# Bundle samples (one per row) with their targets for cv2.ml
train_data_cv = cv2.ml.TrainData_create(
    train_data.astype(np.float32),
    cv2.ml.ROW_SAMPLE,
    train_labels_onehot.astype(np.float32),
)

# Single batch training run
ann.train(train_data_cv)

print("Training completed!")

# Predict the whole test split at once; element [1] holds the output rows
print("\nTesting the network...")
test_predictions = ann.predict(test_data.astype(np.float32))[1]
predicted_labels = test_predictions.argmax(axis=1)

# Percentage of test samples whose predicted class matches the label
accuracy = (predicted_labels == test_labels).mean() * 100
print(f"Test accuracy with synthetic data: {accuracy:.2f}%")

print("\nNote: This is synthetic random data, so accuracy will be around 10% (random chance)")
print("Once MNIST data is available, accuracy should be much higher!")

Creating synthetic data for testing OpenCV ANN...
Training data shape: (800, 784)
Training labels shape: (800,)
Test data shape: (200, 784)
Test labels shape: (200,)

Training OpenCV ANN with synthetic data...
Training completed!

Testing the network...
Test accuracy with synthetic data: 11.00%

Note: This is synthetic random data, so accuracy will be around 10% (random chance)
Once MNIST data is available, accuracy should be much higher!


## Alternative: Download MNIST Manually

If the automatic download fails due to network issues, you can:

1. **Download manually**: Go to http://deeplearning.net/data/mnist/mnist.pkl.gz and save it to your working directory
2. **Use alternative sources**: Try downloading from other MNIST sources
3. **Use TensorFlow's MNIST**: Load using `tf.keras.datasets.mnist.load_data()` instead

Once you have the MNIST data file (`mnist.pkl.gz`), the enhanced `load_data_enhanced()` function will work perfectly!

In [30]:
# Alternative: Use TensorFlow's MNIST dataset
import tensorflow as tf

def load_mnist_tensorflow():
    """Fetch MNIST through ``tf.keras.datasets`` and flatten it for OpenCV.

    Every image is reshaped to one row per sample and its pixel values are
    divided by 255.0; labels are passed through unchanged.

    Returns ``(x_train, y_train), (x_test, y_test)``.
    """
    print("Loading MNIST from TensorFlow...")
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    raw_train_images, y_train = train_split
    raw_test_images, y_test = test_split

    def _flatten_and_scale(images):
        # One row per image, pixel values divided by 255.0.
        return images.reshape(images.shape[0], -1) / 255.0

    x_train = _flatten_and_scale(raw_train_images)
    x_test = _flatten_and_scale(raw_test_images)

    return (x_train, y_train), (x_test, y_test)

def train_ann_tensorflow(samples=10000):
    """Train a fresh OpenCV ANN on TensorFlow's MNIST and report accuracy.

    samples: number of training samples to use, taken from the front of the
        training split.

    Accuracy is measured on the first 1000 test samples and printed as a
    percentage. Returns the trained network.
    """
    (all_train_images, all_train_labels), (test_images, test_labels) = load_mnist_tensorflow()

    # Keep only the leading `samples` training examples
    images = all_train_images[:samples]
    labels = all_train_labels[:samples]

    # One-hot targets: set column `label` of each row to 1.0
    targets = np.zeros((len(labels), 10), dtype=np.float32)
    targets[np.arange(len(labels)), labels] = 1.0

    # Build the network and train it on the whole subset in one batch
    ann = create_ann()
    batch = cv2.ml.TrainData_create(
        images.astype(np.float32),
        cv2.ml.ROW_SAMPLE,
        targets,
    )

    print(f"Training with {len(images)} samples...")
    ann.train(batch)

    # Evaluate on the first 1000 test images; element [1] holds the output rows
    print("Testing on real MNIST data...")
    eval_images = test_images[:1000].astype(np.float32)
    scores = ann.predict(eval_images)[1]
    guesses = np.argmax(scores, axis=1)

    accuracy = np.mean(guesses == test_labels[:1000]) * 100
    print(f"MNIST Test Accuracy: {accuracy:.2f}%")

    return ann

# Uncomment to run with TensorFlow MNIST:
# ann_tf = train_ann_tensorflow(samples=5000)

In [31]:
# Train on the first 5000 TensorFlow-provided MNIST samples and keep the
# returned model in `ann_tf`; the function prints its own accuracy report.
print("Testing OpenCV ANN with real MNIST data from TensorFlow...")
ann_tf = train_ann_tensorflow(samples=5000)

Testing OpenCV ANN with real MNIST data from TensorFlow...
Loading MNIST from TensorFlow...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training with 5000 samples...
Testing on real MNIST data...
MNIST Test Accuracy: 90.40%
