In [1]:
import numpy as np
import struct
import matplotlib.pyplot as plt
from pathlib import Path
import sys
sys.path.insert(0, "../")

In [2]:
# Function to read MNIST data
def read_images(filename):
    """Load an uncompressed IDX3 (MNIST-style) image file.

    The file starts with four big-endian unsigned 32-bit integers (magic
    number, image count, rows, columns) followed by one unsigned byte per
    pixel.

    Parameters
    ----------
    filename : str or path-like
        Location of the IDX3 file; passed straight to ``open``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(num, rows, cols)`` taken from the header.
        The array wraps the bytes read from the file and is therefore
        read-only; call ``.copy()`` if it has to be modified.

    Raises
    ------
    ValueError
        If the magic number is not 2051, or the number of pixel bytes does
        not equal ``num * rows * cols``.
    struct.error
        If the file is shorter than the 16-byte header.
    """
    expected_magic = 2051  # 0x00000803: unsigned-byte data, 3 dimensions
    with open(filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        if magic != expected_magic:
            # Catches e.g. a label file or a still-gzipped download passed by mistake.
            raise ValueError(
                f"{filename}: not an IDX3 image file "
                f"(magic number {magic}, expected {expected_magic})"
            )
        payload = f.read()

    expected_size = num * rows * cols
    if len(payload) != expected_size:
        # Report truncated/padded files explicitly instead of via a reshape error.
        raise ValueError(
            f"{filename}: header announces {num} images of {rows}x{cols} "
            f"({expected_size} bytes) but the file holds {len(payload)} pixel bytes"
        )
    images = np.frombuffer(payload, dtype=np.uint8).reshape(num, rows, cols)
    return images

# Function to read MNIST labels
def read_labels(filename):
    """Load an uncompressed IDX1 (MNIST-style) label file.

    The file starts with two big-endian unsigned 32-bit integers (magic
    number, label count) followed by one unsigned byte per label.

    Parameters
    ----------
    filename : str or path-like
        Location of the IDX1 file; passed straight to ``open``.

    Returns
    -------
    numpy.ndarray
        One-dimensional ``uint8`` array with ``num`` entries. The array wraps
        the bytes read from the file and is therefore read-only; call
        ``.copy()`` if it has to be modified.

    Raises
    ------
    ValueError
        If the magic number is not 2049, or the number of label bytes does
        not equal the count stored in the header.
    struct.error
        If the file is shorter than the 8-byte header.
    """
    expected_magic = 2049  # 0x00000801: unsigned-byte data, 1 dimension
    with open(filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        if magic != expected_magic:
            # Catches e.g. an image file or a still-gzipped download passed by mistake.
            raise ValueError(
                f"{filename}: not an IDX1 label file "
                f"(magic number {magic}, expected {expected_magic})"
            )
        payload = f.read()

    if len(payload) != num:
        # Without this check a truncated file would silently yield fewer labels.
        raise ValueError(
            f"{filename}: header announces {num} labels "
            f"but the file holds {len(payload)} label bytes"
        )
    labels = np.frombuffer(payload, dtype=np.uint8)
    return labels

In [3]:
# Read files
# All four IDX files sit in one directory; naming it once means relocating the
# dataset is a single-line change. The path is relative to the kernel's
# working directory.
DATA_DIR = Path("./../../../data")

train_data = read_images(DATA_DIR / "train-images.idx3-ubyte")
train_labels = read_labels(DATA_DIR / "train-labels.idx1-ubyte")
test_data = read_images(DATA_DIR / "t10k-images.idx3-ubyte")
test_labels = read_labels(DATA_DIR / "t10k-labels.idx1-ubyte")

# Every image needs exactly one label; fail here rather than in a later cell.
assert len(train_data) == len(train_labels), "train images/labels count mismatch"
assert len(test_data) == len(test_labels), "test images/labels count mismatch"

In [None]:
# Print dataset relevant information
def _dataset_facts():
    """Yield one tuple of print arguments per summary line.

    Implemented as a generator so each value is computed only when its line
    is about to be printed.
    """
    yield 'Train data length: ', len(train_data), 'images'
    yield 'Train labels length: ', len(train_labels), 'labels'
    yield 'Test data length: ', len(test_data), 'images'
    yield 'Test labels length: ', len(test_labels), 'labels'
    yield 'Data format: ', train_data[0].shape
    yield 'Train data shape:', train_data.shape
    yield 'Test data shape:', test_data.shape
    yield 'First label: ', train_labels[0]
    yield 'Pixel value range:', train_data.min(), 'to', train_data.max()
    yield 'Unique labels:', np.unique(train_labels)


for fields in _dataset_facts():
    # print() joins the fields with single spaces, one summary line per row.
    print(*fields)

In [None]:
# Display 10 MNIST training images with their labels
grid_rows, grid_cols = 2, 5
fig, axes = plt.subplots(nrows=grid_rows, ncols=grid_cols, figsize=(10, 5))
panels = axes.ravel()  # row-major: top row left to right, then bottom row
for i in range(panels.size):
    ax = panels[i]
    # Panel i shows training image i, titled with the matching label.
    ax.imshow(train_data[i], cmap='gray')
    ax.set_title(f"Label: {train_labels[i]}")
    ax.axis('off')  # hide ticks and frame
plt.tight_layout()
plt.show()

In [None]:
# Export the test split as raw, header-less binary files.
Xsel = test_data                   # images from read_images: (N, rows, cols) uint8
ysel = test_labels                 # labels from read_labels: (N,) uint8

# The dumps carry no header, so whoever reads them has to rely on the element
# type and on images/labels being aligned one-to-one; verify both up front.
assert Xsel.dtype == np.uint8 and ysel.dtype == np.uint8, "export expects uint8 data"
assert len(Xsel) == len(ysel), "image/label count mismatch"

OUTPUT_PATH = Path('./../../opencl/data')  # relative to the kernel's working directory
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
Xsel.tofile(OUTPUT_PATH / "test_images_u8.bin")  # Xsel.size bytes, one per pixel, C order
ysel.tofile(OUTPUT_PATH / "test_labels.bin")     # one byte per label