In [1]:
import struct
import csv

In [2]:
def read_idx1_ubyte(label_file):
    """Read an IDX1 unsigned-byte label file and return its labels.

    The file is expected to start with an 8-byte big-endian header holding
    two unsigned 32-bit integers: a magic number (must be 2049) and the
    number of labels. One byte per label follows the header.

    Args:
        label_file: Path of the IDX1 file to read.

    Returns:
        A list of ints, one per label, in file order.

    Raises:
        ValueError: If the magic number is not 2049, or if the file holds
            fewer label bytes than the header declares.
        struct.error: If the file is too short to contain the header.
    """
    with open(label_file, 'rb') as f:
        magic, num_items = struct.unpack('>II', f.read(8))
        if magic != 2049:
            raise ValueError(f'Invalid label file magic number: {magic}')
        payload = f.read(num_items)

    # read() returns whatever is left when the file ends early, so a
    # truncated file must be detected explicitly rather than silently
    # yielding a short label list.
    if len(payload) != num_items:
        raise ValueError(
            f'Label file truncated: expected {num_items} labels, '
            f'found {len(payload)}'
        )
    return list(payload)

def read_idx3_ubyte(image_file):
    """Read an IDX3 unsigned-byte image file and return flattened images.

    The file is expected to start with a 16-byte big-endian header holding
    four unsigned 32-bit integers: a magic number (must be 2051), the number
    of images, the row count and the column count. Each image follows as
    ``rows * cols`` consecutive bytes.

    Args:
        image_file: Path of the IDX3 file to read.

    Returns:
        A list with one entry per image; each entry is a flat list of
        ``rows * cols`` ints in the order they appear in the file.

    Raises:
        ValueError: If the magic number is not 2051, or if the file ends
            before every declared image has been read in full.
        struct.error: If the file is too short to contain the header.
    """
    with open(image_file, 'rb') as f:
        magic, num_images, rows, cols = struct.unpack('>IIII', f.read(16))
        if magic != 2051:
            raise ValueError(f'Invalid image file magic number: {magic}')

        pixels_per_image = rows * cols
        images = []
        for index in range(num_images):
            raw = f.read(pixels_per_image)
            # A short read means the file ended early; without this check
            # the result would contain short or empty images.
            if len(raw) != pixels_per_image:
                raise ValueError(
                    f'Image file truncated: image {index} has {len(raw)} '
                    f'of {pixels_per_image} bytes'
                )
            images.append(list(raw))
    return images

In [3]:
def convert_mnist_to_csv(label_file, image_file, output_csv):
    """Convert a pair of IDX label/image files into one CSV file.

    The CSV has a header row ``label, pixel0, pixel1, ...`` followed by one
    row per sample: the label and then that sample's flattened pixel values.

    Args:
        label_file: Path of the IDX1 label file.
        image_file: Path of the IDX3 image file.
        output_csv: Path of the CSV file to create (overwritten if present).

    Raises:
        ValueError: If the two files contain a different number of items,
            or if either reader rejects its input file.
    """
    labels = read_idx1_ubyte(label_file)
    images = read_idx3_ubyte(image_file)

    if len(labels) != len(images):
        raise ValueError("Number of labels and images do not match")

    # Size the header from the data actually read so it always lines up with
    # the rows; keep the 784-column (28x28) header when there are no images.
    num_pixels = len(images[0]) if images else 784

    with open(output_csv, 'w', newline='') as f_out:
        writer = csv.writer(f_out)
        writer.writerow(['label'] + [f'pixel{i}' for i in range(num_pixels)])
        writer.writerows(
            [label] + image for label, image in zip(labels, images)
        )


In [4]:
# Example usage: write the training split's labels and images to one CSV.
convert_mnist_to_csv(
    label_file='OriginalMNISTDataset/train-labels.idx1-ubyte',
    image_file='OriginalMNISTDataset/train-images.idx3-ubyte',
    output_csv='CSVMNISTDataset/mnist_train.csv',
)

In [5]:
# Write the test split's labels and images to one CSV.
convert_mnist_to_csv(
    label_file='OriginalMNISTDataset/test-labels.idx1-ubyte',
    image_file='OriginalMNISTDataset/test-images.idx3-ubyte',
    output_csv='CSVMNISTDataset/mnist_test.csv',
)