# Convolutional neural networks (CNN) for CIFAR-10/100 using MLX

Markus Enzweiler, markus.enzweiler@hs-esslingen.de

This is a demo used in a Computer Vision & Machine Learning lecture. Feel free to use and contribute.

We build and train a CNN for CIFAR-10 / CIFAR-100 image classification, see https://www.cs.toronto.edu/~kriz/cifar.html. We use the Python code from https://github.com/menzHSE/mlx-cifar-10-cnn.git and execute it via this notebook. 


**Note: This requires a machine with an Apple SoC, e.g. M1/M2/M3 etc.**

See: https://github.com/ml-explore/mlx

## Setup

Adapt `package_path` to point to the directory containing this notebook.

In [1]:
# Imports
import sys
import os
import threading
import subprocess
import fcntl
import errno

In [2]:
# Directory that contains this notebook ("./" means the current directory)
package_path = "./" # local
print(f"Package path: {package_path}")


def check_for_colab():
    """Report whether the notebook is running on Google Colab.

    Returns:
        bool: True if the `google.colab` module can be imported,
        False if the import raises ImportError.
    """
    try:
        import google.colab  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    return True


# Remember the result; `execute` uses it to choose how scripts are run
on_colab = check_for_colab()

Package path: ./


In [3]:
# Clone git repository (or update an existing clone)

# Path of the repository directory. It is relative to the current working
# directory unless `package_path` is absolute.
repo_dir = os.path.join(package_path, "mlx-cifar-10-cnn")
repo_url = "https://github.com/menzHSE/mlx-cifar-10-cnn.git"

# Working directory at the time this cell runs. It is no longer changed by
# this cell; the name is retained in case other cells reference it.
original_cwd = os.getcwd()

# Check if the directory already exists
if os.path.exists(os.path.join(original_cwd, repo_dir)):
    print("Repository exists. Resetting to HEAD...")
    # Run git inside the repository via `cwd=` instead of os.chdir(), so the
    # kernel's working directory stays intact even if a git call fails.
    # 1) fetch the latest changes, 2) hard-reset to the remote's default branch
    for git_args in (["fetch", "origin"], ["reset", "--hard", "origin/HEAD"]):
        result = subprocess.run(["git", *git_args], cwd=repo_dir)
        if result.returncode != 0:
            # Best effort (e.g. offline): report the failure but keep going
            print(f"Warning: 'git {' '.join(git_args)}' failed with exit code {result.returncode}")
else:
    print("Cloning repository...")
    # Clone the repository if it doesn't exist
    result = subprocess.run(["git", "clone", repo_url, repo_dir])
    if result.returncode != 0:
        print(f"Warning: 'git clone' failed with exit code {result.returncode}")


Repository exists. Resetting to HEAD...
HEAD is now at d09c838 fixed saving to dirs


From https://github.com/menzHSE/mlx-cifar-10-cnn
   54b4cf2..d09c838  main       -> origin/main


In [4]:
# Install requirements in the current Jupyter kernel.
# pip is invoked through `sys.executable`, i.e. the interpreter running this
# kernel, and reads the package list from the repository's requirements.txt.
req_file = os.path.join(repo_dir, "requirements.txt")
if os.path.exists(req_file):
    # IPython shell escape: the {...} parts are replaced by the Python values.
    # NOTE(review): the paths are interpolated unquoted, so a path containing
    # spaces would presumably break the command -- confirm if that matters.
    !{sys.executable} -m pip install -r {req_file}
else:
    print(f"Requirements file not found: {req_file}")



## Functions to interface with the code in the repository

In [5]:
def execute(script_name, params=None):
    """Run a repository script with the runner that fits the environment.

    Args:
        script_name: File name of the script inside the repository directory.
        params: Optional dict of command-line options, forwarded unchanged.

    Dispatches to `executeCaptureColab` when `on_colab` is true and to
    `executeCapture` otherwise.
    """
    runner = executeCaptureColab if on_colab else executeCapture
    runner(script_name, params)

def executeCapture(script_name, params=None):
    """Run a script from the cloned repository as a subprocess.

    Args:
        script_name: File name of the script, relative to `repo_dir`.
        params: Optional dict of command-line options. Every key is passed as
            `--key`. A value of None marks a flag without an argument
            (e.g. {"help": None} -> `--help`); any other value is appended
            as `str(value)`.

    Returns:
        None. The command is printed, the script writes to the inherited
        stdout/stderr, and a message is printed if the script does not exist
        or exits with a non-zero code.
    """
    script_path = os.path.join(repo_dir, script_name)
    if not os.path.exists(script_path):
        print(f"Script not found: {script_path}")
        return

    print(f"Executing script: {script_path}")
    # Use the interpreter that runs this kernel (the one the requirements were
    # installed into) rather than whatever "python" resolves to on PATH.
    command = [sys.executable, script_path]
    # Add additional arguments from the params dictionary
    for key, value in (params or {}).items():
        command.append(f"--{key}")
        # None means "flag only"; appending str(None) would pass a bogus
        # 'None' argument to the script.
        if value is not None:
            command.append(str(value))
    print(command)

    completed = subprocess.run(command)
    if completed.returncode != 0:
        print(f"Script exited with code {completed.returncode}")

# This is very hacky ... but it's hard to capture the output of a subprocess in Colab
def executeCaptureColab(script_name, params=None):
    """Run a repository script as a subprocess and echo its output while it runs.

    Args:
        script_name: File name of the script, relative to `repo_dir`.
        params: Optional dict of command-line options. Every key is passed as
            `--key`; a value that is not None is appended as `str(value)`,
            a None value yields the bare flag.

    Returns:
        None. The child's stdout and stderr are merged into one pipe, read by
        a helper thread and printed. If the script does not exist, a message
        is printed instead.
    """
    script_path = os.path.join(repo_dir, script_name)
    if os.path.exists(script_path):
        print(f"Executing script: {script_path}")
        # Create the command list starting with Python and the script path
        # NOTE(review): "python" is resolved via PATH and may not be the
        # interpreter running this kernel (sys.executable) -- confirm.
        command = ["python", script_path]
        # Add additional arguments from the params dictionary
        if params:
            for key, value in params.items():
                if value is not None:  # Check if the value is None
                    command.append(f"--{key}")
                    command.append(str(value))
                else:
                    # Flag without argument, e.g. --help
                    command.append(f"--{key}")
        print("Command:", " ".join(command))

        # Start the subprocess (stderr is redirected into stdout, text mode)
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

        # Set the stdout to non-blocking
        # (fcntl is POSIX-only, so this function will not work on Windows)
        fd = process.stdout.fileno()
        fl = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Function to continuously output lines from a stream
        def stream_output(stream):
            # NOTE(review): this loop polls without sleeping, so it spins on
            # the CPU while the child produces no output.
            while True:
                try:
                    line = stream.readline()
                    if line:
                        print(line, end='')
                    elif process.poll() is not None:
                        # Nothing left to read and the child has exited
                        break
                except IOError as e:
                    # Ignore the error if no data is available yet
                    if e.errno != errno.EAGAIN and e.errno != errno.EWOULDBLOCK:
                        raise

        # Use a thread to capture the output stream
        output_thread = threading.Thread(target=stream_output, args=(process.stdout,))
        output_thread.start()

        # Wait for the subprocess to complete and the output thread to end
        process.wait()
        output_thread.join()

    else:
        print(f"Script not found: {script_path}")

In [6]:
# List the command-line options offered by train.py
execute("train.py", params={"help": None})

Executing script: ./mlx-cifar-10-cnn/train.py
['python', './mlx-cifar-10-cnn/train.py', '--help', 'None']
usage: Train a simple CNN on CIFAR-10 / CIFAR_100 with mlx.
       [-h] [--cpu] [--seed SEED] [--batchsize BATCHSIZE] [--epochs EPOCHS]
       [--lr LR] [--dataset {CIFAR-10,CIFAR-100}]

options:
  -h, --help            show this help message and exit
  --cpu                 Use CPU instead of Metal GPU acceleration
  --seed SEED           Random seed
  --batchsize BATCHSIZE
                        Batch size for training
  --epochs EPOCHS       Number of training epochs
  --lr LR               Learning rate
  --dataset {CIFAR-10,CIFAR-100}
                        Select the dataset to use (CIFAR-10 or CIFAR-100)


# Train and test CNN on CIFAR-10



## Parameters

In [7]:
# Settings for the CIFAR-10 run
dataset = "CIFAR-10"  # dataset name
epochs = 30           # number of training epochs
lr = 3e-4             # learning rate
batchsize = 32        # batch size
seed = 42             # random seed

## Train

In [8]:
# Command-line options for train.py, taken from the parameter cell
params = dict(
    dataset=dataset,      # dataset name
    batchsize=batchsize,  # batch size
    seed=seed,            # random seed
    lr=lr,                # learning rate
    epochs=epochs,        # number of epochs
)

# Run the training script with these options
execute("train.py", params=params)

Executing script: ./mlx-cifar-10-cnn/train.py
['python', './mlx-cifar-10-cnn/train.py', '--dataset', 'CIFAR-10', '--batchsize', '32', '--seed', '42', '--lr', '0.0003', '--epochs', '30']
Options: 
  Device: GPU
  Seed: 42
  Batch size: 32
  Number of epochs: 30
  Learning rate: 0.0003
  Dataset: CIFAR-10
Number of trainable params: 0.5506 M
Starting training ...
Epoch    0: Loss 1.70032 | Train accuracy  0.495 | Test accuracy  0.497 | Throughput    1015.97 images/second |  Time   51.035 (s)
Epoch    1: Loss 1.33132 | Train accuracy  0.580 | Test accuracy  0.581 | Throughput    1098.47 images/second |  Time   46.751 (s)
Epoch    2: Loss 1.22415 | Batch   560 | Train accuracy  0.563 | Throughput    1137.34 images/second

KeyboardInterrupt: 

## Test 

In [None]:
# Options for test.py: evaluate the model saved after the last training epoch.
# The dataset is passed explicitly, matching the CIFAR-100 test cell.
params = {
    "dataset": dataset,           # dataset name
    "model": f"models/model_{dataset}_{epochs-1:03d}.npz" # model name
}

# Execute 'test.py' with parameters
execute("test.py", params=params)

# Train and test CNN on CIFAR-100

## Parameters

In [None]:
# Settings for the CIFAR-100 run
dataset = "CIFAR-100"  # dataset name
epochs = 30            # number of training epochs
lr = 3e-4              # learning rate
batchsize = 32         # batch size
seed = 42              # random seed

## Train

In [None]:
# Command-line options for train.py, taken from the parameter cell
params = dict(
    dataset=dataset,      # dataset name
    batchsize=batchsize,  # batch size
    seed=seed,            # random seed
    lr=lr,                # learning rate
    epochs=epochs,        # number of epochs
)

# Run the training script with these options
execute("train.py", params=params)

## Test

In [None]:
# Options for test.py: the model file name is built from the dataset name
# and the index of the last training epoch (epochs - 1, zero-padded to 3 digits).
params = {
    "dataset": dataset,           # dataset name
    "model": f"models/model_{dataset}_{epochs-1:03d}.npz" # model name
}

# Execute 'test.py' with parameters
execute("test.py", params=params)