From f08f1afc14e4919d83c05ba60c4cf5b0dfd223a1 Mon Sep 17 00:00:00 2001
From: Github Executorch <github_executorch@arm.com>
Date: Thu, 16 Oct 2025 01:13:19 -0700
Subject: [PATCH] Summary: Pico2 demo of neural network (MNIST)

Test Plan:
1. Setup Conda
2. examples/arm/setup.sh --i-agree-to-the-contained-eula; source examples/arm/ethos-u-scratch/setup_path.sh
3. cd examples/raspberry_pi/pico2
4. python export_mlp_mnist.py; ./build_firmware_pico.sh --model=balanced_tiny_mlp_mnist.pte

Reviewers:

Subscribers:

Tasks:

Tags:
---
 examples/raspberry_pi/pico2/CMakeLists.txt    |  11 +-
 examples/raspberry_pi/pico2/README.md         | 139 ++++++--
 .../raspberry_pi/pico2/build_firmware_pico.sh |   6 +-
 .../raspberry_pi/pico2/export_mlp_mnist.py    | 228 +++++++++++++
 examples/raspberry_pi/pico2/main.cpp          | 305 +++++++++++++++---
 5 files changed, 609 insertions(+), 80 deletions(-)
 create mode 100644 examples/raspberry_pi/pico2/export_mlp_mnist.py
diff --git a/examples/raspberry_pi/pico2/CMakeLists.txt b/examples/raspberry_pi/pico2/CMakeLists.txt
index 34d9833e6c0..16bb397252f 100644
--- a/examples/raspberry_pi/pico2/CMakeLists.txt
+++ b/examples/raspberry_pi/pico2/CMakeLists.txt
@@ -64,11 +64,14 @@ set(MODEL_STAMP "${CMAKE_CURRENT_BINARY_DIR}/model_pte.stamp")
 
 add_custom_command(
   OUTPUT ${MODEL_STAMP}
-  COMMAND python3 ${EXECUTORCH_ROOT}/executorch/examples/rpi/pte_to_array.py
-          --model ${INPUT_MODEL} --file ${MODEL_PTE_C}
+  COMMAND
+    python3
+    ${EXECUTORCH_ROOT}/executorch/examples/raspberry_pi/pico2/pte_to_array.py
+    --model ${INPUT_MODEL} --file ${MODEL_PTE_C}
   COMMAND ${CMAKE_COMMAND} -E touch ${MODEL_STAMP}
-  DEPENDS ${INPUT_MODEL}
-          ${EXECUTORCH_ROOT}/executorch/examples/rpi/pte_to_array.py
+  DEPENDS
+    ${INPUT_MODEL}
+    ${EXECUTORCH_ROOT}/executorch/examples/raspberry_pi/pico2/pte_to_array.py
   COMMENT "Injecting PTE data from '${INPUT_MODEL}' into model_pte.c"
   VERBATIM
 )
diff --git a/examples/raspberry_pi/pico2/README.md b/examples/raspberry_pi/pico2/README.md
index 44069cb659e..976754d6c5e 100644
--- a/examples/raspberry_pi/pico2/README.md
+++ b/examples/raspberry_pi/pico2/README.md
@@ -1,51 +1,142 @@
 # Overview
-This document outlines the steps required to run a simple Add Module on the Pico2 microcontroller using executorch.
 
-## (Pre-requisistes) Prepare the Environment for Arm
+This document outlines the steps required to run a simple MNIST digit recognition neural network on the Pico2 microcontroller using ExecuTorch.
 
-1. Setup executorch development environment, Also see  <a href="https://docs.pytorch.org/executorch/main/tutorial-arm-ethos-u.html#software"/> for instructions on setting up the environment for Arm.
-2. Make sure you have the toolchain configured correctly.
+## Demo Model: Hand-crafted MNIST Classifier
+
+The included `export_mlp_mnist.py` creates a demonstration model with hand-crafted weights (not production-trained). This tiny MLP recognizes digits 0, 1, 4, and 7 using manually designed feature detectors.
+Note: This is a proof-of-concept. For production use, train your model on real MNIST data.
+
+## Bring Your Own Model
+
+This demo shows how ExecuTorch lets you bring your own PyTorch model and deploy it to Pico2 with one simple script. The complete pipeline goes from any PyTorch model to a runnable binary:
+
+### Train your PyTorch model
+
+1. Export using `torch.export()` and `to_edge()`
+2. Build firmware with one command
+3. Deploy directly to Pico2
+
+#### Important Caveats:
+
+- Memory constraints - Models must fit in 520KB SRAM
+- Missing operators - Some ops may not be supported
+- Selective builds - Include only operators your model uses
+
+## Memory Constraints & Optimization
+
+**Critical:** Pico2 has limited memory:
+- 520KB SRAM (on-chip static RAM)
+- 4MB QSPI Flash (onboard storage)
+
+### Always apply optimization techniques on large models that do not fit in Pico2 memory:
+
+Large models will not fit. Keep your `.pte` files small!
+- Quantization (INT8, INT4)
+- Model pruning
+- Operator fusion
+- Selective builds (include only needed operators)
+For more details, refer to the [ExecuTorch Quantization Optimization Guide](https://docs.pytorch.org/executorch/1.0/quantization-optimization.html), [Model Export & Lowering](https://docs.pytorch.org/executorch/1.0/using-executorch-export.html), and [Selective Build support](https://docs.pytorch.org/executorch/1.0/kernel-library-selective-build.html).
+
+## (Prerequisites) Prepare the Environment for Arm
+
+1. Set up the ExecuTorch development environment, including the Arm-specific setup steps.
+2. Make sure the toolchain is configured correctly. Refer to the [Arm Ethos-U development requirements](https://docs.pytorch.org/executorch/1.0/backends-arm-ethos-u.html#development-requirements) for more details.
 
 ```bash
 which arm-none-eabi-gcc
---> return something like executorch/examples/arm/ethos-u-scratch/arm-gnu-toolchain-13.3.rel1-x86_64-arm-none-eabi/bin/arm-none-eabi-gcc
+# Should return: executorch/examples/arm/ethos-u-scratch/arm-gnu-toolchain-13.3.rel1-x86_64-arm-none-eabi/bin/arm-none-eabi-gcc
 ```
 
-## Build Pico2 Firmware with Executorch
+## Build Pico2 Firmware with ExecuTorch
+
+This involves two steps:
 
-This step involves two sub steps
+### Generate your model:
+
+```bash
+cd examples/raspberry_pi/pico2
+python export_mlp_mnist.py # Creates balanced_tiny_mlp_mnist.pte
+```
 
-1. Cross Compile Executorch for Arm Cortex M, Pico2 target
-2. Build the firmware with the input model provided (If not provided, it will use the default_model.pte)
+### Build firmware:
 
-Use the following command to build the firmware:
-``` bash
-executorch/examples/rpi/build_firmware_pico.sh --model=<path_to_model.pte>
+```bash
+# In the dir examples/raspberry_pi/pico2
+./build_firmware_pico.sh --model=balanced_tiny_mlp_mnist.pte # This creates executorch_pico.uf2, a firmware image for Pico2
 ```
 
 ### Flash Firmware
 
-Hold the BOOTSEL button on the Pico2 and connect it to your computer; it will mount as RPI-RP2. Copy the executorch_pico.uf2 file to this drive.
+Hold the BOOTSEL button on Pico2 and connect to your computer. It mounts as `RPI-RP2`. Copy `executorch_pico.uf2` to this drive.
 
 ### Verify Execution
 
-Check that the Pico2's LED blinks 10 times at 500 ms interval to confirm successful firmware execution.
-The Pico2's LED should blink 10 times at 500 ms intervals, indicating successful firmware execution. If connected via serial, you should see:
-
+The Pico2 LED blinks 10 times at 500ms intervals for successful execution. Via serial terminal, you'll see:
 ```bash
-Method loaded [forward]
-Output: 13.000000, 136.000000, 24.000000, 131.000000
+...
+...
+PREDICTED: 4 (Expected: 4) ✅ CORRECT!
+
+==================================================
+
+=== Digit 7 ===
+############################
+############################
+                        ####
+                       ####
+                      ####
+                     ####
+                    ####
+                   ####
+                  ####
+                 ####
+                ####
+               ####
+              ####
+             ####
+            ####
+           ####
+          ####
+         ####
+        ####
+       ####
+      ####
+     ####
+    ####
+   ####
+  ####
+ ####
+####
+###
+
+Input stats: 159 white pixels out of 784 total
+Running neural network inference...
+✅ Neural network results:
+  Digit 0: 370.000
+  Digit 1: 0.000
+  Digit 2: -3.000
+  Digit 3: -3.000
+  Digit 4: 860.000
+  Digit 5: -3.000
+  Digit 6: -3.000
+  Digit 7: 1640.000 ← PREDICTED
+  Digit 8: -3.000
+  Digit 9: -3.000
+
+🎯 PREDICTED: 7 (Expected: 7) ✅ CORRECT!
+
+==================================================
+
+🎉 All tests complete! ExecuTorch inference of neural network works on Pico2!
 ```
 
 ### Debugging via Serial Terminal
 
-On macOS or Linux, open a serial terminal with:
-
+On macOS/Linux:
 ```bash
 screen /dev/tty.usbmodem1101 115200
 ```
+Replace `/dev/tty.usbmodem1101` with your device path. If the LED blinks 10 times at 100 ms intervals, the program hit an error: check the logs in this terminal. If it blinks 10 times at 500 ms intervals, execution succeeded.
 
-Replace /dev/tty.usbmodem1101 with your device path. This terminal shows program logs and errors. If
-the LED blinks 10 times at 100 ms intervals, your program hit an error state—check the logs here.
-
-These steps complete running the simple model on Pico2 using ExecuTorch.
+Result: A complete PyTorch → ExecuTorch → Pico2 demo neural network deployment! 🚀
diff --git a/examples/raspberry_pi/pico2/build_firmware_pico.sh b/examples/raspberry_pi/pico2/build_firmware_pico.sh
index 8880a834777..03f5bfe2e7e 100755
--- a/examples/raspberry_pi/pico2/build_firmware_pico.sh
+++ b/examples/raspberry_pi/pico2/build_firmware_pico.sh
@@ -9,11 +9,11 @@
 # build_firmware_pico.sh
 # Simple script to cross-compile ExecuTorch and build Pico2 firmware with optional model input
 
-set -e
+set -euo pipefail
 
 # Paths
-ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"  # examples/rpi/ -> root dir
-PICO2_DIR="${ROOT_DIR}/examples/rpi/pico2"
+ROOT_DIR="$(cd "$(dirname "$0")/../../.." && pwd)"  # examples/raspberry_pi/ -> root dir
+PICO2_DIR="${ROOT_DIR}/examples/raspberry_pi/pico2"
 BUILD_DIR="${PICO2_DIR}/build"
 EXECUTORCH_BUILD_DIR="${ROOT_DIR}/cmake-out"
 
diff --git a/examples/raspberry_pi/pico2/export_mlp_mnist.py b/examples/raspberry_pi/pico2/export_mlp_mnist.py
new file mode 100644
index 00000000000..f2dbcaf7e1b
--- /dev/null
+++ b/examples/raspberry_pi/pico2/export_mlp_mnist.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from executorch.exir import EdgeCompileConfig, to_edge
+
+from torch.export import export
+
+
+# Constants
+INPUT_SIZE = 784  # 28*28 for MNIST
+HIDDEN1_SIZE = 32
+HIDDEN2_SIZE = 16
+OUTPUT_SIZE = 10
+IMAGE_SIZE = 28
+
+
+class TinyMLPMNIST(nn.Module):
+    """Three-layer fully connected classifier: 784 -> 32 -> 16 -> 10 logits."""
+
+    def __init__(self):
+        super().__init__()
+        # Layer names are part of the contract: weights are assigned by name.
+        self.fc1 = nn.Linear(INPUT_SIZE, HIDDEN1_SIZE)
+        self.fc2 = nn.Linear(HIDDEN1_SIZE, HIDDEN2_SIZE)
+        self.fc3 = nn.Linear(HIDDEN2_SIZE, OUTPUT_SIZE)
+
+    def forward(self, x):
+        """Flatten each sample, apply two ReLU layers, return raw logits."""
+        flat = x.reshape(x.size(0), -1)
+        hidden1 = F.relu(self.fc1(flat))
+        hidden2 = F.relu(self.fc2(hidden1))
+        return self.fc3(hidden2)
+
+
+def create_balanced_model():
+    """
+    Build a TinyMLPMNIST whose weights are written by hand, not trained.
+
+    All parameters are zeroed first. Five first-layer units then act as
+    stroke detectors (vertical, top, bottom, oval, middle), four
+    second-layer units combine them into detectors for digits 0, 1, 4
+    and 7, and the output layer maps each detector to its digit's logit.
+    The remaining digits only receive a negative output bias.
+
+    Returns:
+        torch.nn.Module: A TinyMLPMNIST model with balanced weights
+    """
+    model = TinyMLPMNIST()
+
+    with torch.no_grad():
+        # Start from an all-zero network
+        for tensor in model.parameters():
+            tensor.zero_()
+
+        # View each fc1 row as a 28x28 image so that every detector below
+        # can be written as a [unit, row, col] slice; writes go to fc1.weight
+        fc1_img = model.fc1.weight.view(-1, IMAGE_SIZE, IMAGE_SIZE)
+        center_col = 14
+
+        # Feature 0: vertical stroke down the middle column (for 1, 4, 7)
+        fc1_img[0, :, center_col] = 2.0
+
+        # Feature 1: top three rows (for 7, 4)
+        fc1_img[1, :3, :] = 2.0
+
+        # Feature 2: bottom three rows (for 1, 4)
+        fc1_img[2, 25:, :] = 2.0
+
+        # Feature 3: oval outline (for 0)
+        fc1_img[3, 1, 8:20] = 2.0  # Top curve
+        fc1_img[3, 26, 8:20] = 2.0  # Bottom curve
+        fc1_img[3, 4:24, 7] = 2.0  # Left side
+        fc1_img[3, 4:24, 20] = 2.0  # Right side
+        # Penalize ink in the hollow center
+        fc1_img[3, 10:18, center_col] = -1.5
+
+        # Feature 4: two middle rows (for 4), weighted more heavily
+        fc1_img[4, 13:15, :] = 3.0
+
+        # Second layer: one detector unit per supported digit, written as
+        # {fc2 unit: {fc1 feature: weight}}
+        digit_detectors = {
+            # Digit 0: strong oval, penalize vertical and middle horizontal
+            0: {3: 5.0, 0: -2.0, 4: -3.0},
+            # Digit 1: vertical + bottom, penalize top and oval
+            1: {0: 3.0, 2: 2.0, 1: -1.0, 3: -2.0},
+            # Digit 4: vertical + top + strong middle, penalize oval
+            2: {0: 2.0, 1: 1.0, 4: 4.0, 3: -2.0},
+            # Digit 7: top + some vertical, penalize bottom
+            3: {1: 3.0, 0: 1.0, 2: -2.0},
+        }
+        for unit, feature_weights in digit_detectors.items():
+            for feature, value in feature_weights.items():
+                model.fc2.weight[unit, feature] = value
+
+        # Output layer: {digit logit: fc2 detector unit}
+        logit_sources = {
+            0: 0,
+            1: 1,
+            4: 2,
+            7: 3,
+        }
+        for digit, unit in logit_sources.items():
+            model.fc3.weight[digit, unit] = 5.0
+
+        # Bias against the digits that have no detector
+        unsupported_digits = (2, 3, 5, 6, 8, 9)
+        for digit in unsupported_digits:
+            model.fc3.bias[digit] = -3.0
+
+    return model
+
+
+def test_comprehensive(model):
+    """
+    Print the model's predictions for synthetic images of digits 1, 7 and 0.
+
+    Results are only printed; nothing is asserted or returned.
+
+    Args:
+        model: The PyTorch model to test
+    """
+
+    def blank_image():
+        """Return an all-zero image with a leading batch dimension of 1."""
+        return torch.zeros(1, IMAGE_SIZE, IMAGE_SIZE)
+
+    def create_digit_1():
+        """Draw a 1: vertical stem, small top flag, and a bottom base."""
+        canvas = blank_image()
+        canvas[0, 2:26, 13:16] = 1.0  # Thick vertical line in the middle
+        canvas[0, 2:5, 11:14] = 1.0  # Top part (like handwritten 1)
+        canvas[0, 24:27, 10:19] = 1.0  # Bottom base
+        return canvas
+
+    def create_digit_7():
+        """Draw a 7: top bar plus a thick diagonal running down-left."""
+        canvas = blank_image()
+        canvas[0, 1:4, 3:26] = 1.0  # Top horizontal line
+        # Diagonal: rows 4..26 paired with columns 23..1, three pixels wide
+        for row, col in zip(range(4, 27), range(23, 0, -1)):
+            canvas[0, row, col - 1 : col + 2] = 1.0
+        return canvas
+
+    def create_digit_0():
+        """Draw a 0: the ring between two ellipses centered at (14, 14)."""
+        canvas = blank_image()
+        for row in range(3, 25):
+            for col in range(8, 20):
+                dy2 = (row - 14) ** 2
+                dx2 = (col - 14) ** 2
+                inside_outer = (dy2 / 11**2 + dx2 / 6**2) <= 1
+                outside_inner = (dy2 / 8**2 + dx2 / 3**2) > 1
+                if inside_outer and outside_inner:
+                    canvas[0, row, col] = 1.0
+        return canvas
+
+    patterns = {
+        "Digit 1": create_digit_1(),
+        "Digit 7": create_digit_7(),
+        "Digit 0": create_digit_0(),
+    }
+
+    print("🧪 Testing with clear patterns:")
+    model.eval()
+    with torch.no_grad():
+        for name, pattern in patterns.items():
+            logits = model(pattern)
+            pred = logits.argmax().item()
+            confidence = F.softmax(logits, dim=1)[0, pred].item()
+            print(f"   {name} → predicted: {pred} (confidence: {confidence:.3f})")
+
+            # Show top 3 predictions as (digit, logit) pairs
+            best_values, best_indices = logits.topk(3, dim=1)
+            predictions = [
+                (idx.item(), val.item())
+                for idx, val in zip(best_indices[0], best_values[0])
+            ]
+            print(f"      Top 3: {predictions}")
+
+
+def main():
+    """Create the hand-crafted model, sanity-check it, and export a .pte.
+
+    Exits with status 1 if the .pte file cannot be written.
+    """
+    print("🔥 Creating balanced MLP MNIST model...")
+    model = create_balanced_model()
+
+    # Test the model
+    test_comprehensive(model)
+
+    # Export
+    example_input = torch.randn(1, IMAGE_SIZE, IMAGE_SIZE)
+    param_count = sum(p.numel() for p in model.parameters())
+    print(f"\n📊 Model parameters: {param_count:,}")
+
+    print("📦 Exporting...")
+    with torch.no_grad():
+        exported_program = export(model, (example_input,))
+
+    print("⚙️ Converting to ExecuTorch...")
+    edge_config = EdgeCompileConfig(_check_ir_validity=False)
+    edge_manager = to_edge(exported_program, compile_config=edge_config)
+    pte_bytes = edge_manager.to_executorch().buffer
+
+    filename = "balanced_tiny_mlp_mnist.pte"
+    print(f"💾 Saving {filename}...")
+    try:
+        with open(filename, "wb") as f:
+            f.write(pte_bytes)
+    except OSError as e:
+        print(f"❌ Failed to save model: {e}")
+        raise SystemExit(1) from e
+    print("✅ Export complete!")
+    print(f"📁 Model size: {len(pte_bytes) / 1024:.1f} KB")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/raspberry_pi/pico2/main.cpp b/examples/raspberry_pi/pico2/main.cpp
index afec859863c..8219e3bfc52 100644
--- a/examples/raspberry_pi/pico2/main.cpp
+++ b/examples/raspberry_pi/pico2/main.cpp
@@ -67,78 +67,281 @@ bool load_and_prepare_model(
     std::unique_ptr<Program>& program_ptr,
     std::unique_ptr<Method>& method_ptr,
     MemoryManager& memory_manager) {
+  printf("Loading model data (%u bytes)...\n", model_pte_len);
+
   executorch::extension::BufferDataLoader loader(model_pte, model_pte_len);
   auto program_result = Program::load(&loader);
   if (!program_result.ok()) {
-    printf("Failed to load model: %d\n", (int)program_result.error());
+    printf("❌ Failed to load model: error %d\n", (int)program_result.error());
+
+    // Print more detailed error info
+    switch (program_result.error()) {
+      case Error::InvalidProgram:
+        printf("   → Invalid program format\n");
+        break;
+      case Error::InvalidState:
+        printf("   → Invalid state\n");
+        break;
+      case Error::NotSupported:
+        printf("   → Feature not supported\n");
+        break;
+      case Error::NotFound:
+        printf("   → Resource not found\n");
+        break;
+      case Error::InvalidArgument:
+        printf("   → Invalid argument\n");
+        break;
+      default:
+        printf("   → Unknown error code: %d\n", (int)program_result.error());
+    }
+
     blink_indicator(INDICATOR_PIN_1, 10);
     return false;
   }
+
   program_ptr = std::make_unique<Program>(std::move(*program_result));
+  printf("✅ Program loaded successfully\n");
+
+  // Get method count and names
+  printf("📊 Program info:\n");
+  printf("   Method count: %zu\n", program_ptr->num_methods());
+
   auto method_name_result = program_ptr->get_method_name(0);
   if (!method_name_result.ok()) {
-    printf("Failed to get method name: %d\n", (int)method_name_result.error());
+    printf(
+        "❌ Failed to get method name: error %d\n",
+        (int)method_name_result.error());
     blink_indicator(INDICATOR_PIN_1, 10);
     return false;
   }
+
+  printf("   Method 0 name: %s\n", *method_name_result);
+
+  // Try to load the method - this is where operator errors usually happen
+  printf("🔄 Loading method '%s'...\n", *method_name_result);
   auto method_result =
       program_ptr->load_method(*method_name_result, &memory_manager);
+
   if (!method_result.ok()) {
-    printf("Failed to load method: %d\n", (int)method_result.error());
-    blink_indicator(INDICATOR_PIN_1, 10);
-    return false;
-  }
-  method_ptr = std::make_unique<Method>(std::move(*method_result));
-  printf("Method loaded [%s]\n", *method_name_result);
-  return true;
-}
+    printf("❌ Failed to load method: error %d\n", (int)method_result.error());
+
+    // More detailed method loading errors
+    switch (method_result.error()) {
+      case Error::InvalidProgram:
+        printf("   → Method has invalid program structure\n");
+        break;
+      case Error::InvalidState:
+        printf("   → Method in invalid state\n");
+        break;
+      case Error::NotSupported:
+        printf("   → Method uses unsupported operators\n");
+        printf(
+            "   → This usually means missing operators in selective build!\n");
+        break;
+      case Error::NotFound:
+        printf("   → Method resource not found\n");
+        break;
+      case Error::MemoryAllocationFailed:
+        printf("   → Not enough memory to load method\n");
+        break;
+      case Error::OperatorMissing:
+        printf("   → Operator missing\n");
+        break;
+      default:
+        printf("   → Unknown method error: %d\n", (int)method_result.error());
+    }
 
-bool validate_and_set_inputs(Method& method) {
-  float input_data_0[4] = {4.0, 109.0, 13.0, 123.0};
-  float input_data_1[4] = {9.0, 27.0, 11.0, 8.0};
-  TensorImpl::SizesType sizes[1] = {4};
-  TensorImpl::DimOrderType dim_order[] = {0};
-  TensorImpl impl0(ScalarType::Float, 1, sizes, input_data_0, dim_order);
-  TensorImpl impl1(ScalarType::Float, 1, sizes, input_data_1, dim_order);
-  Tensor input0(&impl0);
-  Tensor input1(&impl1);
-  if (method.set_input(input0, 0) != Error::Ok) {
-    printf("Failed to set input0\n");
-    blink_indicator(INDICATOR_PIN_1, 10);
-    return false;
-  }
-  if (method.set_input(input1, 1) != Error::Ok) {
-    printf("Failed to set input1\n");
     blink_indicator(INDICATOR_PIN_1, 10);
     return false;
   }
+
+  method_ptr = std::make_unique<Method>(std::move(*method_result));
+  printf("✅ Method '%s' loaded successfully\n", *method_name_result);
   return true;
 }
 
 bool run_inference(Method& method) {
-  if (!validate_and_set_inputs(method)) {
-    return false; // Input validation or setting failed
-  }
+  printf(
+      "🔥 ExecuTorch MLP MNIST Demo (Neural network) on Pico2 (microcontroller) 🔥\n");
 
-  if (method.execute() != Error::Ok) {
-    printf("Failed to execute\n");
-    blink_indicator(INDICATOR_PIN_1, 10);
-    return false;
-  }
-  const EValue& output = method.get_output(0);
-  if (output.isTensor()) {
-    const float* out_data = output.toTensor().const_data_ptr<float>();
-    printf(
-        "Output: %f, %f, %f, %f\n",
-        out_data[0],
-        out_data[1],
-        out_data[2],
-        out_data[3]);
-  } else {
-    printf("Output is not a tensor!\n");
-    blink_indicator(INDICATOR_PIN_1, 10);
-    return false;
+  // ASCII art for digit '0' (28x28)
+  const char* ascii_digit_0[28] = {
+      "                            ", "        ############        ",
+      "      ##################    ", "    ######################  ",
+      "   ######################## ", "  ####                ####  ",
+      " ####                  #### ", " ####                  #### ",
+      "####                    ####", "####                    ####",
+      "####                    ####", "####                    ####",
+      "####                    ####", "####                    ####",
+      "####                    ####", "####                    ####",
+      "####                    ####", "####                    ####",
+      "####                    ####", "####                    ####",
+      " ####                  #### ", " ####                  #### ",
+      "  ####                ####  ", "   ######################## ",
+      "    ######################  ", "      ##################    ",
+      "        ############        ", "                            "};
+
+  const char* ascii_digit_1[28] = {
+      "            ####            ", "           #####            ",
+      "          ######            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "            ####            ", "            ####            ",
+      "        ############        ", "        ############        ",
+      "        ############        ", "                            "};
+
+  const char* ascii_digit_4[28] = {
+      "                            ", "               ####         ",
+      "              #####         ", "             ######         ",
+      "            #######         ", "           #### ####        ",
+      "          ####  ####        ", "         ####   ####        ",
+      "        ####    ####        ", "       ####     ####        ",
+      "      ####      ####        ", "     ####       ####        ",
+      "    ####        ####        ", "   ####         ####        ",
+      "  ######################    ", "  ######################    ",
+      "  ######################    ", "                ####        ",
+      "                ####        ", "                ####        ",
+      "                ####        ", "                ####        ",
+      "                ####        ", "                ####        ",
+      "                ####        ", "                ####        ",
+      "                ####        ", "                            "};
+
+  const char* ascii_digit_7[28] = {
+      "############################", "############################",
+      "                        ####", "                       #### ",
+      "                      ####  ", "                     ####   ",
+      "                    ####    ", "                   ####     ",
+      "                  ####      ", "                 ####       ",
+      "                ####        ", "               ####         ",
+      "              ####          ", "             ####           ",
+      "            ####            ", "           ####             ",
+      "          ####              ", "         ####               ",
+      "        ####                ", "       ####                 ",
+      "      ####                  ", "     ####                   ",
+      "    ####                    ", "   ####                     ",
+      "  ####                      ", " ####                       ",
+      "####                        ", "###                         "};
+
+  // Test patterns
+  struct TestCase {
+    const char** pattern;
+    const char* name;
+    int expected_digit;
+  };
+
+  TestCase test_cases[] = {
+      {ascii_digit_0, "Digit 0", 0},
+      {ascii_digit_1, "Digit 1", 1},
+      {ascii_digit_4, "Digit 4", 4},
+      {ascii_digit_7, "Digit 7", 7}};
+
+  printf("🧪 Testing all supported digits:\n\n");
+
+  for (int test = 0; test < 4; test++) {
+    const char** ascii_digit = test_cases[test].pattern;
+    const char* digit_name = test_cases[test].name;
+    int expected = test_cases[test].expected_digit;
+
+    // Display the ASCII digit
+    printf("=== %s ===\n", digit_name);
+    for (int i = 0; i < 28; i++) {
+      printf("%s\n", ascii_digit[i]);
+    }
+    printf("\n");
+
+    // Convert ASCII to 28x28 float tensor
+    float input_data[784]; // 28*28 = 784
+    for (int row = 0; row < 28; row++) {
+      for (int col = 0; col < 28; col++) {
+        char pixel = ascii_digit[row][col];
+        input_data[row * 28 + col] = (pixel == '#') ? 1.0f : 0.0f;
+      }
+    }
+
+    // Count white pixels
+    int white_pixels = 0;
+    for (int i = 0; i < 784; i++) {
+      if (input_data[i] > 0.5f)
+        white_pixels++;
+    }
+    printf("Input stats: %d white pixels out of 784 total\n", white_pixels);
+
+    // Create input tensor: [1, 28, 28]
+    TensorImpl::SizesType input_sizes[3] = {1, 28, 28};
+    TensorImpl::DimOrderType dim_order[3] = {0, 1, 2};
+
+    TensorImpl input_impl(
+        ScalarType::Float,
+        3, // 3 dimensions: [batch, height, width]
+        input_sizes, // [1, 28, 28]
+        input_data,
+        dim_order);
+    Tensor input(&input_impl);
+
+    // Set input and run inference
+    printf("Running neural network inference...\n");
+
+    auto result = method.set_input(input, 0);
+    if (result != Error::Ok) {
+      printf("❌ Failed to set input: error %d\n", (int)result);
+      return false;
+    }
+
+    result = method.execute();
+    if (result != Error::Ok) {
+      printf("❌ Failed to execute: error %d\n", (int)result);
+      return false;
+    }
+
+    auto output_evalue = method.get_output(0);
+    if (!output_evalue.isTensor()) {
+      printf("❌ Output is not a tensor\n");
+      return false;
+    }
+
+    // Extract tensor from EValue
+    Tensor output = output_evalue.toTensor();
+    float* output_data = output.mutable_data_ptr<float>();
+
+    // Find digit with highest score
+    int predicted_digit = 0;
+    float max_score = output_data[0];
+    for (int i = 1; i < 10; i++) {
+      if (output_data[i] > max_score) {
+        max_score = output_data[i];
+        predicted_digit = i;
+      }
+    }
+
+    // Display results
+    printf("✅ Neural network results:\n");
+    for (int i = 0; i < 10; i++) {
+      printf("  Digit %d: %.3f", i, output_data[i]);
+      if (i == predicted_digit)
+        printf(" ← PREDICTED");
+      printf("\n");
+    }
+
+    // Check if correct
+    printf("\n🎯 PREDICTED: %d (Expected: %d) ", predicted_digit, expected);
+    if (predicted_digit == expected) {
+      printf("✅ CORRECT!\n");
+    } else {
+      printf("❌ WRONG!\n");
+    }
+
+    printf("\n==================================================\n\n");
   }
+
+  printf(
+      "🎉 All tests complete! ExecuTorch inference of neural network works on Pico2!\n");
   return true;
 }
 
@@ -150,8 +353,12 @@ int executor_runner() {
   wait_for_usb();
   runtime_init();
 
-  static uint8_t method_allocator_pool[32 * 1024]; // 32KB
-  static uint8_t activation_pool[64 * 1024]; // 64KB
+  // Fit within Pico2's 520KB SRAM limit
+  static uint8_t
+      method_allocator_pool[200 * 1024]; // 200KB - plenty for method metadata
+  static uint8_t activation_pool[200 * 1024]; // 200KB - plenty for activations
+  // Total: 400KB directly allocated to ExecuTorch, leaves 120KB for other uses
+
   MemoryAllocator method_allocator(
       sizeof(method_allocator_pool), method_allocator_pool);
   method_allocator.enable_profiling("method allocator");