This document analyzes all the failed cases produced by the previous [functional test](./capability.ipynb).

In [55]:
from collections import Counter
from enum import IntEnum
from pathlib import Path
import pickle
from pprint import pprint
import shutil
import subprocess
from tqdm import tqdm
from typing import Dict, List, Optional

# Preparation

In [56]:
def _require_tool(name: str) -> Path:
    """Locate the executable *name* on ``PATH``.

    Args:
        name: Command name to look up, e.g. ``"iwasm"``.

    Returns:
        The location reported by ``shutil.which`` wrapped in a ``Path``.

    Raises:
        FileNotFoundError: If no such executable is found. Without this check
            ``Path(None)`` would fail with an unhelpful ``TypeError``.
    """
    location = shutil.which(name)
    if location is None:
        raise FileNotFoundError(f"`{name}` is not found in PATH")
    return Path(location)


# External tools driven by this notebook; all of them must be on PATH.
IWASM = _require_tool("iwasm")
# NOTE(review): this tool is invoked with --wasi-sdk/--wabt/--wasm-file below,
# so it is presumably WAMR's addr2line helper rather than binutils' — confirm.
ADDR2LINE = _require_tool("addr2line")

PY2WASM = _require_tool("py2wasm")

# Test cases live in the cookbook checkout under the current working directory.
PYTHON_COOKBOOK = Path.cwd().joinpath("./cookbook/src")
assert PYTHON_COOKBOOK.exists(), "use `git submodule update` to fetch cookbook"

# Toolchain locations handed to the addr2line tool.
WASI_SDK_HOME = Path("/opt/wasi-sdk-22.0")

WABT_HOME = Path("/opt/wabt-1.0.35")


In [57]:
# sync with [functional test](./capability.ipynb)
class CaseErrorCode(IntEnum):
    """Outcome category of a single functional-test case.

    The numeric values are spelled out explicitly; they are part of the
    pickled results, so they must not be renumbered.
    """

    OK = 0
    COMPILATION_FAILURE = 1
    WASM_EXECUTION_FAILURE = 2
    PY_EXECUTION_FAILURE = 3
    DIFFERENT_RESULT = 4
    BYPASS = 5
    WASM_EXECUTION_TIMEOUT = 6
    PY_EXECUTION_TIMEOUT = 7

    def __str__(self):
        # Show the symbolic member name instead of the integer value.
        return self.name

class CaseResult:
    """Result of one test case: an error code plus the captured message."""

    def __init__(self, error_code: CaseErrorCode, msg: str):
        # Stored as plain instance attributes.
        self.msg = msg
        self.error_code = error_code

    def __repr__(self):
        # Render as `<error code>. "<message without surrounding whitespace>"`.
        trimmed = self.msg.strip()
        return f'{self.error_code}. "{trimmed}"'


In [58]:
def restore_result(out_file: Path) -> Dict[str, CaseResult]:
    """Load the pickled per-chapter results stored in ``out_file``.

    Args:
        out_file: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file; the annotation says it is a
        mapping of case name to ``CaseResult``.
    """
    payload = out_file.read_bytes()
    return pickle.loads(payload)

def filter_wasm_execution_failure(reulsts: Dict[str, CaseResult]) -> Dict[str, CaseResult]:
    """Keep only the cases whose error code is ``WASM_EXECUTION_FAILURE``.

    Args:
        reulsts: Mapping of case name to its result.

    Returns:
        A new dict with the matching entries, in the original order.
    """
    failed: Dict[str, CaseResult] = {}
    for case_name, outcome in reulsts.items():
        if outcome.error_code == CaseErrorCode.WASM_EXECUTION_FAILURE:
            failed[case_name] = outcome
    return failed

def filter_result_error_msg(result: CaseResult, keyword: str) -> bool:
    """Tell whether any line of ``result.msg`` starts with ``keyword``.

    Args:
        result: Case result whose message is inspected.
        keyword: Prefix to look for at the beginning of a line.

    Returns:
        True as soon as one line begins with ``keyword``, otherwise False.
    """
    return any(text.startswith(keyword) for text in result.msg.split('\n'))

def case_name_to_wasm_file(chapter_n: str, case_name: str) -> Path:
    """Map a chapter and a dotted case name to its ``.wasm`` file.

    Every ``.`` in ``case_name`` becomes a directory separator. For example,
    chapter ``0`` and case ``logging_test_output_to_a_file.test`` map to
    ``0/logging_test_output_to_a_file/test.wasm`` under ``PYTHON_COOKBOOK``.
    """
    sub_path = case_name.replace(".", "/")
    return PYTHON_COOKBOOK / f"{chapter_n}/{sub_path}.wasm"

# Analysis

In [59]:
# {chapter number: {case_name: CaseResult}}
# One pickle file per chapter, chapters 1 through 14.
chapter_x_result = {
    chapter_no: restore_result(Path(f"chapter_{chapter_no}_result.pkl"))
    for chapter_no in range(1, 15)
}

## `WASM_EXECUTION_FAILURE`

It seems all failures can be separated into two kinds according to their error messages:
- Exception
- Traceback

In [60]:
# {chapter number: {case_name: CaseResult}}
# Same chapters as `chapter_x_result`, reduced to the WASM execution failures.
chapter_x_wasm_exec_failed = {}
for i, chapter_result in chapter_x_result.items():
    chapter_x_wasm_exec_failed[i] = filter_wasm_execution_failure(chapter_result)

In [61]:
# {case_name: CaseResult}
# NOTE(review): both dicts are keyed by case name only; if two chapters ever
# share a case name the later one overwrites the earlier one while the tally
# below still counts both — confirm names are unique across chapters.
chapter_x_wamr_exception = {}
chapter_x_wasm_python_exception = {}

# Tally of failure kinds, keyed by "Exception" / "Traceback".
counter = Counter()
for chapter_n, chapter_i_wasm_exec_failed in chapter_x_wasm_exec_failed.items():
    for case_name, result in chapter_i_wasm_exec_failed.items():
        wasm_file = case_name_to_wasm_file(chapter_n, case_name)
        assert wasm_file.exists(), f"{wasm_file} doesn't exist"

        if filter_result_error_msg(result, "Exception: "):
            # A line starting with "Exception: " is treated as raised by the runtime.
            chapter_x_wamr_exception[case_name] = result
            counter["Exception"] += 1

        elif filter_result_error_msg(result, "Traceback "):
            # A line starting with "Traceback " is treated as a Python-level error.
            chapter_x_wasm_python_exception[case_name] = result
            counter["Traceback"] += 1

        else:
            # Report the chapter being processed (`chapter_n`), not a loop
            # variable left over from an earlier cell.
            print(f"chapter {chapter_n} {case_name}: {result}")
            raise AssertionError(
                f"unexpected failure message.  chapter {chapter_n} {case_name}: {result}"
            )

print(counter)

Counter({'Exception': 57, 'Traceback': 11})


### Exception (thrown by WAMR)

List all exception messages thrown by WAMR. It turns out that all exceptions thrown by WAMR can be split into two kinds:
- *indirect call type mismatch*
- *out of bounds memory access*

In [62]:
# For every runtime-raised failure, pick the first message line that starts
# with "Exception: " and tally identical lines.
counter = []
for result in chapter_x_wamr_exception.values():
    first_hit = next(
        (text for text in result.msg.split("\n") if text.startswith("Exception: ")),
        None,
    )
    if first_hit is not None:
        counter.append(first_hit)

pprint(Counter(counter))

Counter({'Exception: indirect call type mismatch': 55,
         'Exception: out of bounds memory access': 2})


Use `addr2line` and a debug build of `iwasm` to find the exact line of (runtime) code that throws the exception.

> ⚠️ iwasm should be compiled with `-DCMAKE_BUILD_TYPE=Debug -DWAMR_BUILD_DUMP_CALL_STACK=1 -DWAMR_BUILD_FAST_INTERP=0`

In [63]:
# traverse all cases in `chapter_x_wamr_exception`
# run the case with iwasm and export the callstack. `iwasm xxx.wasm > xxx.callstack`
# use addr2line to analyze the callstack. 
#   `addr2line --wasi-sdk {wasi_sdk_home} --wabt {wabt_home} --wasm-file xxx.wasm xxx.callstack`
# and save the result to `xxx.detail.callstack`

def execute_iwasm(wasm_file: Path, out_file: Path) -> subprocess.CompletedProcess:
    """Run ``wasm_file`` with iwasm and store its standard output in ``out_file``.

    Args:
        wasm_file: Module to execute.
        out_file: File that receives the process's stdout (overwritten).

    Returns:
        The ``CompletedProcess``; stderr is captured on it. A non-zero exit
        status does not raise because ``check`` is False.
    """
    command = [
        IWASM,
        "--dir=.",
        "--stack-size=134217728",  # 128 MiB
        wasm_file,
    ]
    with open(out_file, "wt") as sink:
        completed = subprocess.run(
            command,
            stdout=sink,
            stderr=subprocess.PIPE,
            check=False,
        )
    return completed

def addr2line(wasm_file: Path, callstack_file: Path, out_file: Path) -> subprocess.CompletedProcess:
    """Run the addr2line tool on a call-stack dump and save what it prints.

    Args:
        wasm_file: Module the call stack belongs to (``--wasm-file``).
        callstack_file: Call-stack dump passed as the positional argument.
        out_file: File that receives the tool's stdout (overwritten).

    Returns:
        The ``CompletedProcess``; stderr is captured on it.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero status.
    """
    command = [ADDR2LINE]
    command += ["--wasi-sdk", WASI_SDK_HOME]
    command += ["--wabt", WABT_HOME]
    command += ["--wasm-file", wasm_file]
    command.append(callstack_file)

    with open(out_file, "wt") as sink:
        completed = subprocess.run(
            command,
            stdout=sink,
            stderr=subprocess.PIPE,
            check=True,
        )
    return completed

def output_top_N_callstack_detail(callstack_detail_file: Path, N: int) -> List[str]:
    """Return the first ``N`` frame lines of a detailed call-stack file.

    Lines containing ``"at "`` (the source-location lines) are skipped; every
    other line is stripped of surrounding whitespace and collected in order.

    Args:
        callstack_detail_file: File to read.
        N: Maximum number of lines to return. Zero or a negative value yields
            an empty list.

    Returns:
        At most ``N`` stripped lines; fewer if the file runs out first.
    """
    top_n_bt: List[str] = []
    # Guard explicitly: counting down to exactly zero would never trigger for
    # N <= 0 and the whole file would be returned.
    if N <= 0:
        return top_n_bt

    with open(callstack_detail_file) as f:
        # Iterate lazily so reading stops as soon as enough lines are found.
        for line in f:
            if "at " in line:
                continue

            top_n_bt.append(line.strip())
            if len(top_n_bt) >= N:
                break

    return top_n_bt

def analyze_callstack(wasm_file: Path, topN: int) -> List[str]:
    """Produce the top ``topN`` call-stack frames for ``wasm_file``.

    The module is run with iwasm, the output is passed through addr2line, and
    the first ``topN`` frame lines of that result are returned. Two files are
    written next to the module: ``<name>.ctk`` (iwasm output) and
    ``<name>.dtl.ctk`` (addr2line output).
    """
    raw_dump = wasm_file.with_suffix(".ctk")
    symbolized = wasm_file.with_suffix(".dtl.ctk")

    execute_iwasm(wasm_file, raw_dump)
    addr2line(wasm_file, raw_dump, symbolized)

    return output_top_N_callstack_detail(symbolized, topN)

def compare_callstack(ctk_1: List[str], ctk_2: List[str]) -> bool:
    """Tell whether two call stacks have the same frames in the same order.

    Returns:
        True when both lists have equal length and every pair of frames at the
        same position compares equal; False otherwise.
    """
    same_depth = len(ctk_1) == len(ctk_2)
    return same_depth and all(left == right for left, right in zip(ctk_1, ctk_2))

#### indirect call type mismatch


In [70]:
# Distinct top-5 call stacks seen across all runtime-raised failures.
variant_call_stack = []

for chapter_n, chapter_i_wasm_exec_failed in tqdm(chapter_x_wasm_exec_failed.items()):
    for case_name, result in chapter_i_wasm_exec_failed.items():
        # Only cases whose message has a line starting with "Exception: ".
        if not filter_result_error_msg(result, "Exception: "):
            continue

        wasm_file = case_name_to_wasm_file(chapter_n, case_name)
        assert wasm_file.exists(), f"{wasm_file} doesn't exist"

        top_n_bt = analyze_callstack(wasm_file, 5)
        # Record the call stack only if no identical one has been seen yet.
        if not any(compare_callstack(item, top_n_bt) for item in variant_call_stack):
            variant_call_stack.append(top_n_bt)

print(f"✨ There are {len(variant_call_stack)} different callstacks.")
pprint(variant_call_stack)

# Persist the result so the slow analysis does not have to be repeated.
with open("variant_call_stack.pkl", "wb") as f:
    pickle.dump(variant_call_stack, f)

100%|██████████| 14/14 [18:09<00:00, 77.80s/it] 

✨ There are 16 different callstacks.
[['1: getset_get',
  '2: _PyObject_GenericGetAttrWithDict',
  '3: PyObject_GenericGetAttr',
  '4: impl_ply$lex$$$function__37_get_rules$$$function__1_lambda',
  '5: Nuitka_CallFunctionVectorcall'],
 ['1: getset_get',
  '2: _PyObject_GenericGetAttrWithDict',
  '3: PyObject_GenericGetAttr',
  '4: PyObject_GetAttr',
  '5: ../../Objects/funcobject.c'],
 ['1: getset_get',
  '2: _PyObject_GenericGetAttrWithDict',
  '3: _PyObject_LookupAttr',
  '4: ../../Objects/descrobject.c',
  '5: CALL_FUNCTION_WITH_SINGLE_ARG'],
 ['1: getset_get',
  '2: _PyObject_GenericGetAttrWithDict',
  '3: PyObject_GenericGetAttr',
  '4: impl___main__$$$function__1_lazyproperty',
  '5: CALL_FUNCTION_WITH_SINGLE_ARG'],
 ['1: getset_get',
  '2: _PyObject_GenericGetAttrWithDict',
  '3: PyObject_GenericGetAttr',
  '4: PyObject_GetAttr',
  '5: builtin_getattr'],
 ['1: getset_get',
  '2: _PyObject_GenericGetAttrWithDict',
  '3: PyObject_GenericGetAttr',
  '4: impl___main__$$$function__1_




It seems all (*16*) exceptions fall into 4 kinds:

- `getset_get()` <- `_PyObject_GenericGetAttrWithDict()` <- `PyObject_GenericGetAttr()` <- `PyObject_GetAttr()`.
- `getset_set()`
- `PyUnicode_FromFormatV()`
- `PyNumber_InPlaceAdd()`

##### callstack A.

Refer to [this one](./cookbook/src/9/defining_context_managers_the_easy_way/example2.dtl.ctk)


In [71]:
! head -n 18 ./cookbook/src/9/defining_context_managers_the_easy_way/example2.dtl.ctk

1: getset_get
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Objects/descrobject.c:202:16
2: _PyObject_GenericGetAttrWithDict
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Objects/object.c:1278:19
3: PyObject_GenericGetAttr
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Objects/object.c:1368:5
4: PyObject_GetAttr
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Objects/object.c:916:18
5: builtin_getattr
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Python/bltinmodule.c:1134:18
6: _PyEval_EvalFrameDefault
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Python/ceval.c:5050:29
7: ../../Python/ceval.c
	at unknown:73:16
8: _PyFunction_Vectorcall
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Objects/call.c:398:1
9: _PyVectorcall_Call
	at /Users/syrusakbary/Development/cpython-3.11/builddir/wasi/../../Objects/call.c:257:24


Observation:

- `_PyObject_GenericGetAttrWithDict()`. https://github.com/python/cpython/blob/3.11/Objects/object.c#L1243
- `res = f(descr, obj, (PyObject *)Py_TYPE(obj));`. https://github.com/python/cpython/blob/3.11/Objects/object.c#L1278
- `getset_get()`. https://github.com/python/cpython/blob/3.11/Objects/descrobject.c#L193
- `return descr_get_trampoline_call(descr->d_getset->get, obj, descr->d_getset->closure);`. https://github.com/python/cpython/blob/3.11/Objects/descrobject.c#L202
- ``` c
  // see pycore_object.h
  #if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE)
  #include <emscripten.h>
  EM_JS(int, descr_set_trampoline_call, (setter set, PyObject *obj, PyObject *value, void *closure), {
      return wasmTable.get(set)(obj, value, closure);
  });
  
  EM_JS(PyObject*, descr_get_trampoline_call, (getter get, PyObject *obj, void *closure), {
      return wasmTable.get(get)(obj, closure);
  });
  #else
  #define descr_set_trampoline_call(set, obj, value, closure) \
      (set)((obj), (value), (closure))
  
  #define descr_get_trampoline_call(get, obj, closure) \
      (get)((obj), (closure))    // <|-- HERE!
  
  #endif // __EMSCRIPTEN__ && PY_CALL_TRAMPOLINE
  ```
- `descr->d_getset->get(obj, descr->d_getset->closure)`
- ***indirect call type mismatch***. `func(i32,i32)->i32`(in bytecode) != `func(i32)->i32`(on stack). 