# Demo Analysis

## 1. Setup Workspace

In [1]:
import json
import pathlib
import copy
import operator
import pathlib
import os

import anytree
import networkx as nx
import sympy

import paptree

## 2. Load Data

In [2]:
# Locate the demo experiment's data under the parent of the current working
# directory, then load the recorded traces via paptree.utils.from_file.
data_dir = pathlib.Path.cwd().parent.joinpath("experiments/demo/data")
raw_data_dir = data_dir.joinpath("raw")
trace_file = raw_data_dir.joinpath("paptrace.darwin-arm64-clang.json")
trees = paptree.utils.from_file(trace_file)
print(f"Loaded {len(trees)} traces.")

Loaded 84 traces.


In [3]:
# Load the table of known expressions from the raw data directory.
# json.load parses straight from the file handle, and the explicit UTF-8
# encoding keeps the read independent of the platform's default locale.
known_file = raw_data_dir / "known_exprs.json"
with open(known_file, "r", encoding="utf-8") as f_in:
    known_exprs = json.load(f_in)
print(f"Loaded {len(known_exprs)} known expressions.")
print(json.dumps(known_exprs, indent=2))

Loaded 1 known expressions.
{
  "int operator*=(int, int)": "C_OP_MUL_ASSIGN_INT_INT"
}


## 3. Node Summary

### 3.1. Unfiltered Nodes

In [4]:
# Flatten every trace tree into a single list by walking each tree's root in
# pre-order.
nodes = [node for tree in trees for node in anytree.PreOrderIter(tree.root)]
print(f"Node count: {len(nodes)}")

Node count: 3513


In [5]:
# Group the flattened nodes by their `type` attribute.
node_types = {}
for node in nodes:
    if node.type not in node_types:
        node_types[node.type] = []
    node_types[node.type].append(node)
print(f"Node type count: {len(node_types)}")

Node type count: 8


In [6]:
# Report how many nodes were collected for each node type.
print("Nodes per type:")
for type_name in node_types:
    print(f"- {type_name}: {len(node_types[type_name])}")

Nodes per type:
- CalleeExpr: 84
- ReturnStmt: 84
- WhileStmt: 8
- LoopIter: 1083
- OpExpr: 1794
- IfThenStmt: 22
- ForStmt: 319
- CallerExpr: 119


### 3.2. Unique Nodes

In [7]:
# Keep the first occurrence of each node. "Duplicate" is decided by the list
# membership test (`in`), i.e. by the node objects' own equality comparison.
# NOTE(review): this is a linear scan per node; a set would be faster but
# requires the nodes to be hashable -- confirm before changing.
unique_nodes = []
for node in nodes:
    if node in unique_nodes:
        continue
    unique_nodes.append(node)
print(f"Unique node count: {len(unique_nodes)}")

Unique node count: 252


In [8]:
# Group the de-duplicated nodes by their `type` attribute. De-duplication is
# expected to drop repeated nodes only, never an entire node type, so the
# number of types should match the unfiltered grouping in `node_types`.
unique_node_types = {}
for node in unique_nodes:
    unique_node_types.setdefault(node.type, []).append(node)
print(f"Node type count: {len(unique_node_types)}")
# Enforce the expectation instead of relying on a visual comparison of outputs.
assert len(unique_node_types) == len(node_types), (
    f"expected {len(node_types)} node types after de-duplication, "
    f"found {len(unique_node_types)}"
)

Node type count: 8


In [9]:
# Report how many distinct nodes remain for each node type.
print("Nodes per type:")
for type_name in unique_node_types:
    print(f"- {type_name}: {len(unique_node_types[type_name])}")

Nodes per type:
- CalleeExpr: 84
- ReturnStmt: 9
- WhileStmt: 8
- LoopIter: 8
- OpExpr: 12
- IfThenStmt: 3
- ForStmt: 90
- CallerExpr: 38


## 4. Trace Summary

In [10]:
# Bin the traces by the signature (`sig`) of their root node.
binned_trees = {}
for tree in trees:
    if tree.root.sig in binned_trees:
        binned_trees[tree.root.sig].append(tree)
    else:
        binned_trees[tree.root.sig] = [tree]
print(f"Trace entry point count: {len(binned_trees)}")

Trace entry point count: 6


In [11]:
# Report how many traces were recorded for each entry-point signature.
print("Traces per entry point:")
for entry_sig in binned_trees:
    print(f"- {entry_sig}: {len(binned_trees[entry_sig])}")

Traces per entry point:
- _Bool demo::IsEven(int): 8
- int demo::ShiftsToZero(int): 8
- int demo::Factorial(int): 8
- _Bool demo::IsPrime(int): 40
- int demo::NByNIncrements(int): 10
- int demo::NByNByNIncrements(int): 10


## 5. Trace Analysis

### 5.1. IsEven

In [12]:
# Select the traces binned under the demo::IsEven root signature.
trees_subset = binned_trees["_Bool demo::IsEven(int)"]

def to_params_str(params):
    """Join the ``value`` entry of every parameter into one string.

    Values are rendered in order and separated by ", "; parameter names are
    not included in the output.
    """
    values = (f"{param['value']}" for param in params)
    return ", ".join(values)

def to_call_str(node):
    """Render a call node as ``<sig>: (<parameter values>)``."""
    rendered_params = to_params_str(node.params)
    return f"{node.sig}: ({rendered_params})"

def to_simple_node_view(node):
    """Build a one-line summary of a trace node.

    The result has the form ``(<name>) [<class name>] <type>: <detail>``.
    The detail is the call string for nodes whose ``is_call_node()`` is true
    and ``node.desc`` otherwise. Nodes that have a ``target`` attribute get a
    ``sym @ `` prefix in front of the name.
    """
    prefix = "sym @ " if hasattr(node, "target") else ""
    detail = to_call_str(node) if node.is_call_node() else node.desc
    class_name = type(node).__name__
    return f"({prefix}{node.name}) [{class_name}] {node.type}: {detail}"

# Render the traces at indices 0, 1 and 2 of the current subset: one summary
# line per node behind the tree-drawing prefix, and a blank line after each
# tree.
for tree in operator.itemgetter(0, 1, 2)(trees_subset):
    for row in anytree.RenderTree(tree.root):
        print(row.pre + to_simple_node_view(row.node))
    print()

(2080301) [CallNode] CalleeExpr: _Bool demo::IsEven(int): (0)
└── (2080299) [StmtNode] ReturnStmt: return value % 2 == 0

(2080301) [CallNode] CalleeExpr: _Bool demo::IsEven(int): (2)
└── (2080299) [StmtNode] ReturnStmt: return value % 2 == 0

(2080301) [CallNode] CalleeExpr: _Bool demo::IsEven(int): (4)
└── (2080299) [StmtNode] ReturnStmt: return value % 2 == 0



In [13]:
# Run paptree's analysis over the current trace subset and pretty-print the
# returned structure as JSON under a "Results:" heading.
results = paptree.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- _Bool demo::IsEven(int): 1 paths
  - [path_0] (2080299,): 8 traces

Finding general expr. for: _Bool demo::IsEven(int): (0)
  Path is constant.
  Found expr.: C_2080301

Results:
{
  "sigs": {
    "_Bool demo::IsEven(int)": "sig_0"
  },
  "ctxs": {
    "sig_0": {
      "0": "path_0",
      "2": "path_0",
      "4": "path_0",
      "1": "path_0",
      "3": "path_0",
      "5": "path_0",
      "-2147483648": "path_0",
      "2147483647": "path_0"
    }
  },
  "exprs": {
    "sig_0": {
      "path_0": "C_2080301"
    }
  }
}


### 5.2. Factorial

In [14]:
# Switch to the demo::Factorial traces and render the ones at indices 0, 2, 3
# and 4, one summary line per node with a blank line after each tree.
trees_subset = binned_trees["int demo::Factorial(int)"]
for tree in operator.itemgetter(0, 2, 3, 4)(trees_subset):
    for row in anytree.RenderTree(tree.root):
        print(row.pre + to_simple_node_view(row.node))
    print()

(2080633) [CallNode] CalleeExpr: int demo::Factorial(int): (-1)
└── (2080519) [StmtNode] IfThenStmt: n < 0 || n > 31
    └── (2080517) [StmtNode] ReturnStmt: return -1

(2080633) [CallNode] CalleeExpr: int demo::Factorial(int): (0)
├── (2080617) [LoopNode] ForStmt: for (int i = 1; i <= n; ++i)
└── (2080631) [StmtNode] ReturnStmt: return result

(2080633) [CallNode] CalleeExpr: int demo::Factorial(int): (1)
├── (2080617) [LoopNode] ForStmt: for (int i = 1; i <= n; ++i)
│   ├── (2080614) [StmtNode] LoopIter: LoopIter
│   │   └── (2080608) [StmtNode] OpExpr: int operator*=(int, int)
│   └── (2080594) [StmtNode] OpExpr: int operator++
└── (2080631) [StmtNode] ReturnStmt: return result

(2080633) [CallNode] CalleeExpr: int demo::Factorial(int): (2)
├── (2080617) [LoopNode] ForStmt: for (int i = 1; i <= n; ++i)
│   ├── (2080614) [StmtNode] LoopIter: LoopIter
│   │   └── (2080608) [StmtNode] OpExpr: int operator*=(int, int)
│   ├── (2080594) [StmtNode] OpExpr: int operator++
│   ├── (2080614)

In [15]:
# Run paptree's analysis over the current trace subset and pretty-print the
# returned structure as JSON under a "Results:" heading.
results = paptree.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- int demo::Factorial(int): 3 paths
  - [path_0] (2080519, 2080517): 2 traces
  - [path_1] (2080617, 2080631): 1 traces
  - [path_2] (2080617, 2080614, 2080631): 5 traces

Finding general expr. for: int demo::Factorial(int): (0)
  Path is constant.
  Found expr.: C_2080633

Finding general expr. for: int demo::Factorial(int): (1)
  Path is constant.
  Found expr.: C_2080633

Finding general expr. for: int demo::Factorial(int): (2)
  Solving for loop expr for node: 2080617
X0 + 3.56423889223083
X0 + 3.6
    Setting loop expr for 5221007184 to X0 + 3.6
  Found expr.: C_2080633 + (C_OP_MUL_ASSIGN_INT_INT + T_2080594)*(X0 + 3.6)

Results:
{
  "sigs": {
    "int demo::Factorial(int)": "sig_0"
  },
  "ctxs": {
    "sig_0": {
      "-1": "path_0",
      "32": "path_0",
      "0": "path_1",
      "1": "path_2",
      "2": "path_2",
      "3": "path_2",
      "4": "path_2",
      "31": "path_2"
    }
  },
  "exprs": {
    "sig_0": {
      "path_0": "C_2080633",
      "path_1": "C_

### 5.3. IsPrime

In [21]:
# Switch to the demo::IsPrime traces, run paptree's analysis over them and
# pretty-print the returned structure as JSON under a "Results:" heading.
trees_subset = binned_trees["_Bool demo::IsPrime(int)"]
results = paptree.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- _Bool demo::IsPrime(int): 5 paths
  - [path_0] (2081307, 2081316): 2 traces
  - [path_1] (2081307, 2081304, 2081316): 18 traces
  - [path_2] (2080701, 2080699): 2 traces
  - [path_3] (2081307, 2081304, 2081297, 2081295): 16 traces
  - [path_4] (2081307, 2081304, 2081304, 2081297, 2081295): 2 traces

Finding general expr. for: _Bool demo::IsPrime(int): (0)
  Path is constant.
  Found expr.: C_2081318

Finding general expr. for: _Bool demo::IsPrime(int): (1)
  Solving for loop expr for node: 2081307
sqrt(X0 + 0.831075692060685) + sqrt(X0 + 0.85811278734734)
sqrt(X0 + 0.8) + sqrt(X0 + 0.9)
    Setting loop expr for 5221053840 to sqrt(X0 + 0.8) + sqrt(X0 + 0.9)
  Found expr.: C_2081318 + T_2081242 + (T_2081242 + T_2081264)*(sqrt(X0 + 0.8) + sqrt(X0 + 0.9))

Finding general expr. for: _Bool demo::IsPrime(int): (2)
  Path is constant.
  Found expr.: C_2081318

Finding general expr. for: _Bool demo::IsPrime(int): (3)
  Solving for loop expr for node: 2081307
6.19118622306427
6

In [29]:
# Inspect a few hand-picked inputs from the current trace subset: print the
# raw rendered tree (full node reprs) followed by the result of
# get_cf_nodes() (presumably the control-flow nodes -- confirm in paptree).
# Only traces whose first parameter value is one of the listed strings are
# shown.
for tree in trees_subset:
    if tree.root.params[0]["value"] in ("25", "30", "36", "55"):
        print(anytree.RenderTree(tree.root))
        print(tree.get_cf_nodes())
        print()

CallNode(name=2081318, params=[{'name': 'n', 'value': '25'}], sig='_Bool demo::IsPrime(int)', type='CalleeExpr')
└── LoopNode(desc='for (int i = 2; i <= std::sqrt(n); ++i)', iter_block=[CallNode(name=2081242, params=[{'name': 'n', 'value': '25'}], sig='typename std::enable_if<std::is_integral<int>::value, double>::type sqrt(int)', type='CallerExpr'), StmtNode(desc='LoopIter', name=2081304, type='LoopIter'), StmtNode(desc='int operator++', name=2081264, type='OpExpr')], iter_count=3, name=2081307, trailing_iter_block=[CallNode(name=2081242, params=[{'name': 'n', 'value': '25'}], sig='typename std::enable_if<std::is_integral<int>::value, double>::type sqrt(int)', type='CallerExpr'), StmtNode(desc='LoopIter', name=2081304, type='LoopIter')], type='ForStmt')
    ├── CallNode(name=2081242, params=[{'name': 'n', 'value': '25'}], sig='typename std::enable_if<std::is_integral<int>::value, double>::type sqrt(int)', type='CallerExpr')
    ├── StmtNode(desc='LoopIter', name=2081304, type='LoopIte

### 5.4. ShiftsToZero

In [18]:
# Switch to the demo::ShiftsToZero traces, run paptree's analysis over them
# and pretty-print the returned structure as JSON under a "Results:" heading.
trees_subset = binned_trees["int demo::ShiftsToZero(int)"]
results = paptree.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- int demo::ShiftsToZero(int): 2 paths
  - [path_0] (2080414, 2080425): 1 traces
  - [path_1] (2080414, 2080410, 2080425): 7 traces

Finding general expr. for: int demo::ShiftsToZero(int): (0)
  Path is constant.
  Found expr.: C_2080427

Finding general expr. for: int demo::ShiftsToZero(int): (1)
  Solving for loop expr for node: 2080414
log(X0 + 0.969704266233825) + log(X0 + 1.0377625950867)
2*log(X0 + 1.0)
    Setting loop expr for 5220951568 to 2*log(X0 + 1.0)
  Found expr.: C_2080427 + 2*(T_2080397 + T_2080407)*log(X0 + 1.0)

Results:
{
  "sigs": {
    "int demo::ShiftsToZero(int)": "sig_0"
  },
  "ctxs": {
    "sig_0": {
      "0": "path_0",
      "1": "path_1",
      "3": "path_1",
      "7": "path_1",
      "15": "path_1",
      "31": "path_1",
      "63": "path_1",
      "127": "path_1"
    }
  },
  "exprs": {
    "sig_0": {
      "path_0": "C_2080427",
      "path_1": "C_2080427 + 2*(T_2080397 + T_2080407)*log(X0 + 1.0)"
    }
  }
}


In [19]:
# Render every trace in the current subset: one summary line per node behind
# the tree-drawing prefix, with a blank line after each tree.
for tree in trees_subset:
    for row in anytree.RenderTree(tree.root):
        print(row.pre + to_simple_node_view(row.node))
    print()

(2080427) [CallNode] CalleeExpr: int demo::ShiftsToZero(int): (0)
├── (2080414) [LoopNode] WhileStmt: while (n > 0)
└── (2080425) [StmtNode] ReturnStmt: return cnt

(2080427) [CallNode] CalleeExpr: int demo::ShiftsToZero(int): (1)
├── (2080414) [LoopNode] WhileStmt: while (n > 0)
│   └── (2080410) [StmtNode] LoopIter: LoopIter
│       ├── (2080397) [StmtNode] OpExpr: int operator>>=(int, int)
│       └── (2080407) [StmtNode] OpExpr: int operator++
└── (2080425) [StmtNode] ReturnStmt: return cnt

(2080427) [CallNode] CalleeExpr: int demo::ShiftsToZero(int): (3)
├── (2080414) [LoopNode] WhileStmt: while (n > 0)
│   ├── (2080410) [StmtNode] LoopIter: LoopIter
│   │   ├── (2080397) [StmtNode] OpExpr: int operator>>=(int, int)
│   │   └── (2080407) [StmtNode] OpExpr: int operator++
│   └── (2080410) [StmtNode] LoopIter: LoopIter
│       ├── (2080397) [StmtNode] OpExpr: int operator>>=(int, int)
│       └── (2080407) [StmtNode] OpExpr: int operator++
└── (2080425) [StmtNode] ReturnStmt: retu