# Demo Analysis

## 1. Setup Workspace

In [1]:
import json
import pathlib
import copy
import operator
import pathlib
import os

import anytree
import networkx as nx
import sympy

import papan

## 2. Load Data

In [2]:
# Locate the demo's raw trace dump relative to the parent of the current
# working directory, then load every trace tree it contains.
data_dir = pathlib.Path.cwd().parent.joinpath("experiments", "demo", "data")
raw_data_dir = data_dir.joinpath("raw")
trace_file = raw_data_dir.joinpath("paptrace.darwin-arm64-clang.json")
trees = papan.utils.from_file(trace_file)
print(f"Loaded {len(trees)} traces.")

Loaded 79 traces.


In [3]:
# Load the mapping of operation signatures to their known cost expressions.
known_file = raw_data_dir / "known_exprs.json"
# JSON interchange files are UTF-8; pin the encoding instead of relying on the
# platform's locale default so the notebook reads the file the same everywhere.
with open(known_file, "r", encoding="utf-8") as f_in:
    known_exprs = json.load(f_in)
print(f"Loaded {len(known_exprs)} known expressions.")
print(json.dumps(known_exprs, indent=2))

Loaded 1 known expressions.
{
  "int operator*=(int, int)": "C_OP_MUL_ASSIGN_INT_INT"
}


## 3. Node Summary

### 3.1. Unfiltered Nodes

In [4]:
# Flatten all traces into a single list, walking each tree in pre-order.
nodes = [node for tree in trees for node in anytree.PreOrderIter(tree.root)]
print(f"Node count: {len(nodes)}")

Node count: 32682


In [5]:
# Bucket the flattened nodes by their `type` attribute (first-seen key order).
node_types = {}
for node in nodes:
    if node.type not in node_types:
        node_types[node.type] = []
    node_types[node.type].append(node)
print(f"Node type count: {len(node_types)}")

Node type count: 8


In [6]:
# One bullet per node type with the number of nodes in that bucket.
print("Nodes per type:")
for type_name, members in node_types.items():
    print(f"- {type_name}: {len(members)}")

Nodes per type:
- CalleeExpr: 79
- ReturnStmt: 79
- ForStmt: 963
- CallerExpr: 119
- LoopIter: 10811
- OpExpr: 20601
- IfThenStmt: 22
- WhileStmt: 8


### 3.2. Unique Nodes

In [7]:
# Notice: Disabled due to poor performance: the `node not in unique_nodes` list scan below is quadratic in the node count.

# unique_nodes = []
# for node in nodes:
#     if node not in unique_nodes:
#         unique_nodes.append(node)
# print(f"Unique node count: {len(unique_nodes)}")

In [8]:
# Notice: Disabled due to poor performance.

# # We expect this count to match the unfiltered node type count.
# unique_node_types = {}
# for node in unique_nodes:
#     unique_node_types.setdefault(node.type, []).append(node)
# print(f"Node type count: {len(unique_node_types)}")

In [9]:
# Notice: Disabled due to poor performance.

# print("Nodes per type:")
# for k, v in unique_node_types.items():
#     print(f"- {k}: {len(v)}")

## 4. Trace Summary

In [10]:
# Group the traces by the signature of their root (entry point) node.
binned_trees = {}
for tree in trees:
    entry_sig = tree.root.sig
    binned_trees.setdefault(entry_sig, [])
    binned_trees[entry_sig].append(tree)
print(f"Trace entry point count: {len(binned_trees)}")

Trace entry point count: 6


In [11]:
# One bullet per entry-point signature with the number of traces recorded for it.
print("Traces per entry point:")
for entry_sig, entry_trees in binned_trees.items():
    print(f"- {entry_sig}: {len(entry_trees)}")

Traces per entry point:
- _Bool demo::IsEven(int): 8
- _Bool demo::IsPrime(int): 40
- int demo::ShiftsToZero(int): 8
- int demo::Factorial(int): 8
- int demo::NByNIncrements(int): 8
- int demo::NByNByNIncrements(int): 7


## 5. Trace Analysis

### 5.1. IsEven

In [12]:
# Select the traces whose root signature is demo::IsEven.
trees_subset = binned_trees["_Bool demo::IsEven(int)"]

def to_params_str(params):
    """Render the "value" field of each parameter as one ", "-separated string.

    Parameter names are not included in the output.
    """
    values = (f"{entry['value']}" for entry in params)
    return ", ".join(values)

def to_call_str(node):
    """Format a call node as "<sig>: (<parameter values>)"."""
    rendered_params = to_params_str(node.params)
    return f"{node.sig}: ({rendered_params})"

def to_simple_node_view(node):
    """Return a one-line summary of a trace node.

    The result has the shape "(<name>) [<class name>] <type>: <detail>", where
    <detail> is the call string for call nodes and `node.desc` otherwise.
    Nodes that carry a `target` attribute get a "sym @ " marker before the name.
    """
    marker = "sym @ " if hasattr(node, "target") else ""
    detail = to_call_str(node) if node.is_call_node() else node.desc
    return f"({marker}{node.name}) [{type(node).__name__}] {node.type}: {detail}"

# Render the traces at indices 0, 1 and 2 of the subset, one blank line after each.
sample_trees = operator.itemgetter(0, 1, 2)(trees_subset)
for tree in sample_trees:
    for prefix, _fill, item in anytree.RenderTree(tree.root):
        print(f"{prefix}{to_simple_node_view(item)}")
    print()

(2080301) [CallNode] CalleeExpr: _Bool demo::IsEven(int): (0)
└── (2080299) [StmtNode] ReturnStmt: return value % 2 == 0

(2080301) [CallNode] CalleeExpr: _Bool demo::IsEven(int): (2)
└── (2080299) [StmtNode] ReturnStmt: return value % 2 == 0

(2080301) [CallNode] CalleeExpr: _Bool demo::IsEven(int): (4)
└── (2080299) [StmtNode] ReturnStmt: return value % 2 == 0



In [13]:
# Run the analysis over the selected traces and dump the result as indented JSON.
results = papan.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- _Bool demo::IsEven(int): 1 paths
  - [path_0] (2080299,): 8 traces

Finding general expr. for: _Bool demo::IsEven(int): (0)
  Path is constant.
  Found expr.: C_2080301

Results:
{
  "sigs": {
    "_Bool demo::IsEven(int)": "sig_0"
  },
  "ctxs": {
    "sig_0": {
      "0": "path_0",
      "2": "path_0",
      "4": "path_0",
      "1": "path_0",
      "3": "path_0",
      "5": "path_0",
      "-2147483648": "path_0",
      "2147483647": "path_0"
    }
  },
  "exprs": {
    "sig_0": {
      "path_0": "C_2080301"
    }
  }
}


### 5.2. Factorial

In [14]:
# Select the demo::Factorial traces and render those at indices 0, 2, 3 and 4.
trees_subset = binned_trees["int demo::Factorial(int)"]
sample_trees = operator.itemgetter(0, 2, 3, 4)(trees_subset)
for tree in sample_trees:
    for prefix, _fill, item in anytree.RenderTree(tree.root):
        print(f"{prefix}{to_simple_node_view(item)}")
    print()

(2081318) [CallNode] CalleeExpr: int demo::Factorial(int): (-1)
└── (2081204) [StmtNode] IfThenStmt: n < 0 || n > 31
    └── (2081202) [StmtNode] ReturnStmt: return -1

(2081318) [CallNode] CalleeExpr: int demo::Factorial(int): (0)
├── (2081302) [LoopNode] ForStmt: for (int i = 1; i <= n; ++i)
└── (2081316) [StmtNode] ReturnStmt: return result

(2081318) [CallNode] CalleeExpr: int demo::Factorial(int): (1)
├── (2081302) [LoopNode] ForStmt: for (int i = 1; i <= n; ++i)
│   ├── (2081299) [StmtNode] LoopIter: LoopIter
│   │   └── (2081293) [StmtNode] OpExpr: int operator*=(int, int)
│   └── (2081279) [StmtNode] OpExpr: int operator++
└── (2081316) [StmtNode] ReturnStmt: return result

(2081318) [CallNode] CalleeExpr: int demo::Factorial(int): (2)
├── (2081302) [LoopNode] ForStmt: for (int i = 1; i <= n; ++i)
│   ├── (2081299) [StmtNode] LoopIter: LoopIter
│   │   └── (2081293) [StmtNode] OpExpr: int operator*=(int, int)
│   ├── (2081279) [StmtNode] OpExpr: int operator++
│   ├── (2081299)

In [15]:
# Run the analysis over the selected traces and dump the result as indented JSON.
results = papan.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- int demo::Factorial(int): 3 paths
  - [path_0] (2081204, 2081202): 2 traces
  - [path_1] (2081302, 2081316): 1 traces
  - [path_2] (2081302, 2081299, 2081316): 5 traces

Finding general expr. for: int demo::Factorial(int): (0)
  Path is constant.
  Found expr.: C_2081318

Finding general expr. for: int demo::Factorial(int): (1)
  Path is constant.
  Found expr.: C_2081318

Finding general expr. for: int demo::Factorial(int): (2)
  Solving for loop expr for node: 2081302
    Loop iteration has perfect linear correlation.
    Setting loop expr for 6434789456 to 1.0*X0
  Found expr.: C_2081318 + 1.0*X0*(C_OP_MUL_ASSIGN_INT_INT + T_2081279)

Results:
{
  "sigs": {
    "int demo::Factorial(int)": "sig_0"
  },
  "ctxs": {
    "sig_0": {
      "-1": "path_0",
      "32": "path_0",
      "0": "path_1",
      "1": "path_2",
      "2": "path_2",
      "3": "path_2",
      "4": "path_2",
      "31": "path_2"
    }
  },
  "exprs": {
    "sig_0": {
      "path_0": "C_2081318",
     

### 5.3. IsPrime

In [16]:
# Select the demo::IsPrime traces, analyze them, and dump the result as indented JSON.
# NOTE(review): the symbolic-regression log printed by this cell looks stochastic
# and no seed is set in this notebook; confirm whether papan exposes one.
trees_subset = binned_trees["_Bool demo::IsPrime(int)"]
results = papan.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- _Bool demo::IsPrime(int): 5 paths
  - [path_0] (2081101, 2081110): 2 traces
  - [path_1] (2081101, 2081098, 2081110): 18 traces
  - [path_2] (2080494, 2080492): 2 traces
  - [path_3] (2081101, 2081098, 2081091, 2081089): 16 traces
  - [path_4] (2081101, 2081098, 2081098, 2081091, 2081089): 2 traces

Finding general expr. for: _Bool demo::IsPrime(int): (0)
  Path is constant.
  Found expr.: C_2081112

Finding general expr. for: _Bool demo::IsPrime(int): (1)
  Solving for loop expr for node: 2081101
    Performing symbolic regression.
   	      	                           fitness                            	                      size                     
   	      	--------------------------------------------------------------	-----------------------------------------------
gen	nevals	avg    	gen	max        	min     	nevals	std   	avg	gen	max	min	nevals	std    
0  	300   	17715.3	0  	5.25475e+06	0.617277	300   	302867	3.3	0  	7  	2  	300   	1.33292
1  	183   	4.90107	1  	

In [17]:
# Inspect only the traces whose first parameter value is "4", "6", "10" or "16".
# This prints anytree's default rendering (full node repr) instead of the
# compact to_simple_node_view format, followed by the tree's get_cf_nodes().
for tree in trees_subset:
    if tree.root.params[0]["value"] in ("4", "6", "10", "16"):
        print(anytree.RenderTree(tree.root))
        print(tree.get_cf_nodes())
        print()

CallNode(name=2081112, params=[{'name': 'n', 'value': '4'}], sig='_Bool demo::IsPrime(int)', type='CalleeExpr')
└── LoopNode(desc='for (int i = 2; i <= std::sqrt(n); ++i)', iter_block=[CallNode(name=2081036, params=[{'name': 'n', 'value': '4'}], sig='typename std::enable_if<std::is_integral<int>::value, double>::type sqrt(int)', type='CallerExpr'), StmtNode(desc='LoopIter', name=2081098, type='LoopIter')], iter_count=1, name=2081101, trailing_iter_block=None, type='ForStmt')
    ├── CallNode(name=2081036, params=[{'name': 'n', 'value': '4'}], sig='typename std::enable_if<std::is_integral<int>::value, double>::type sqrt(int)', type='CallerExpr')
    └── StmtNode(desc='LoopIter', name=2081098, type='LoopIter')
        └── StmtNode(desc='n % i == 0', name=2081091, type='IfThenStmt')
            └── StmtNode(desc='return false', name=2081089, type='ReturnStmt')
[2081101, 2081098, 2081091, 2081089]

CallNode(name=2081112, params=[{'name': 'n', 'value': '6'}], sig='_Bool demo::IsPrime(int)',

### 5.4. ShiftsToZero

In [18]:
# Select the demo::ShiftsToZero traces, analyze them, and dump the result as indented JSON.
# NOTE(review): the symbolic-regression log printed by this cell looks stochastic
# and no seed is set in this notebook; confirm whether papan exposes one.
trees_subset = binned_trees["int demo::ShiftsToZero(int)"]
results = papan.analyze.analyze(known_exprs, trees_subset)
print("\nResults:", json.dumps(results, indent=2), sep="\n")

Path summary:
- int demo::ShiftsToZero(int): 2 paths
  - [path_0] (2080414, 2080425): 1 traces
  - [path_1] (2080414, 2080410, 2080425): 7 traces

Finding general expr. for: int demo::ShiftsToZero(int): (0)
  Path is constant.
  Found expr.: C_2080427

Finding general expr. for: int demo::ShiftsToZero(int): (1)
  Solving for loop expr for node: 2080414
    Performing symbolic regression.
   	      	                        fitness                        	                      size                     
   	      	-------------------------------------------------------	-----------------------------------------------
gen	nevals	avg    	gen	max   	min     	nevals	std    	avg    	gen	max	min	nevals	std    
0  	300   	4726.53	0  	666272	0.400807	300   	54219.8	3.26667	0  	7  	2  	300   	1.36951
1  	152   	12.2559	1  	3045  	0.402364	152   	175.481	2.19333	1  	5  	1  	152   	0.62925
2  	161   	3.34874	2  	348.857	0.402364	161   	20.7364	2.12   	2  	4  	1  	161   	0.540617
3  	183   	11.3513	3 

In [19]:
# Render every trace in the current subset, one blank line after each tree.
for tree in trees_subset:
    for prefix, _fill, item in anytree.RenderTree(tree.root):
        print(f"{prefix}{to_simple_node_view(item)}")
    print()

(2080427) [CallNode] CalleeExpr: int demo::ShiftsToZero(int): (0)
├── (2080414) [LoopNode] WhileStmt: while (n > 0)
└── (2080425) [StmtNode] ReturnStmt: return cnt

(2080427) [CallNode] CalleeExpr: int demo::ShiftsToZero(int): (1)
├── (2080414) [LoopNode] WhileStmt: while (n > 0)
│   └── (2080410) [StmtNode] LoopIter: LoopIter
│       ├── (2080397) [StmtNode] OpExpr: int operator>>=(int, int)
│       └── (2080407) [StmtNode] OpExpr: int operator++
└── (2080425) [StmtNode] ReturnStmt: return cnt

(2080427) [CallNode] CalleeExpr: int demo::ShiftsToZero(int): (3)
├── (2080414) [LoopNode] WhileStmt: while (n > 0)
│   ├── (2080410) [StmtNode] LoopIter: LoopIter
│   │   ├── (2080397) [StmtNode] OpExpr: int operator>>=(int, int)
│   │   └── (2080407) [StmtNode] OpExpr: int operator++
│   └── (2080410) [StmtNode] LoopIter: LoopIter
│       ├── (2080397) [StmtNode] OpExpr: int operator>>=(int, int)
│       └── (2080407) [StmtNode] OpExpr: int operator++
└── (2080425) [StmtNode] ReturnStmt: retu