Make a dependent type checker using z3 lambdas



In [2]:
%%file /tmp/foo.c
/* Return the larger of x and y (y when they are equal). */
int foo(int x, int y){
    return x > y ? x : y;
}

Writing /tmp/foo.c


In [1]:
%%bash
# Compile only (-c) to an object file, then disassemble it.
# -F makes objdump print the file offset next to each symbol.
gcc /tmp/foo.c -c -o /tmp/foo.o
objdump -d -F /tmp/foo.o


/tmp/foo.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <foo> (File Offset: 0x40):
   0:	f3 0f 1e fa          	endbr64 
   4:	55                   	push   %rbp
   5:	48 89 e5             	mov    %rsp,%rbp
   8:	89 7d fc             	mov    %edi,-0x4(%rbp)
   b:	89 75 f8             	mov    %esi,-0x8(%rbp)
   e:	8b 55 f8             	mov    -0x8(%rbp),%edx
  11:	8b 45 fc             	mov    -0x4(%rbp),%eax
  14:	39 c2                	cmp    %eax,%edx
  16:	0f 4d c2             	cmovge %edx,%eax
  19:	5d                   	pop    %rbp
  1a:	c3                   	ret    


In [5]:
import pypcode
# https://github.com/philzook58/pcode2c/blob/main/pcode2c/printer.py
from pypcode import Context, PcodePrettyPrinter
# Lift the machine code of `foo` out of the object file and print its p-code.
ctx = Context("x86:LE:64:default")
filename = "/tmp/foo.o"
file_offset = 0x40  # "File Offset" that `objdump -F` reported for <foo>
start_address = 0   # address of <foo> in the disassembly listing
# The last instruction (`ret`, one byte) sits AT offset 0x1a, so the function
# occupies bytes 0x00..0x1a inclusive, i.e. 0x1b bytes. Reading only 0x1a bytes
# silently drops the `ret`.
size = 0x1a + 1
with open(filename, "rb") as file:
    file.seek(file_offset)
    code = file.read(size)
if len(code) != size:
    # A short read means the offset/size above no longer match the object file.
    raise ValueError(f"expected {size} bytes at offset {file_offset:#x}, got {len(code)}")
res = ctx.translate(code, base_address=start_address)
for op in res.ops:
    print(PcodePrettyPrinter.fmt_op(op))





IMARK ram[0:4]
IMARK ram[4:1]
unique[ed00:8] = RBP
RSP = RSP - 0x8
*[ram]RSP = unique[ed00:8]
IMARK ram[5:3]
RBP = RSP
IMARK ram[8:3]
unique[3100:8] = RBP + 0xfffffffffffffffc
unique[bf00:4] = EDI
*[ram]unique[3100:8] = unique[bf00:4]
IMARK ram[b:3]
unique[3100:8] = RBP + 0xfffffffffffffff8
unique[bf00:4] = ESI
*[ram]unique[3100:8] = unique[bf00:4]
IMARK ram[e:3]
unique[3100:8] = RBP + 0xfffffffffffffff8
unique[bf00:4] = *[ram]unique[3100:8]
EDX = unique[bf00:4]
RDX = zext(EDX)
IMARK ram[11:3]
unique[3100:8] = RBP + 0xfffffffffffffffc
unique[bf00:4] = *[ram]unique[3100:8]
EAX = unique[bf00:4]
RAX = zext(EAX)
IMARK ram[14:2]
CF = EDX < EAX
OF = sborrow(EDX, EAX)
unique[29700:4] = EDX - EAX
SF = unique[29700:4] s< 0x0
ZF = unique[29700:4] == 0x0
unique[13180:4] = unique[29700:4] & 0xff
unique[13200:1] = popcount(unique[13180:4])
unique[13280:1] = unique[13200:1] & 0x1
PF = unique[13280:1] == 0x0
IMARK ram[16:3]
unique[cf80:1] = OF == SF
RAX = zext(EAX)
unique[28880:1] = !unique[cf80:1]
i

So representation questions:
How to implement ram. 
BitVecSort(64) -> BitVecSort(8)
dict(String, ArraySort(BitVecSort(64), BitVecSort(8)))

MultiStore.

Hmm. This is quite similar in design to pcode2c.

In [33]:
# This cell uses z3 before the later `from z3 import *` cell, so import the
# names it needs here; otherwise a fresh-kernel run fails with NameError.
from z3 import ArraySort, BitVecSort, Function, Store

# Bit-vector sorts used throughout the notebook.
BV8 = BitVecSort(8)
BV16 = BitVecSort(16)
BV32 = BitVecSort(32)
BV64 = BitVecSort(64)

# Uninterpreted multi-byte reads: (memory, address) -> value, where memory is
# an array from 64-bit addresses to bytes.
Select16 = Function("Select16", ArraySort(BV64, BV8), BV64, BV16)
Select32 = Function("Select32", ArraySort(BV64, BV8), BV64, BV32)
Select64 = Function("Select64", ArraySort(BV64, BV8), BV64, BV64)
def MSelect(a, i, n):
    if n == 1:
        return a[i]
    if n == 2:
        return Select16(a, i)
    if n == 4:
        return Select32(a, i)
    if n == 8:
        return Select64(a, i)
    else:
        assert False, "n must be 1, 2, 4 or 8"
# Uninterpreted multi-byte writes: (memory, address, value) -> updated memory,
# where memory is an array from 64-bit addresses to bytes.
Store16 = Function("Store16", ArraySort(BV64, BV8), BV64, BV16, ArraySort(BV64, BV8))
Store32 = Function("Store32", ArraySort(BV64, BV8), BV64, BV32, ArraySort(BV64, BV8))
Store64 = Function("Store64", ArraySort(BV64, BV8), BV64, BV64, ArraySort(BV64, BV8))

def MStore(a, i, v):
    """Write the bit-vector ``v`` into ``a`` at index ``i``.

    The width is taken from the stored value ``v`` (the array ``a`` has no
    ``size()``). z3 reports bit-vector sizes in bits, so the width is converted
    to bytes before choosing the store: one byte uses the built-in ``Store``;
    2, 4 and 8 bytes use ``Store16`` / ``Store32`` / ``Store64``.

    Raises:
        AssertionError: if ``v`` is not 1, 2, 4 or 8 bytes wide.
    """
    nbytes, leftover_bits = divmod(v.size(), 8)
    if leftover_bits == 0:
        if nbytes == 1:
            return Store(a, i, v)
        if nbytes == 2:
            return Store16(a, i, v)
        if nbytes == 4:
            return Store32(a, i, v)
        if nbytes == 8:
            return Store64(a, i, v)
    assert False, "size must be 1, 2, 4 or 8"


In [30]:
from pypcode import OpCode
from z3 import *
# Address spaces are identified by name, so they are modelled as z3 strings.
Space = StringSort()
# Varnode datatype with one constructor `v(space, offset)`; the offset is 64 bits wide.
Varnode = Datatype('Varnode')
Varnode.declare('v', ('space', Space), ('offset', BitVecSort(64)))
Varnode = Varnode.create()  # rebinds the name from the declaration to the finished sort

# Machine state: an array from Varnode to a single byte.
State = ArraySort(Varnode, BitVecSort(8))
#Store("constant", Lambda([x], x))
x = BitVec("x", 64)
# Alternative per-space representation; only "const" is filled in, as the
# identity lambda on 64-bit values.
State0 = {"ram" : None, "unique" : None, "const" : Lambda([x], x)}

def varnode(vnode):
    """Translate a p-code varnode into a z3 term.

    Args:
        vnode: an object exposing ``space.name``, ``offset`` and ``size``,
            or ``None`` (e.g. an op without an output).

    Returns:
        ``None`` for ``None``. For the ``"const"`` space, the literal itself
        as a bit-vector of ``size * 8`` bits. For every other space, a
        ``Varnode.v(space, offset)`` term, which is the index sort of
        ``State``.
    """
    if vnode is None:
        return None
    if vnode.space.name == "const":
        # In the constant space the offset field holds the value itself.
        return BitVecVal(vnode.offset, vnode.size * 8)
    # Any other space names a storage location, so build the datatype key that
    # State is indexed by (the declared offset field is 64 bits wide).
    return Varnode.v(StringVal(vnode.space.name), BitVecVal(vnode.offset, 64))

def step(op, state1, state2):
    """Describe one p-code op as a relation between two symbolic states.

    Returns the string ``"IMARK "`` for instruction markers, and for COPY and
    INT_SUB a z3 equation saying ``state2`` is ``state1`` with the output
    location overwritten. Any other opcode is printed and then trips an
    assertion.

    NOTE(review): every input is looked up in ``state1``; the recorded run of
    this cell ends in "Z3Exception: Sort mismatch" -- confirm how constant
    operands should be read.
    """
    output = varnode(op.output)
    inputs = [varnode(i) for i in op.inputs]
    opcode = op.opcode
    if opcode == OpCode.IMARK:
        return "IMARK "
    if opcode == OpCode.COPY:
        source = state1[inputs[0]]
        return state2 == MStore(state1, output, source)
    if opcode == OpCode.INT_SUB:
        difference = state1[inputs[0]] - state1[inputs[1]]
        return state2 == MStore(state1, output, difference)
    print("unrecognized opcode", opcode)
    assert False
# Symbolic pre- and post-state, then print the relation produced for each op.
# NOTE(review): `res` comes from the earlier pypcode translation cell.
state1, state2 = Consts("state1 state2", State)
for op in res.ops:
    print(step(op, state1, state2))

IMARK 
IMARK 
state2 ==
Store(state1, v("unique", 60672), state1[v("register", 40)])


Z3Exception: Sort mismatch

Decompiler. Steps 
Static analysis over pcode:
constant propagation
liveness analysis

We'd want to collect out registers etc into more normal local variables.
Memory locations also. Indirect makes it harder. Well, we can alias. Hmm.

discharge equivalences of section to cbmc

We could eliminate jump points and push the obligation along to cbmc.

What about the macro decompiler idea

Bisimulation the whole way.

sail. Fugeddaboutit. Maybe in version 10


In [None]:
def constprop(state):
    """Placeholder for constant propagation; does nothing and returns None.

    Author's notes: ``state`` is meant to be a partial mapping (the original
    note breaks off before saying to what). Control flow is also needed, to
    know that nothing else can inject into a given position.
    """
    return None

def liveness(state):
    """Placeholder for liveness analysis; does nothing and returns None.

    Author's note: liveness is the easier analysis since jumps away are
    visible.
    """
    return None



In [None]:
# Combining static analysis with symbolic execution.
# `prog` is a sketch of a Datalog-style rule over p-code ops; this cell only stores the text.

prog = """
const() :- const(Addr, Reg, Val), op(Addr, INT_LESS(vnode(space, offset, size))). 
"""

In [None]:
"""
let rec typeof t =
  match t with
    TmTrue(fi) -> 
      TyBool
  | TmFalse(fi) -> 
      TyBool
  | TmIf(fi,t1,t2,t3) ->
     if (=) (typeof t1) TyBool then
       let tyT2 = typeof t2 in
       if (=) tyT2 (typeof t3) then tyT2
       else error fi "arms of conditional have different types"
     else error fi "guard of conditional not a boolean"
  | TmZero(fi) ->
      TyNat
  | TmSucc(fi,t1) ->
      if (=) (typeof t1) TyNat then TyNat
      else error fi "argument of succ is not a number"
  | TmPred(fi,t1) ->
      if (=) (typeof t1) TyNat then TyNat
      else error fi "argument of pred is not a number"
  | TmIsZero(fi,t1) ->
      if (=) (typeof t1) TyNat then TyBool
      else error fi "argument of iszero is not a number"
      """
def typeof(t):
    match t:
        case ("True",) | ("False",):
            return ("Bool",)
        case ("If", t1, t2, t3):
            if typeof(t1) == ("Bool",):
                tyT2 = typeof(t2)
                if tyT2 == typeof(t3):
                    return tyT2
                else:
                    raise Exception("arms of conditional have different types")
            else:
                raise Exception("guard of conditional not a boolean")
        case ("Zero",):
            return ("Nat",)
        case ("Succ", t1):
            if typeof(t1) == ("Nat",):
                return ("Nat",)
            else:
                raise Exception("argument of succ is not a number")
        case ("Pred", t1):
            if typeof(t1) == ("Nat",):
                return ("Nat",)
            else:
                raise Exception("argument of pred is not a number")
        case ("IsZero", t1):
            if typeof(t1) == ("Nat",):
                return ("Bool",)
            else:
                raise Exception("argument of iszero is not a number")



        

In [None]:
"""
let rec isnumericval t = match t with
    TmZero(_) -> true
  | TmSucc(_,t1) -> isnumericval t1
  | _ -> false

let rec isval t = match t with
    TmTrue(_)  -> true
  | TmFalse(_) -> true
  | t when isnumericval t  -> true
  | _ -> false

let rec eval1 t = match t with
    TmIf(_,TmTrue(_),t2,t3) ->
      t2
  | TmIf(_,TmFalse(_),t2,t3) ->
      t3
  | TmIf(fi,t1,t2,t3) ->
      let t1' = eval1 t1 in
      TmIf(fi, t1', t2, t3)
  | TmSucc(fi,t1) ->
      let t1' = eval1 t1 in
      TmSucc(fi, t1')
  | TmPred(_,TmZero(_)) ->
      TmZero(dummyinfo)
  | TmPred(_,TmSucc(_,nv1)) when (isnumericval nv1) ->
      nv1
  | TmPred(fi,t1) ->
      let t1' = eval1 t1 in
      TmPred(fi, t1')
  | TmIsZero(_,TmZero(_)) ->
      TmTrue(dummyinfo)
  | TmIsZero(_,TmSucc(_,nv1)) when (isnumericval nv1) ->
      TmFalse(dummyinfo)
  | TmIsZero(fi,t1) ->
      let t1' = eval1 t1 in
      TmIsZero(fi, t1')
  | _ -> 
      raise NoRuleApplies

let rec eval t =
  try let t' = eval1 t
      in eval t'
  with NoRuleApplies -> t
"""
def isnumericval(t):
    match t:
        case ("Zero",):
            return True
        case ("Succ", t1):
            return isnumericval(t1)
        case _:
            return False
def isval(t):
    match t:
        case ("True",) | ("False",):
            return True
        case t if isnumericval(t):
            return True
        case _:
            return False
def eval1(t):
    match t:
        case ("If", ("True",), t2, t3):
            return t2
        case ("If", ("False",), t2, t3):
            return t3
        case ("If", t1, t2, t3):
            t1_ = eval1(t1)
            return ("If", t1_, t2, t3)
        case ("Succ", t1):
            t1_ = eval1(t1)
            return ("Succ", t1_)
        case ("Pred", ("Zero",)):
            return ("Zero",)
        case ("Pred", ("Succ", nv1)) if isnumericval(nv1):
            return nv1
        case ("Pred", t1):
            t1_ = eval1(t1)
            return ("Pred", t1_)
        case ("IsZero", ("Zero",)):
            return ("True",)
        case ("IsZero", ("Succ", nv1)) if isnumericval(nv1):
            return ("False",)
        case ("IsZero", t1):
            t1_ = eval1(t1)
            return ("IsZero", t1_)
        case _:
            raise Exception("NoRuleApplies")

# pretty goofy idiom here.
def eval(t):
    try:
        t_ = eval1(t)
        return eval(t_)
    except Exception:
        return t

In [None]:
import lark


# Reference copy of an ocamlyacc-style grammar (rules with OCaml semantic
# actions) for the arithmetic language; kept as plain text for comparison.
ocaml_mly_grammar = """
toplevel :
    EOF
      { [] }
  | Command SEMI toplevel
      { let cmd = $1 in
          let cmds = $3 in
          cmd::cmds }

/* A top-level command */
Command :
    IMPORT STRINGV { (Import($2.v)) }
  | Term 
      { (let t = $1 in Eval(tmInfo t,t)) }

Term :
    AppTerm
      { $1 }
  | IF Term THEN Term ELSE Term
      { TmIf($1, $2, $4, $6) }

AppTerm :
    ATerm
      { $1 }
  | SUCC ATerm
      { TmSucc($1, $2) }
  | PRED ATerm
      { TmPred($1, $2) }
  | ISZERO ATerm
      { TmIsZero($1, $2) }

/* Atomic terms are ones that never require extra parentheses */
ATerm :
    LPAREN Term RPAREN  
      { $2 } 
  | TRUE
      { TmTrue($1) }
  | FALSE
      { TmFalse($1) }
  | INTV
      { let rec f n = match n with
              0 -> TmZero($1.i)
            | n -> TmSucc($1.i, f (n-1))
          in f $1.v }

/* All type expressions */
Type :
    AType
                { $1 }

/* Atomic types are those that never need extra parentheses */
AType :
    LPAREN Type RPAREN  
           { $2 } 
  | BOOL
      { TyBool }
  | NAT
      { TyNat }
"""

# Lark grammar for the arithmetic language.
# INT is pulled in from lark's common library and whitespace between tokens is
# ignored; without these directives the grammar refers to an undefined
# terminal and cannot parse input such as "if true then 0 else 1".
grammar = """
toplevel : (command ";")* command ";"?
command : term
term : app_term | "if" term "then" term "else" term
app_term : aterm | "succ" aterm | "pred" aterm | "iszero" aterm
aterm : "(" term ")" | "true" | "false" | INT
type : atype
atype : "(" type ")" | "bool" | "nat"

%import common.INT
%import common.WS
%ignore WS
"""

# `lark.Lark` is the public entry point for building a parser. The start rule
# must be named explicitly because this grammar has no rule called `start`.
parser = lark.Lark(grammar, start="toplevel")
parser

In [3]:
//%%script rust-script

// Terms of the arithmetic language; sub-terms are boxed because the type is recursive.
enum Term {
    True,
    False,
    // Three boxed sub-terms.
    If(Box<Term>, Box<Term>, Box<Term>),
    Zero,
    Succ(Box<Term>),
    Pred(Box<Term>),
    IsZero(Box<Term>)
}

/* 
impl Term {
    fn eval(&self) -> Term {
        match self {
            Term::If(t1, t2, t3) => {
                match t1.eval() {
                
                }
 }*/


Error: this file contains an unclosed delimiter

In [2]:
("true",)

('true',)

https://www.youtube.com/watch?v=m3bt3BYB0vQ&ab_channel=MichaelRyanClarkson


In [None]:
"""
(* Data type definitions *)
type kind = 
    KnStar
  | KnArr of kind * kind

type ty =
    TyVar of int * int
  | TyArr of ty * ty
  | TyAbs of string * kind * ty
  | TyApp of ty * ty
  | TyAll of string * kind * ty

type term =
    TmVar of info * int * int
  | TmAbs of info * string * ty * term
  | TmApp of info * term * term
  | TmTAbs of info * string * kind * term
  | TmTApp of info * term * ty
"""

# NOTE(review): the bare `type` below only evaluates the builtin and has no
# effect -- it looks like a leftover.
type
# Tags for the five `term` constructors quoted above (TmVar .. TmTApp), in the same order.
term_tags = ["Var", "Abs", "App", "TAbs", "TApp"]

def typecheck(env, t, ty):
    """Check term ``t`` against type ``ty`` in ``env`` -- not implemented yet.

    The definition previously had no body, which made the whole cell a syntax
    error; it now fails loudly when called instead.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("typecheck is not implemented yet")

"""

let rec typeof ctx t =
  match t with
    TmVar(fi,i,_) -> getTypeFromContext fi ctx i
  | TmAbs(fi,x,tyT1,t2) ->
      checkkindstar fi ctx tyT1;
      let ctx' = addbinding ctx x (VarBind(tyT1)) in
      let tyT2 = typeof ctx' t2 in
      TyArr(tyT1, typeShift (-1) tyT2)
  | TmApp(fi,t1,t2) ->
      let tyT1 = typeof ctx t1 in
      let tyT2 = typeof ctx t2 in
      (match simplifyty ctx tyT1 with
          TyArr(tyT11,tyT12) ->
            if tyeqv ctx tyT2 tyT11 then tyT12
            else error fi "parameter type mismatch"
        | _ -> error fi "arrow type expected")
  | TmTAbs(fi,tyX,knK1,t2) ->
      let ctx = addbinding ctx tyX (TyVarBind(knK1)) in
      let tyT2 = typeof ctx t2 in
      TyAll(tyX,knK1,tyT2)
  | TmTApp(fi,t1,tyT2) ->
      let knKT2 = kindof ctx tyT2 in
      let tyT1 = typeof ctx t1 in
      (match simplifyty ctx tyT1 with
           TyAll(_,knK11,tyT12) ->
             if knK11 <> knKT2 then
               error fi "Type argument has wrong kind";
             typeSubstTop tyT2 tyT12
         | _ -> error fi "universal type expected")
"""
def typeof(ctx, t):
    match t:
        case ("Var", i, _):
            return ctx[i]
        case ("Abs", x, tyT1, t2):
            ctx = {**ctx, x: tyT1}
        case ("App", t1, t2):
            tyT1 = typeof(ctx, t1)
            tyT2 = typeof(ctx, t2)
            match simplifyty(ctx, tyT1):
                case ("Arr", tyT11, tyT12):
                    if tyeqv(ctx, tyT2, tyT11):
                        return tyT12
                    else:
                        raise ValueError(fi, "parameter type mismatch")
                case _:
                    raise ValueError(fi, "arrow type expected")
        case ("TAbs", tyX, knK1, t2):
            ctx = {**ctx, tyX: knK1}
            tyT2 = typeof(ctx, t2)
            return ("All", tyX, knK1, tyT2)
        case ("TApp", t1, tyT2):
            knKT2 = kindof(ctx, tyT2)
            tyT1 = typeof(ctx, t1)
            match simplifyty(ctx, tyT1):
                case ("All", _, knK11, tyT12):
                    if knK11 != knKT2:
                        raise ValueError(fi, "Type argument has wrong kind")
                    return typeSubstTop(tyT2, tyT12)
                case _:
                    raise ValueError(fi, "universal type expected")
    

In [4]:
from collections import namedtuple
# Context record type; it has no fields yet.
Ctx = namedtuple("Ctx", field_names=())


typed asm
pcc