In [7]:
using SemanticModels
#using SemanticModels.ModelTools
using Random

We want to ingest the main method of a program, and recursively enumerate: (1) the set of subroutines, S = {s | s is called by main} that main calls; (2) the set of variables that main interacts with (as args, or in the form of locally scoped assignment statements), V = {v | v is passed to main, or is a local variable inside of main}; (3) the set of sub-subroutines (e.g., subroutines called by each sub-routine called by main), $S_{sr}$ = {s.sr | s.sr called by s; $s \in S$}; and (4) the set of variables that the set of methods, $S$, interacts with, which may be global (w/respect to main) or local (w/respect to S) in scope, $V_{sr}$ = {s.sr.v | v is passed to an s.sr $\in S_{sr}$, or is a local variable inside of an s.sr $\in S_{sr}$}

#### 1. Define Toy Methods
To begin, we can define some toy methods with useful properties (e.g., sub-routine calls; recursive calls, empty args). We wrap our method declarations and calls in a `quote` so that we can parse the program (and its component pieces) as `Expr` objects; this allows us to traverse the ASTs.

In [16]:
# Let's define some toy functions with useful properties (e.g., sub-routine calls; recursive calls, empty args):
example_expr = quote 
    
function fib(n::Int64)
    if n <= 2
        return 1
    else 
        return fib(n-1) + fib(n-2)
    end
end

function circumference(r)
    circum = 2*pi*r
    return circum
end

function exp_decay(init_value, decay_rate, t)
    y = init_value(1-decay_rate)^t
    return y
end
    
function compute_circle_metrics(r)
    c = circumference(r)
    a = pi*r^2
    return c,a
end
    
function add_some_noise(x)
    rng = MersenneTwister(1234);
    x *= Base.randn(rng,1)
    return x
end
        
@show compute_circle_metrics(5)
@show fib(3)  
@show add_some_noise(357)

end

quote
    #= In[16]:4 =#
    function fib(n::Int64)
        #= In[16]:5 =#
        if n <= 2
            #= In[16]:6 =#
            return 1
        else
            #= In[16]:8 =#
            return fib(n - 1) + fib(n - 2)
        end
    end
    #= In[16]:12 =#
    function circumference(r)
        #= In[16]:13 =#
        circum = 2 * pi * r
        #= In[16]:14 =#
        return circum
    end
    #= In[16]:17 =#
    function exp_decay(init_value, decay_rate, t)
        #= In[16]:18 =#
        y = init_value(1 - decay_rate) ^ t
        #= In[16]:19 =#
        return y
    end
    #= In[16]:22 =#
    function compute_circle_metrics(r)
        #= In[16]:23 =#
        c = circumference(r)
        #= In[16]:24 =#
        a = pi * r ^ 2
        #= In[16]:25 =#
        return (c, a)
    end
    #= In[16]:28 =#
    function add_some_noise(x)
        #= In[16]:29 =#
        rng = MersenneTwister(1234)
        #= In[16]:30 =#
        x *= Base.randn(rng, 1)
        #= In[16]:31 =#
        re


#### 2. Define Structs and Code Annotation Methods 
The data structures and methods defined below are intended to facilitate the annotation of our input "program" via injection (e.g., we insert data structures into the toy methods defined above for the purpose of extracting information that we can use to build the dataflow graph, including directed relationships of the types: `method -calls-> method`; `method -interacts_with-> variable`; `method -returns-> variable`; and `variable -takes-> value`).

In [17]:
mutable struct Method{name, args, expression, called_by}
    name::name
    args::args
    expression::expression
    called_by::called_by
end

mutable struct DeclaredMethodsCollector{declared_methods}
    declared_methods::declared_methods
end

mutable struct CalledMethodsCollector{called_methods}
    called_methods::called_methods
end

mutable struct Variable{name, cur_val, read_write_by}
    name::name
    cur_val::cur_val
    read_write_by::read_write_by
end

set_val!(v::Variable, new_val::Any) = (v.cur_val = new_val)

mutable struct VariableCollector{vars}
    vars::vars
end
    

In [18]:
"""
Take a Julia program (wrapped in quote), parses the AST, and recover all top level method signatures from declared and called methods.
"""
function get_all_top_level_signatures(expr_to_parse::Expr, expr_name::String, decl_methods_col::DeclaredMethodsCollector, called_methods_col::CalledMethodsCollector)
    
    for ex in expr_to_parse.args
        
        if isa(ex, LineNumberNode)
            continue
        end
        
        if ex.head == :function
            
            # the first arg of each function expression will be the function call 
            signature = ex.args[1]
            
            # the remainder of the expression args will be the args that are passed into the function
            args = signature.args[2:length(signature.args)]
            
            decl_method = Method(signature, args, ex, "$expr_name")
            push!(decl_methods_col.declared_methods, decl_method)
        end
        
        if (ex.head == :macrocall)
            
            # arg 1 is the :macrocall head; arg 2 is the line number 
            signature = ex.args[3]
            args = signature.args[2:length(signature.args)]
            called_method = Method(signature, args, Expr(:call, signature, args...), "$expr_name")
            push!(called_methods_col.called_methods, called_method)
        end

    end

    return decl_methods_col, called_methods_col
    
end

get_all_top_level_signatures

In [19]:
"""
Helper function that ingests an expression to parse, and outputs collector structs containing all declared and called methods
"""

function collect_method_info(expr_to_parse)
    
    delc_methods = DeclaredMethodsCollector(Method[])
    called_methods = CalledMethodsCollector(Method[])
    delc_methods, called_methods = get_all_top_level_signatures(expr_to_parse, "toy_expr",
        delc_methods, called_methods)
end



collect_method_info (generic function with 1 method)

In [20]:
nametype(ex::Expr) = begin
    ex.head == :(::) || error("$ex is not a type assertion")
    avar = ex.args[1]
    atyp = ex.args[2]
    return avar, atyp
end

nametype(s::Symbol) = (s, :Any)

nametype (generic function with 2 methods)

In [21]:
function collect_subroutine_calls(collector, subtree_root, caller_func)
    
    if subtree_root.head == :call
        
        subroutine_name = subtree_root.args[1]
        sr_args = subtree_root.args[2:end]

        subroutine = Method(subroutine_name, sr_args, subtree_root, caller_func)
        
        #println(subroutine.name, " ", subroutine.args, " ", subroutine.expression, " ", subroutine.called_by)
        push!(collector, subroutine)
    end

    
end


collect_subroutine_calls (generic function with 1 method)

In [22]:
function describe_args(fu::Expr)
    f = fu
    @show argl = f.args[1].args[2:end]
    @show argl
    @show body = f.args[2].args

    fname = string(f.args[1])
    
    for a in reverse(argl)
        avar, atyp = nametype(a)
        varname = string(avar)
        type_avar = typeof(avar)
        pushfirst!(body,
            :(println("F: ", $fname, "; ",
                $varname, " = ", $avar, " :: ", $type_avar, " <: ", $atyp))
        )

    end
    return f
end



describe_args (generic function with 1 method)

In [23]:
function annotate_program(input_expr)
    
    expr_to_annotate = copy(input_expr)
    declared, called = collect_method_info(expr_to_annotate)
    
    first_and_second_order_method_calls = CalledMethodsCollector([]).called_methods
    program_level_variables = VariableCollector([]).vars

    for method in declared.declared_methods
        
        f = method.expression
        
        describe_args(f)
        
        body = f.args[2].args

        fname = string(f.args[1])

        for ex in body
            
            if isa(ex,LineNumberNode)
                continue
            end
            
            if ex.head == :push!
                continue
            end
            
            if ex.head == :println
                continue
            end
            
            if ex.head == :(=) 
                a = ex.args[1]
                b = ex.args[2]
                varname = string(a)
                
                insert!(body, length(body)-1,:(println("A: ", $fname, "; " , $varname, " = ", $b)))
                insert!(body, length(body)-1,:(push!($program_level_variables, Variable($varname, $b, $fname))))

                # Collect second-level method calls
                if isa(b, Expr)
                    collect_subroutine_calls(first_and_second_order_method_calls, b, f.args[1])
                end

            end
            
           if ex.head in (:(*=), :(+=), :(-=), :(/=))
                
                a = ex.args[1]
                varname = string(a)
                b = ex.args[2]

                # The math op is the symbol right before the equals sign in the ex.head
                math_op = string(ex.head)[1] 
                insert!(body, length(body)-1,:(println("A: ", $fname, "; " , $varname, " = ", $varname, " ", $math_op, " ", $b, "; == ", $a)))
                insert!(body, length(body)-1,:(push!($program_level_variables, Variable($varname, $a, $fname))))
                
                # Collect second-level method calls
                if isa(b, Expr)
                    collect_subroutine_calls(first_and_second_order_method_calls, b, f.args[1])
                end

            end
            
            if ex.head == :return && isa(body[length(body)-1],LineNumberNode)
                return_vals = ex.args[1:end]
                
                # can catch all assgmts take lhs and print out values of lhs
                insert!(body, length(body), :(println("R: ", $fname, " returns ", $return_vals)))
                
                for v in program_level_variables                    
                    insert!(body, length(body), :(set_val!(v, v.name)))
                end
            end

        end
    end    
    return expr_to_annotate, first_and_second_order_method_calls, program_level_variables
    
end


annotate_program (generic function with 1 method)

#### 3. Use code annotation script to collect metadata to build dataflow graph

In [24]:
# First, let's collect the list of declared and called methods from our example expression, which contains our toy methods
declared, called = collect_method_info(example_expr)

println("Declared Methods")
for method in declared.declared_methods
    println(method.name)
end

println("\nCalled Methods")
for method in called.called_methods  
    println(method.name)
end

Declared Methods
fib(n::Int64)
circumference(r)
exp_decay(init_value, decay_rate, t)
compute_circle_metrics(r)
add_some_noise(x)

Called Methods
compute_circle_metrics(5)
fib(3)
add_some_noise(357)


In [25]:
# Now, let's run our annotation method on the example expression. 
# When we do this, we'll get back a revised (annotated) expression that can be evaluated to yield the information
# we can use to build the dataflow graph.

annotated_expr, first_and_second_order_calls, prog_vars = annotate_program(example_expr)

argl = (f.args[1]).args[2:end] = Any[:(n::Int64)]
argl = Any[:(n::Int64)]
body = (f.args[2]).args = Any[:(#= In[16]:5 =#), :(if n <= 2
      #= In[16]:6 =#
      return 1
  else
      #= In[16]:8 =#
      return fib(n - 1) + fib(n - 2)
  end)]
argl = (f.args[1]).args[2:end] = Any[:r]
argl = Any[:r]
body = (f.args[2]).args = Any[:(#= In[16]:13 =#), :(circum = 2 * pi * r), :(#= In[16]:14 =#), :(return circum)]
argl = (f.args[1]).args[2:end] = Any[:init_value, :decay_rate, :t]
argl = Any[:init_value, :decay_rate, :t]
body = (f.args[2]).args = Any[:(#= In[16]:18 =#), :(y = init_value(1 - decay_rate) ^ t), :(#= In[16]:19 =#), :(return y)]
argl = (f.args[1]).args[2:end] = Any[:r]
argl = Any[:r]
body = (f.args[2]).args = Any[:(#= In[16]:23 =#), :(c = circumference(r)), :(#= In[16]:24 =#), :(a = pi * r ^ 2), :(#= In[16]:25 =#), :(return (c, a))]
argl = (f.args[1]).args[2:end] = Any[:x]
argl = Any[:x]
body = (f.args[2]).args = Any[:(#= In[16]:29 =#), :(rng = MersenneTwister(1234)), :(#= In[16]:

(quote
    #= In[16]:4 =#
    function fib(n::Int64)
        println("F: ", "fib(n::Int64)", "; ", "n", " = ", n, " :: ", Symbol, " <: ", Int64)
        #= In[16]:5 =#
        if n <= 2
            #= In[16]:6 =#
            return 1
        else
            #= In[16]:8 =#
            return fib(n - 1) + fib(n - 2)
        end
    end
    #= In[16]:12 =#
    function circumference(r)
        println("F: ", "circumference(r)", "; ", "r", " = ", r, " :: ", Symbol, " <: ", Any)
        #= In[16]:13 =#
        circum = 2 * pi * r
        println("A: ", "circumference(r)", "; ", "circum", " = ", 2 * pi * r)
        push!(Any[], Variable("circum", 2 * pi * r, "circumference(r)"))
        #= In[16]:14 =#
        println("R: ", "circumference(r)", " returns ", Any[:circum])
        return circum
    end
    #= In[16]:17 =#
    function exp_decay(init_value, decay_rate, t)
        println("F: ", "exp_decay(init_value, decay_rate, t)", "; ", "init_value", " = ", init_value, " :: ", Symbol, " <: 

In [26]:
eval(annotated_expr)

F: compute_circle_metrics(r); r = 5 :: Symbol <: Any
F: circumference(r); r = 5 :: Symbol <: Any
A: circumference(r); circum = 31.41592653589793
R: circumference(r) returns Any[:circum]
F: circumference(r); r = 5 :: Symbol <: Any
A: circumference(r); circum = 31.41592653589793
R: circumference(r) returns Any[:circum]
A: compute_circle_metrics(r); c = 31.41592653589793
F: circumference(r); r = 5 :: Symbol <: Any
A: circumference(r); circum = 31.41592653589793
R: circumference(r) returns Any[:circum]
A: compute_circle_metrics(r); a = 78.53981633974483
R: compute_circle_metrics(r) returns Any[:((c, a))]
compute_circle_metrics(5) = (31.41592653589793, 78.53981633974483)
F: fib(n::Int64); n = 3 :: Symbol <: Int64
F: fib(n::Int64); n = 2 :: Symbol <: Int64
F: fib(n::Int64); n = 1 :: Symbol <: Int64
fib(3) = 2
F: add_some_noise(x); x = 357 :: Symbol <: Any


A: add_some_noise(x); rng = MersenneTwister(UInt32[0x000004d2], Random.DSFMT.DSFMT_state(Int32[-1393240018, 1073611148, 45497681, 1072875908, 436273599, 1073674613, -2043716458, 1073445557, -254908435, 1072827086, 2046617495, 1072848348, -97210901, 1072807902, 2077484130, 1073238679, -1338580334, 1073709371, -1030512225, 1073184390, -1980806249, 1073166650, -538182568, 1073186473, 389676635, 1073308720, -1796451258, 1073272325, 436637761, 1073055424, 1388165018, 1073431376, -1217066104, 1072912021, 1109506735, 1072719037, 1554729443, 1073043031, 215899888, 1072799862, 1399222867, 1073550525, 379786997, 1073421724, 2090785710, 1072950026, 441691964, 1073565251, 129434264, 1073099577, -126261802, 1073186748, 1143618815, 1073065844, 1150608234, 1072931292, -1119331776, 1072734919, 460600794, 1073689692, 1830837105, 1072772067, 2069748834, 1072722948, 293020050, 1073123338, 1882881498, 1073337265, 1437391187, 1073242875, 991840901, 1072938516, -1755036291, 1072858722, -1497724557, 10730215

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x0000000000000000000000000

1-element Array{Float64,1}:
 309.6429510965947

In [27]:
for method in first_and_second_order_calls
    println(method.expression)
end

2 * pi * r
init_value(1 - decay_rate) ^ t
circumference(r)
pi * r ^ 2
MersenneTwister(1234)
Base.randn(rng, 1)


In [28]:
for v in prog_vars
    println(v)
end

Variable{String,Float64,String}("circum", 31.41592653589793, "circumference(r)")
Variable{String,Float64,String}("circum", 31.41592653589793, "circumference(r)")
Variable{String,Float64,String}("circum", 31.41592653589793, "circumference(r)")
Variable{String,Float64,String}("c", 31.41592653589793, "compute_circle_metrics(r)")
Variable{String,Float64,String}("a", 78.53981633974483, "compute_circle_metrics(r)")


Variable{String,MersenneTwister,String}("rng", MersenneTwister(UInt32[0x000004d2], Random.DSFMT.DSFMT_state(Int32[-1393240018, 1073611148, 45497681, 1072875908, 436273599, 1073674613, -2043716458, 1073445557, -254908435, 1072827086, 2046617495, 1072848348, -97210901, 1072807902, 2077484130, 1073238679, -1338580334, 1073709371, -1030512225, 1073184390, -1980806249, 1073166650, -538182568, 1073186473, 389676635, 1073308720, -1796451258, 1073272325, 436637761, 1073055424, 1388165018, 1073431376, -1217066104, 1072912021, 1109506735, 1072719037, 1554729443, 1073043031, 215899888, 1072799862, 1399222867, 1073550525, 379786997, 1073421724, 2090785710, 1072950026, 441691964, 1073565251, 129434264, 1073099577, -126261802, 1073186748, 1143618815, 1073065844, 1150608234, 1072931292, -1119331776, 1072734919, 460600794, 1073689692, 1830837105, 1072772067, 2069748834, 1072722948, 293020050, 1073123338, 1882881498, 1073337265, 1437391187, 1073242875, 991840901, 1072938516, -1755036291, 1072858722, -1

.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], UInt128[0x00000000000000000000000000000000, 0x000000000000000000000000

 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x0000000000000000000000000

Variable{String,Array{Float64,1},String}("x", [309.643], "add_some_noise(x)")
