In [58]:
using TypedDelegation   
using StaticArrays
using Base.Test

In [2]:
using Tokenize
import Tokenize: Tokens
import Tokens: Token, startpos, endpos, untokenize, kind, exactkind



In [3]:
# Sample Julia source used as tokenizer input. It contains plain assignments,
# a `;`-separated line, single- and multi-line comments, a function definition,
# a string containing `;`, a triple-quoted string, a compound assignment and
# `do`/`begin`/`if` blocks.
src = """
a=1
a+3
c=a+4; b=4

#A comment

function f(x)
    (1+2)/3
end

#= a 
multiline
comment
=#

println("abc ; efg")

\"""
a multiline
string
\"""

a+=2

map(1:5) do x
    a=2*x
    3*a
end

begin 
    c=1
end

if c
    d=3
end

"""
# Materialise the token stream into an array so it can be indexed and re-used.
tokens = tokenize(src) |> collect;


In [4]:
"""
    AnnotatedToken(tok, notes)
    AnnotatedToken(tok)

A `Token` paired with a dictionary of notes keyed by `Symbol`. The one-argument
form starts with an empty set of notes.
"""
immutable AnnotatedToken
    tok::Token
    notes::Dict{Symbol, Any}
end

function AnnotatedToken(vv::Token)
    empty_notes = Dict{Symbol, Any}()
    return AnnotatedToken(vv, empty_notes)
end

# Define methods of the listed token accessors for `AnnotatedToken`, delegating
# through its `tok` field.
@delegate_oneField(AnnotatedToken, tok, [exactkind, startpos, endpos, untokenize, kind])

kind (generic function with 2 methods)

In [5]:
?exactkind

search:



No documentation found.

`Tokenize.Tokens.exactkind` is a `Function`.

```
# 2 methods for generic function "exactkind":
exactkind(t::Tokenize.Tokens.Token) at /home/ubuntu/.julia/v0.5/Tokenize/src/token.jl:69
exactkind(a::AnnotatedToken, args...) at /home/ubuntu/.julia/v0.5/TypedDelegation/src/TypedDelegation.jl:69
```


In [6]:

# Print an annotated token as `row,col-row,col:   EXACTKIND (kind)<TAB>"text"`.
# The end marker has no source text, so it is shown with an empty string.
function Base.show(io::IO, atok::AnnotatedToken)
    (first_row, first_col) = startpos(atok)
    (last_row, last_col) = endpos(atok)

    text = if kind(atok) == Tokens.ENDMARKER
        ""
    else
        untokenize(atok)
    end

    # Position span.
    print(io, first_row, ",", first_col, "-", last_row, ",", last_col, ":")
    # Exact kind followed by the lower-cased general kind.
    print(io, "   ", exactkind(atok), " (", lowercase(string(kind(atok))), ")", "\t")
    # `show` (not `print`) so the text appears quoted and escaped.
    show(io, text)
end

In [7]:
# Wrap every token and display it, one per output line.
foreach(display, map(AnnotatedToken, tokens));

1,1-1,1:   IDENTIFIER (identifier)	"a"

1,2-1,2:   EQ (op)	"="

1,3-1,3:   INTEGER (integer)	"1"

1,4-2,0:   WHITESPACE (whitespace)	"\n"

2,1-2,1:   IDENTIFIER (identifier)	"a"

2,2-2,2:   PLUS (op)	"+"

2,3-2,3:   INTEGER (integer)	"3"

2,4-3,0:   WHITESPACE (whitespace)	"\n"

3,1-3,1:   IDENTIFIER (identifier)	"c"

3,2-3,2:   EQ (op)	"="

3,3-3,3:   IDENTIFIER (identifier)	"a"

3,4-3,4:   PLUS (op)	"+"

3,5-3,5:   INTEGER (integer)	"4"

3,6-3,6:   SEMICOLON (semicolon)	";"

3,7-3,7:   WHITESPACE (whitespace)	" "

3,8-3,8:   IDENTIFIER (identifier)	"b"

3,9-3,9:   EQ (op)	"="

3,10-3,10:   INTEGER (integer)	"4"

3,11-5,0:   WHITESPACE (whitespace)	"\n\n"

5,1-5,10:   COMMENT (comment)	"#A comment"

5,11-7,0:   WHITESPACE (whitespace)	"\n\n"

7,1-7,8:   FUNCTION (keyword)	"function"

7,9-7,9:   WHITESPACE (whitespace)	" "

7,10-7,10:   IDENTIFIER (identifier)	"f"

7,11-7,11:   LPAREN (lparen)	"("

7,12-7,12:   IDENTIFIER (identifier)	"x"

7,13-7,13:   RPAREN (rparen)	")"

7,14-8,4:   WHITESPACE (whitespace)	"\n    "

8,5-8,5:   LPAREN (lparen)	"("

8,6-8,6:   INTEGER (integer)	"1"

8,7-8,7:   PLUS (op)	"+"

8,8-8,8:   INTEGER (integer)	"2"

8,9-8,9:   RPAREN (rparen)	")"

8,10-8,10:   FWD_SLASH (op)	"/"

8,11-8,11:   INTEGER (integer)	"3"

8,12-9,0:   WHITESPACE (whitespace)	"\n"

9,1-9,3:   END (keyword)	"end"

9,4-11,0:   WHITESPACE (whitespace)	"\n\n"

11,1-14,2:   COMMENT (comment)	"#= a \nmultiline\ncomment\n=#"

14,3-16,0:   WHITESPACE (whitespace)	"\n\n"

16,1-16,7:   IDENTIFIER (identifier)	"println"

16,8-16,8:   LPAREN (lparen)	"("

16,9-16,19:   STRING (string)	"\"abc ; efg\""

16,20-16,20:   RPAREN (rparen)	")"

16,21-18,0:   WHITESPACE (whitespace)	"\n\n"

18,1-21,3:   TRIPLE_STRING (triple_string)	"\"\"\"\na multiline\nstring\n\"\"\""

21,4-23,0:   WHITESPACE (whitespace)	"\n\n"

23,1-23,1:   IDENTIFIER (identifier)	"a"

23,2-23,2:   PLUS (op)	"+"

23,3-23,3:   EQ (op)	"="

23,4-23,4:   INTEGER (integer)	"2"

23,5-25,0:   WHITESPACE (whitespace)	"\n\n"

25,1-25,3:   IDENTIFIER (identifier)	"map"

25,4-25,4:   LPAREN (lparen)	"("

25,5-25,5:   INTEGER (integer)	"1"

25,6-25,6:   COLON (op)	":"

25,7-25,7:   INTEGER (integer)	"5"

25,8-25,8:   RPAREN (rparen)	")"

25,9-25,9:   WHITESPACE (whitespace)	" "

25,10-25,11:   DO (keyword)	"do"

25,12-25,12:   WHITESPACE (whitespace)	" "

25,13-25,13:   IDENTIFIER (identifier)	"x"

25,14-26,4:   WHITESPACE (whitespace)	"\n    "

26,5-26,5:   IDENTIFIER (identifier)	"a"

26,6-26,6:   EQ (op)	"="

26,7-26,7:   INTEGER (integer)	"2"

26,8-26,8:   STAR (op)	"*"

26,9-26,9:   IDENTIFIER (identifier)	"x"

26,10-27,4:   WHITESPACE (whitespace)	"\n    "

27,5-27,5:   INTEGER (integer)	"3"

27,6-27,6:   STAR (op)	"*"

27,7-27,7:   IDENTIFIER (identifier)	"a"

27,8-28,0:   WHITESPACE (whitespace)	"\n"

28,1-28,3:   END (keyword)	"end"

28,4-30,0:   WHITESPACE (whitespace)	"\n\n"

30,1-30,5:   BEGIN (keyword)	"begin"

30,6-31,4:   WHITESPACE (whitespace)	" \n    "

31,5-31,5:   IDENTIFIER (identifier)	"c"

31,6-31,6:   EQ (op)	"="

31,7-31,7:   INTEGER (integer)	"1"

31,8-32,0:   WHITESPACE (whitespace)	"\n"

32,1-32,3:   END (keyword)	"end"

32,4-34,0:   WHITESPACE (whitespace)	"\n\n"

34,1-34,2:   IF (keyword)	"if"

34,3-34,3:   WHITESPACE (whitespace)	" "

34,4-34,4:   IDENTIFIER (identifier)	"c"

34,5-35,4:   WHITESPACE (whitespace)	"\n    "

35,5-35,5:   IDENTIFIER (identifier)	"d"

35,6-35,6:   EQ (op)	"="

35,7-35,7:   INTEGER (integer)	"3"

35,8-36,0:   WHITESPACE (whitespace)	"\n"

36,1-36,3:   END (keyword)	"end"

36,4-38,0:   WHITESPACE (whitespace)	"\n\n"

38,1-38,0:   ENDMARKER (endmarker)	""

- A **new line** always ends an expression
 - Unless it is immediately within a block opened by `(`
 - Or where the last non-whitespace token was a binary operator (including a `,`?)

In [62]:
# True when `tok` is a whitespace token whose text contains at least one newline.
function isnewline(tok)
    if kind(tok) != Tokens.WHITESPACE
        return false
    end
    return '\n' in untokenize(tok)
end




isnewline (generic function with 1 method)

In [63]:
# The token just before the end marker is the trailing whitespace of `src`,
# which contains newlines.
@test isnewline(tokens[end-1])

[1m[32mTest Passed
[0m  Expression: isnewline(tokens[end - 1])

is_ender (generic function with 1 method)

In [70]:
# Token kinds that can end an identifier-like phrase (`x`, `f(...)`, `v[...]`).
# Built with the `SVector(...)` constructor: `SVector[...]` is typed-array-literal
# syntax (element type `SVector`), not a static vector of kinds.
const identifier_phrase_end_kinds = SVector(Tokens.IDENTIFIER, Tokens.RPAREN, Tokens.RSQUARE)

LoadError: cannot declare identifier_phrase_end_kinds constant; it already has a value

In [8]:
"""
    annotate(toks)

Wrap every token of `toks` in an `AnnotatedToken` and return them, in order, as a
`Vector{AnnotatedToken}`. While walking the stream the following notes are set:

- `:closer` on an opening token and `:opener` on the matching closing token, for
  the opener/closer pairs listed in `subclause_markers`;
- `:ender => true` on tokens that end an expression: a `;`, or a newline that is
  not directly inside `(`...`)` and does not follow a binary operator that
  continues an operation.
"""
function annotate(toks)
    # Exact kind of each subclause opener => exact kind of its closer.
    # NOTE(review): other `end`-terminated keywords (e.g. `function`, `do`) are not
    # listed, so their `end` is matched against whatever opener is innermost --
    # confirm whether they should be added.
    subclause_markers = Dict(
        Tokens.LBRACE => Tokens.RBRACE,
        Tokens.LPAREN => Tokens.RPAREN,
        Tokens.LSQUARE => Tokens.RSQUARE,
        Tokens.FOR => Tokens.END,
        Tokens.WHILE => Tokens.END,
        Tokens.IF => Tokens.END,
        Tokens.TRY => Tokens.END,
        Tokens.BEGIN => Tokens.END,
        Tokens.QUOTE => Tokens.END
    )

    # Stack of openers still waiting for their closer (innermost last).
    open_subclauses = AnnotatedToken[]

    # The stack is empty at top level, so every peek at it is guarded.
    in_subclause() = !isempty(open_subclauses)
    next_closer_exactkind() = subclause_markers[exactkind(open_subclauses[end])]
    in_paren() = in_subclause() && exactkind(open_subclauses[end]) == Tokens.LPAREN
    in_square() = in_subclause() && exactkind(open_subclauses[end]) == Tokens.LSQUARE

    # Kinds of the last two significant tokens (oldest first). Whitespace is only
    # recorded when it contains a newline, so a WHITESPACE entry always means
    # "newline".
    history = SVector(Tokens.ENDMARKER, Tokens.ENDMARKER)
    function add_history!(atok)
        if kind(atok) != Tokens.WHITESPACE || isnewline(atok)
            #TODO: Consider replacing this with something faster
            @assert(!isnewline(atok) || history[end] != Tokens.WHITESPACE, "New lines should not follow each other -- those should have been merged by tokenizer")
            # Drop the oldest entry and append the new kind.
            history = SVector(history[end], kind(atok))
        end
    end

    # True when the last significant token was an operator that directly followed
    # the end of an identifier-like phrase, i.e. a binary operator awaiting its
    # right-hand side.
    am_continuing_operation() = history[end] == Tokens.OP && history[end-1] ∈ identifier_phrase_end_kinds

    # If `atok` closes the innermost open subclause, pop it and cross-link the pair.
    function handle_ifclose!(atok)
        if in_subclause() && next_closer_exactkind() == exactkind(atok)
            opener = pop!(open_subclauses)
            opener.notes[:closer] = atok
            atok.notes[:opener] = opener
        end
    end

    # If `atok` opens a subclause, remember it until its closer is seen.
    function handle_ifopen!(atok)
        if haskey(subclause_markers, exactkind(atok))
            push!(open_subclauses, atok)
        end
    end

    # Handles expression enders (this excludes closers).
    function handle_expression_enders!(atok)
        newline_ends_expression = isnewline(atok) &&
                                  !in_paren() &&
                                  !am_continuing_operation()
        if newline_ends_expression || exactkind(atok) == Tokens.SEMICOLON
            atok.notes[:ender] = true
        end
    end

    outstream = AnnotatedToken[]
    for tok in toks
        atok = AnnotatedToken(tok)
        push!(outstream, atok)

        handle_ifopen!(atok)
        handle_ifclose!(atok)
        handle_expression_enders!(atok)

        # Record last, so the handlers above see the history *before* this token.
        add_history!(atok)
    end
    return outstream
end

annotate (generic function with 1 method)

In [72]:
# Tokenize a binary operation split across a newline inside each of the three
# bracket types, to see which tokens the line break produces in each case.
(tokenize("""
(a +
b)
{a +
b}
[a +
b]


""") |> collect )

25-element Array{Tokenize.Tokens.Token,1}:
 1,1-1,1:   LPAREN	"("         
 1,2-1,2:   IDENTIFIER	"a"     
 1,3-1,3:   WHITESPACE	" "     
 1,4-1,4:   OP	"+"             
 1,5-2,0:   WHITESPACE	"\n"    
 2,1-2,1:   IDENTIFIER	"b"     
 2,2-2,2:   RPAREN	")"         
 2,3-3,0:   WHITESPACE	"\n"    
 3,1-3,1:   LBRACE	"{"         
 3,2-3,2:   IDENTIFIER	"a"     
 3,3-3,3:   WHITESPACE	" "     
 3,4-3,4:   OP	"+"             
 3,5-4,0:   WHITESPACE	"\n"    
 4,1-4,1:   IDENTIFIER	"b"     
 4,2-4,2:   RBRACE	"}"         
 4,3-5,0:   WHITESPACE	"\n"    
 5,1-5,1:   LSQUARE	"["        
 5,2-5,2:   IDENTIFIER	"a"     
 5,3-5,3:   WHITESPACE	" "     
 5,4-5,4:   OP	"+"             
 5,5-6,0:   WHITESPACE	"\n"    
 6,1-6,1:   IDENTIFIER	"b"     
 6,2-6,2:   RSQUARE	"]"        
 6,3-9,0:   WHITESPACE	"\n\n\n"
 9,1-9,0:   ENDMARKER	""       

In [67]:
# Probe: an assignment with nothing after `=` continues onto the next line; the
# result is the tuple (3,4).
(a,b) = 
3,4

(3,4)

In [35]:
# Probe: try to add a kind to the static history vector in place. This raises a
# MethodError -- no `unshift!` method accepts an SVector.
unshift!(history, Tokens.KEYWORD)

LoadError: MethodError: no method matching unshift!(::StaticArrays.SVector{2,Tokenize.Tokens.Kind}, ::Tokenize.Tokens.Kind)[0m
Closest candidates are:
  unshift!(::Any, ::Any, [1m[31m::Any[0m) at abstractarray.jl:1717
  unshift!(::Any, ::Any, [1m[31m::Any[0m, [1m[31m::Any...[0m) at abstractarray.jl:1718
  unshift!{T}([1m[31m::Array{T,1}[0m, ::Any) at array.jl:537
  ...[0m

2-element StaticArrays.SVector{2,Tokenize.Tokens.Kind}:
 IDENTICAL_WITH_DOT_ABOVE
 KEYWORD                 

In [51]:
# List the methods that take a `StaticArray` argument.
methodswith(StaticArray)

In [11]:
# Probe: each newline in a `begin` block ends a statement; the block evaluates
# to its last statement (3).
begin
    a=2
    b=3
end

3

In [12]:
# Probe: a trailing comma continues the expression past the newline, giving the
# tuple (0,2).
0,
2

(0,2)

In [13]:
# Probe: mixing `;` and bare newlines between statements inside parentheses is
# rejected with a syntax error ("missing comma or ) in argument list").
:(a=2;
c=4
b=3) |> dump

LoadError: syntax: missing comma or ) in argument list

In [14]:
# Probe: inside square brackets a newline separates elements, producing a
# 2-element array.
[a=2
b=3]

2-element Array{Int64,1}:
 2
 3

In [15]:
# Probe: inside parentheses the newline does not end the expression, so `a` and
# `+1` combine to give 3.
a=2
(a
+1
)

3

In [16]:
# Probe: an `if` block nested in parentheses treats newlines as statement enders
# again -- `a` and `+1` are separate statements and the value is 1.
a=2
(
if a>1
    a
    +1
end
)


1

In [17]:
# Probe: same as the `if` case but with a `begin` block inside the parentheses;
# the value is again 1.
a=2
(
begin
    a
    +1
end
)


1

In [18]:
# Probe: in a bare `begin` block `a` and `+1` are separate statements; the block
# evaluates to 1.
a=2
begin
    a
    +1
end



1

In [19]:
# Probe: square brackets nested in parentheses -- the innermost bracket decides,
# so the newline separates elements and the result is the array [2, 1].
a=2
(
[
    a
    +1
]
)


2-element Array{Int64,1}:
 2
 1

In [20]:
# Probe: parentheses nested in square brackets -- the innermost bracket decides,
# so `a` and `+1` combine and the result is the 1-element array [3].
a=2
[
    (
    a
    +1
    )
]



1-element Array{Int64,1}:
 3

In [21]:
# Baseline for the indexing probes that follow: single-line indexing, shows 30.
x=[10,20,30]
a=2
x[a+1] |> display



30

In [22]:
# Probe: line breaks between the index expression and `]` turn the indexing into
# a `typed_vcat` call, which fails with a MethodError.
x[(a+1)  
    
] |> display

LoadError: MethodError: no method matching typed_vcat(::Array{Int64,1}, ::Int64)[0m
Closest candidates are:
  typed_vcat{T}([1m[31m::Type{T}[0m, ::Number...) at abstractarray.jl:971
  typed_vcat([1m[31m::Type{T}[0m, ::Any...) at abstractarray.jl:1182[0m

In [23]:
# Probe: a parenthesised index expression spread over several lines is accepted
# here and shows 30.
x[
    (
    a
    +1
    )
] |> display

30

In [24]:
# Probe: blank space inside the parentheses, and a single newline between `)`
# and `]`, are both accepted; the result is 20.
x=[10,20,30]
x[(2
    
    )
]

20

In [25]:
# Probe: `;` inside indexing brackets is treated as vertical concatenation
# (`typed_vcat`), which fails with a MethodError.
x=rand(4,4)
x[ 2;2    
]

LoadError: MethodError: no method matching typed_vcat(::Array{Float64,2}, ::Int64, ::Int64)[0m
Closest candidates are:
  typed_vcat{T}([1m[31m::Type{T}[0m, ::Number...) at abstractarray.jl:971
  typed_vcat([1m[31m::Type{T}[0m, ::Any...) at abstractarray.jl:1182[0m