In [2]:
using SwiftObjectStores
using Iterators
using Base.Test

In [3]:
"""
Save the contents into a temporary 
Takes a Scope to track which files are created
when the scope is closed, will delete the tmpfiles"""
function asfile(scope::Vector, contents)
    fname, fh = mktemp() 
    print(fh, contents)
    close(fh)
    push!(scope, fname)
    fname
end

"""Holds a scope for deleting the tmp files, created within, using `asfile`
Use a do block.
"""
function withscope(func)
    files = String[]
    func(files)
    rm.(files)
end

###### Test that is works right
@testset "asfile" begin
    fname1 = ""
    fname2 = ""
    withscope() do scope
        fname1 = asfile(scope, "hi ho")
        @test readstring(`cat $fname1`) == "hi ho"
        
        fname2 = asfile(scope, "li lo")
        @test readstring(`cat $fname2`) == "li lo"
        
        @test isfile(fname1)
        @test isfile(fname2)
    end
    @test !isfile(fname1)
    @test !isfile(fname2)
end;

Test Summary: | Pass  Total
  asfile      |    6      6


In [78]:
serv=SwiftService()
train = get_file(serv, "color", "monroe/train.csv") do fh
    readdlm(fh,'\t')
end

descs = unique(train[:,1])

829-element Array{Any,1}:
 "acid green"      
 "adobe"           
 "algae green"     
 "algae"           
 "almost black"    
 "amber"           
 "amethyst"        
 "apple green"     
 "apple"           
 "apricot"         
 "aqua blue"       
 "aqua green"      
 "aqua"            
 ⋮                 
 "wisteria"        
 "yellow-brown"    
 "yellow-green"    
 "yellow"          
 "yellowish brown" 
 "yellowish green" 
 "yellowish"       
 "yellowish orange"
 "yellow ochre"    
 "yellow-orange"   
 "yellow tan"      
 "yuck"            

In [79]:
blueish=0
bluish=0
for term in train[:,1]
    if contains(term, "bluish")
        bluish+=1
    end
    if contains(term, "blueish")
        blueish+=1
    end
end
@show blueish
@show bluish

blueish = 684
bluish = 3612


3612

0.840782122905028

In [93]:
terms = unique(vcat(split.(descs)...) )

for term in terms
    if contains(term, "almost")
        println(term)
    end
end

almost


display_svg (generic function with 1 method)

In [121]:
draw = pipeline(
    `hfst-fst2fst -b --openfst-tropical`,
    `fstdraw --portrait`,
    `sed "s/@_EPSILON_SYMBOL_@/<>/"`,
    `sed "s/@_IDENTITY_SYMBOL_@/@ID@/"`,
    `sed "s/@_IDENTITY_SYMBOL_@/@ID@/"`,
    `dot -Tsvg`)
display_svg(img)=display("image/svg+xml", img)



display_svg (generic function with 1 method)

In [133]:
topscope = String[]

0-element Array{String,1}

In [136]:
rules = """
!ish rules
[{blueish} | {bluish}]:{blue ish}
{orangish}:{orange ish}
{purplish}:{purple ish}
{reddish}:{red ish}
{darkish}:{dark ish}
{greenish}:{green ish}
{greyish}:{grey ish}
{lightish}:{light ish}
{pinkish}:{pink ish}
{tealish}:{teal ish}
{yellowish}:{yellow ish}
{brownish}:{brown ish}
!y rules
{bluey}:{blue y}
{dirty}:{dirt y}
{dusky}:{dusk y}
{dusty}:{dust y}
{grassy}:{grass y}
{greeny}:{green y}
{leafy}:{leaf y}
{minty}:{mint y}
{muddy}:{mud y}
{murky}:{murk y}
{orangey}:{orange y}
{peachy}:{peach y}
{pinky}:{pink y}
{purpley}:{purple y}
{reddy}:{red y}
{rosy}:{rose y}
{rusty}:{rust y}
{sandy}:{sand y}
"""

rulesFstFile = asfile(topscope, readstring(pipeline(
`echo "$rules"`,
`hfst-regexp2fst -j`,
`hfst-minimize`,  
)))
#`hfst-fst2txt`
#draw
#) |> readstring |> println #|> display_svg

"/dev/shm/tmpM3t1pi"

In [75]:
withscope() do scope
    rulesfst = asfile(scope, pipeline(
    `echo "$rules"`,
    `hfst-regexp2fst -j`,
    `hfst-invert`,
    `hfst-minimize`,
    `hfst-fst2fst -O`
    ) |> readstring)
    
    pipeline(
    `echo "blue ish
red ish"
    `,
    `hfst-lookup  -O apertium $rulesfst`
    ) |> run
end

^blue ish/blueish/bluish$
^red ish/reddish$


> > > 

1-element Array{Void,1}:
 nothing

In [153]:
advrules ="""
Alphabet

! The alphabet should contain all symbols which are used in the grammar.
! Symbols consist of strings of utf-8 characters. Reserved words and white-space
! need to be quoted using %.
!a b c d e f g h i j k l m n o p q r s t u v w x y z %- % ;
a b c  %- %# ;

Sets
!Letter = a b c d e f g h i j k l m n o p q r s t u v w x y z ;
Letter = a b c ;


Rules

"Seperate - into seperated token of its own -- put a space beore and after it"
%-:%#%-%# <=> :Letter+ _ :Letter+ ;


"""

advrulesFstFile = asfile(topscope, readstring(pipeline(
    `echo "$advrules"`,
    `hfst-twolc`
)))

pipeline(
    `echo cc-cc`,
    `hfst-lookup $advrulesFstFile`
) |> run

cc-cc	cc#-#cc	0.000000



Reading input from STDIN.
Writing output to STDOUT.
Reading alphabet.
Reading sets.
Reading rules and compiling their contexts and centers.
Compiling and storing rules.
Compiling rules.
Storing rules.
Using HFST basic transducer format and performing slow lookups
> > 

In [143]:
pipeline(
    `echo abc`,
    `hfst-compose $advrulesFstFile $rulesFstFile`,
    draw
) |> readstring |> display_svg

hfst-compose: /dev/shm/tmpptD4zY is not a valid transducer file
hfst-fst2fst: <stdin> is not a valid transducer file
ERROR: FstHeader::Read: Bad FST header: standard input


LoadError: LoadError: failed processes:
  Process(`hfst-compose /dev/shm/tmpptD4zY /dev/shm/tmpM3t1pi`, ProcessExited(1)) [1]
  Process(`hfst-fst2fst -b --openfst-tropical`, ProcessExited(1)) [1]
  Process(`fstdraw --portrait`, ProcessExited(1)) [1]
while loading In[143], in expression starting on line 1