In [31]:
import PuzzleTools

In [46]:
# Force PuzzleTools to be loaded again so that edits made to the package after
# the earlier `import` take effect in this session.
reload("PuzzleTools")



In [165]:
# Keep every 100th entry (starting with the first) of whatever unixwords()
# returns; the trailing `;` suppresses the cell output.
# NOTE(review): presumably unixwords() yields the system dictionary as a
# Vector{String} -- confirm in PuzzleTools.Wordsets.
words = PuzzleTools.Wordsets.unixwords()[1:100:end];

In [169]:
module notice

import Base: size, getindex, isless

# A Feature is a yes/no property of a word. Each concrete subtype provides a
# `satisfies(feature, word)` method returning `true` or `false`.
abstract Feature

# Make a single Feature look like a zero-dimensional container. These methods
# are kept so that a Feature can still be passed as the scalar argument of a
# dot-broadcast call such as `satisfies.(feature, words)`.
size(::Feature) = ()
getindex(f::Feature, ::CartesianIndex{0}) = f

# Registry of every feature instance declared through `@allfeatures`, in
# declaration order.
const FEATURES = Feature[]

# `@allfeatures T expr` evaluates `expr` (a collection of `T` instances) and
# appends its elements to FEATURES.
# NOTE(review): the `allfeatures(::Type{T})` method written below may be renamed
# by macro hygiene and therefore not be callable as `allfeatures(T)` outside
# the expansion -- confirm before relying on it.
macro allfeatures(T, expr)
    quote
        allfeatures(::Type{$T}) = $(esc(expr))
        append!(FEATURES, allfeatures($T))
    end
end

# Feature: the word contains `letter` anywhere.
immutable ContainsLetter <: Feature
    letter::Char
end
@allfeatures ContainsLetter [ContainsLetter(l) for l in 'a':'z']
satisfies(f::ContainsLetter, word) = f.letter in word

# Feature: the `index`-th character of the word is `letter`.
immutable LetterAtIndex <: Feature
    letter::Char
    index::Int
end
@allfeatures LetterAtIndex [LetterAtIndex(l, j) for l in 'a':'z' for j in 1:26]

"""
    satisfies(f::LetterAtIndex, word)

Return `true` when the `f.index`-th *character* of `word` equals `f.letter`.
Words shorter than `f.index` (and non-positive indices) yield `false`.
"""
function satisfies(f::LetterAtIndex, word)
    # Walk the characters instead of writing `word[f.index]`: string indexing
    # is byte based, so it disagrees with the character count returned by
    # `length` (and can throw) as soon as the word contains a multi-byte
    # character.
    for (position, letter) in enumerate(word)
        position == f.index && return letter == f.letter
    end
    return false
end

# Letter classes shared by the Char predicates below. 'y' is deliberately in
# both, matching the character classes used by AlternatesConsonantVowel.
const CONSONANTS = "bcdfghjklmnpqrstvwxyz"
const VOWELS = "aeiouy"

# Char arguments get a proper Bool answer. Previously these calls reached
# `match(::Regex, ::Char)`, for which no method exists.
isconsonant(char::Char) = char in CONSONANTS
isvowel(char::Char) = char in VOWELS

# Fallbacks for any other argument type are unchanged: the result of `match`
# (a RegexMatch, or `nothing` when there is no match) is returned as-is.
isconsonant(char) = match(r"[bcdfghjklmnpqrstvwxyz]", char)
isvowel(char) = match(r"[aeiouy]", char)

# Feature: the whole word strictly alternates between consonants and vowels,
# starting with either kind. At least two letters are required by the patterns.
immutable AlternatesConsonantVowel <: Feature
end
@allfeatures AlternatesConsonantVowel [AlternatesConsonantVowel()]
satisfies(f::AlternatesConsonantVowel, word) =
    ismatch(r"^([bcdfghjklmnpqrstvwxyz][aeiouy])+[bcdfghjklmnpqrstvwxyz]?$", word) ||
    ismatch(r"^([aeiouy][bcdfghjklmnpqrstvwxyz])+[aeiouy]?$", word)


"""
    allfeatures()

Return the registry of all declared feature instances (the FEATURES vector
itself, not a copy).
"""
function allfeatures()
    return FEATURES
end

"""
    frequency(feature, words)

Return the fraction of `words` that satisfy `feature`, as a `Float64`.
An empty `words` collection yields `0.0` rather than `NaN`.
"""
function frequency(feature::Feature, words::AbstractArray{String})
    isempty(words) && return 0.0
    return count(word -> satisfies(feature, word), words) / length(words)
end

# A reference word list reduced to one satisfaction frequency per feature;
# `frequencies[i]` belongs to `features[i]`.
type Corpus
    features::Vector{Feature}
    frequencies::Vector{Float64}
end

"""
    Corpus(words)

Build a `Corpus` by measuring, for every registered feature, the fraction of
`words` that satisfies it.
"""
function Corpus(words::AbstractArray{String})
    # Snapshot the registry so that later `@allfeatures` declarations cannot
    # make `features` and `frequencies` fall out of step.
    features = copy(allfeatures())
    frequencies = Float64[frequency(f, words) for f in features]
    return Corpus(features, frequencies)
end

# Outcome of testing one feature against a word list:
#   satisfied[i] -- whether the i-th word satisfies `feature`
#   frequency    -- binomial probability of seeing exactly that many satisfying
#                   words, given the feature's corpus frequency
immutable FeatureResult
    feature::Feature
    satisfied::Vector{Bool}
    frequency::Float64
end

# Order results by their probability. Delegating to `isless` on the floats
# keeps the ordering total even if a NaN is present, which `sort` relies on.
isless(f1::FeatureResult, f2::FeatureResult) = isless(f1.frequency, f2.frequency)

"""
    summarize(feature, frequency, words)

Test `feature` against each of `words` and return a `FeatureResult` holding the
per-word outcomes together with the binomial probability
`C(n, k) * frequency^k * (1 - frequency)^(n - k)`, where `n` is the number of
words and `k` the number that satisfy the feature.
"""
function summarize(feature::Feature, frequency::Float64, words::AbstractArray{String})
    sat = Vector{Bool}(length(words))
    likelihood = 1.0
    for (i, word) in enumerate(words)
        hit = satisfies(feature, word)
        sat[i] = hit
        likelihood *= hit ? frequency : 1 - frequency
    end
    # The coefficient is computed with BigInt because Int `binomial` throws an
    # OverflowError for moderately long word lists (e.g. binomial(80, 40)).
    arrangements = Float64(binomial(big(length(sat)), sum(sat)))
    return FeatureResult(feature, sat, arrangements * likelihood)
end

"""
    analyze(corpus, words)

Return one `FeatureResult` per corpus feature, in corpus order, obtained by
calling `summarize` with that feature's corpus frequency.
"""
function analyze(corpus::Corpus, words::AbstractArray{String})
    pairs = zip(corpus.features, corpus.frequencies)
    return FeatureResult[summarize(feature, freq, words) for (feature, freq) in pairs]
end


end




notice

In [170]:
# Build the reference corpus: for every registered feature, record the fraction
# of `words` that satisfies it.
c = notice.Corpus(words)

notice.Corpus(notice.Feature[notice.ContainsLetter('a'),notice.ContainsLetter('b'),notice.ContainsLetter('c'),notice.ContainsLetter('d'),notice.ContainsLetter('e'),notice.ContainsLetter('f'),notice.ContainsLetter('g'),notice.ContainsLetter('h'),notice.ContainsLetter('i'),notice.ContainsLetter('j')  …  notice.LetterAtIndex('z',18),notice.LetterAtIndex('z',19),notice.LetterAtIndex('z',20),notice.LetterAtIndex('z',21),notice.LetterAtIndex('z',22),notice.LetterAtIndex('z',23),notice.LetterAtIndex('z',24),notice.LetterAtIndex('z',25),notice.LetterAtIndex('z',26),notice.AlternatesConsonantVowel()],[0.615515,0.160237,0.365833,0.267062,0.672319,0.0962272,0.174226,0.234421,0.615515,0.0135651  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0907164])

In [171]:
# Score every corpus feature against this word list and sort the results by
# ascending FeatureResult.frequency (the ordering defined by notice.isless), so
# the features whose observed pattern is least probable under the corpus
# frequencies come first.
d1s1 = ["lowered", "levitate", "inanimate", "paradise", "leveraged", "sizes", "tuxedo"]
sort(notice.analyze(c, d1s1))

703-element Array{notice.FeatureResult,1}:
 notice.FeatureResult(notice.AlternatesConsonantVowel(),Bool[true,true,true,true,true,true,true],5.05593e-8)
 notice.FeatureResult(notice.LetterAtIndex('l',1),Bool[true,true,false,false,true,false,false],0.000598249) 
 notice.FeatureResult(notice.LetterAtIndex('v',3),Bool[false,true,false,false,true,false,false],0.00298332) 
 notice.FeatureResult(notice.LetterAtIndex('e',4),Bool[true,false,false,false,true,true,true],0.00306368)   
 notice.FeatureResult(notice.LetterAtIndex('z',3),Bool[false,false,false,false,false,true,false],0.00883437)
 notice.FeatureResult(notice.LetterAtIndex('d',5),Bool[false,false,false,true,false,false,true],0.0130817)  
 notice.FeatureResult(notice.LetterAtIndex('e',8),Bool[false,true,false,true,true,false,false],0.0145743)   
 notice.FeatureResult(notice.LetterAtIndex('x',3),Bool[false,false,false,false,false,false,true],0.0373176) 
 notice.FeatureResult(notice.ContainsLetter('c'),Bool[false,false,false,false,false,f

In [149]:
# Same analysis on a small list of words that all start with 'h'; the sorted
# FeatureResults (ascending FeatureResult.frequency) are kept in `result`.
result = sort(notice.analyze(c, ["hello", "help", "howdy", "hi"]))

704-element Array{notice.FeatureResult,1}:
 notice.FeatureResult(notice.LetterAtIndex('h',1),Bool[true,true,true,true],2.11865e-6)  
 notice.FeatureResult(notice.ContainsLetter('h'),Bool[true,true,true,true],0.00301987)   
 notice.FeatureResult(notice.LetterAtIndex('l',3),Bool[true,true,false,false],0.0204854) 
 notice.FeatureResult(notice.ContainsLetter('a'),Bool[false,false,false,false],0.0218533)
 notice.FeatureResult(notice.LetterAtIndex('w',3),Bool[false,false,true,false],0.0314448)
 notice.FeatureResult(notice.ContainsLetter('r'),Bool[false,false,false,false],0.0436077)
 notice.FeatureResult(notice.ContainsLetter('n'),Bool[false,false,false,false],0.0530242)
 notice.FeatureResult(notice.ContainsLetter('t'),Bool[false,false,false,false],0.0636739)
 notice.FeatureResult(notice.LetterAtIndex('y',5),Bool[false,false,true,false],0.0644333)
 notice.FeatureResult(notice.LetterAtIndex('e',2),Bool[true,true,false,false],0.0908535) 
 notice.FeatureResult(notice.ContainsLetter('s'),Bool[fal