In [26]:
]activate .

[32m[1m  Activating[22m[39m project at `~/Projects/Noticer.jl`


In [27]:
using PuzzleTools
using Noticer
using Test

In [53]:
"""
    normalize(phrase)

Return `phrase` lowercased, with every character other than `a`-`z` and the
space character removed.
"""
function normalize(phrase)
    lowered = lowercase(phrase)
    return replace(lowered, r"[^a-z ]" => "")
end

# Build the vocabulary: every line of both word lists, normalized and
# collected into one de-duplicated set.
words = Set{String}(normalize(line) for line in eachline("data/113809of.fic"))
for line in eachline("/usr/share/dict/words")
    push!(words, normalize(line))
end

In [54]:
# Train the model once on the vocabulary; the later cells all evaluate against `model`.
features = all_features()
model = train(features, words)

# Sanity check: each of these words contains exactly two 'm's, so the count of
# 'm' is expected to be the top-ranked feature.
answers = map(lowercase, split("""
    mumble
    tummy
    mumford
    bomgum
    mumy
    """))
results = evaluate(model, answers)
@test description(first(results)) == "Number of occurrences of 'm'"
results[1:5]

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:06[39m


Description,P value,Observed,Expected
Number of occurrences of 'm',3.2e-56,__▇__,▇▂___
Has a transdeletion with letter 'm',3.1e-07,▅▇,▇_
Number of cardinal directions (NESW),1.4e-05,▇▂____________,▁▅▇▅▃▁________
Number of occurrences of 'u',0.0023,_▇___,▇▂___
Has a transaddition with letter 'd',0.051,▇▅,▇▁


In [55]:
# Based on http://web.mit.edu/puzzle/www/2015/puzzle/ukacd/

clues = ["hip-flask","infamise","facade","furnaced","ill fame","loofah","odoriferously","focalisation","deceitful","modify","defoliate","definabilities","exemplification","unfeminine","icefield","boastfulness","cuttlefishes","zinckification","folkland","unfeeling","flanges","have a short fuse","pelletifying","fable","efts","state of play","fangled","fitchews","fasciated","soulful","flaunchings","rust-proofing","Mustafa","carfuffle","far-out","goff","quinquefoliate","usufruct","denazified","thoughtfulness","pacificists","calves'-foot jelly","field of view","sweet flag","fandangos","sail-fish","Cadfael","fag","The Return of the Native","gold-foil","feldspathoid","caftan","road-fund licence","purfling","febrifugal","fish eagle","old-fogey","heffalump","confutation","flimping","sell-off","defuzed","battle fatigue","effluences","chaudfroid","flavorous","floury","elf","fusses","dog-faced","felt","law of excluded middle","Schottky effect","faddy","superficializes","tuner amplifiers","off-plan","counter-flory","unselfishness","sound effects","Fawkes","Mafikeng","Laffer curve","felines","foundering","fixed assets","foins","flatlets","dog-fights","lift-off","Fluellen","pouf","suffused","Aesop's Fables","manful","falsifying","kifs","custard coffin","officials","vote of no confidence"]

# Reduce every clue, in place, to bare lowercase letters: spaces, hyphens and
# apostrophes are all dropped by the pattern below.
map!(clues, clues) do w
    replace(lowercase(w), r"[^a-z]" => "")
end

results = evaluate(model, clues)

# The count of 'f' is expected to be the top-ranked feature. Read it through the
# `description` accessor rather than through the result's fields.
@test description(first(results)) == "Number of occurrences of 'f'"

results[1:5]

Description,P value,Observed,Expected
Number of occurrences of 'f',3.2000000000000003e-225,_▇▁__,▇▁___
Scrabble score,5.4e-140,______▁▂▁▂▂▅▄▆▄▆▆▂▃▄▇▂▄_▄_▁____▁▁▁▁▁____▁______,_____▁▂▃▄▅▆▇▇▇▆▅▄▄▃▂▂▁▁▁▁______________________
Number of reverse alphabetical bigrams,1.7e-78,_▂▄▆▇▅▂▂▁____,_▂▅▇▆▃▁______
Number of occurrences of 'q',2.9999999999999997e-62,▇__,▇__
Has a transdeletion with letter 'f',2.8e-48,▇▃,▇_


In [56]:
    # Fourteen answers with no source noted; show the five top-ranked features.
    clues = [
        "season", "saveup", "ecowas", "ignore", "sluice", "hosni", "inbed",
        "barbeau", "museum", "tobiah", "unsew", "dolce", "anaphia", "teenage",
    ]
    results = evaluate(model, clues)
    results[1:5]

Description,P value,Observed,Expected
Number of consonants,1.3e-17,___▇___________,__▁▃▆▇▅▂▁______
Has a transaddition with letter 'q',2.2e-05,▇▁,▇_
Number of unique consonants,2.4e-05,__▂▇________,__▁▄▇▆▃▁____
Has a transaddition with letter 'd',0.00032,▇▅,▇▁
Has a transaddition with letter 'r',0.0022,▇▅,▇▁


In [32]:
    # Nine nine-letter answers; show the five top-ranked features.
    clues = [
        "citygates", "impulsive", "clickspam", "baptistry", "leviathan",
        "policecar", "coupdetat", "sforzando", "cartwheel",
    ]
    results = evaluate(model, clues)
    results[1:5]

Description,P value,Observed,Expected
Number of unique letters,1.5e-05,________▇______,___▁▂▄▇▇▅▂▁____
Has a 1-letter transdeletion,0.011,▇▆,▂▇
Number of occurrences of 'c',0.022,▇▅▄__,▇▂___
Number of occurrences of 'p',0.054,▆▇___,▇▂___
Number of occurrences of 't',0.069,▇▄▅__,▇▄▁__


In [61]:
# http://web.mit.edu/puzzle/www/1999/puzzles/1Gumshoe/Warrants1/w1.2/w1.2.html
@testset "warrant 1.2" begin
    # Each case pairs a group of names with the letter whose occurrence count is
    # expected to be the top-ranked feature. Only the 'i' case is enabled.
    cases = [
        # (["racerx", "americanmaid", "kodachi", "ladyjane"], 'a'),
        # (["brain", "judyjetson", "jonnyquest", "jeannette"], 'n'),
        (["kenshin", "lisasimpson", "michiganjfrog", "sheila"], 'i'),
        # (["bedtimebear", "sherman", "stimpy", "mrmagoo"], 'm'),
        # (["bettyboop", "sweetpollypurebred", "skeletor", "firefly"], 'e'),
    ]
    for (names, common_letter) in cases
        results = evaluate(model, names)
        @show results[1:5]
        @test description(first(results)) == "Number of occurrences of '$common_letter'"
    end
end


results[1:5] = Noticer.EvaluationResult[EvaluationResult(
	Scrabble score, p=4.8e-71,
	obs: _________▇____▇▇________________▇______________,
	exp: _____▁▂▃▄▅▆▇▇▇▆▅▄▄▃▂▂▁▁▁▁______________________), EvaluationResult(
	Number of reverse alphabetical bigrams, p=2.9e-09,
	obs: __▇▇_▇__▇____,
	exp: _▂▅▇▆▃▁______), EvaluationResult(
	Has a transaddition with letter 'j', p=1e-05,
	obs: ▇▂,
	exp: ▇_), EvaluationResult(
	Number of unique consonants, p=0.0013,
	obs: ___▇▇▇__▇___,
	exp: __▁▄▇▆▃▁____), EvaluationResult(
	Number of occurrences of 'j', p=0.0021,
	obs: ▇▂_,
	exp: ▇__)]
warrant 1.2: [91m[1mTest Failed[22m[39m at [39m[1mIn[61]:12[22m
  Expression: description(first(results)) == "Number of occurrences of '$(common_letter)'"
   Evaluated: "Scrabble score" == "Number of occurrences of 'i'"
Stacktrace:
 [1] [0m[1mmacro expansion[22m
[90m   @ [39m[90m~/apps/julia-1.7.1/share/julia/stdlib/v1.7/Test/src/[39m[90m[4mTest.jl:445[24m[39m[90m [inlined][39m
 [2] [0m[1mmacro exp

LoadError: [91mSome tests did not pass: 0 passed, 1 failed, 0 errored, 0 broken.[39m

In [34]:
# http://web.mit.edu/puzzle/www/1999/puzzles/1Gumshoe/Warrants1/w1.2/w1.2.html
@testset "warrant 1.2" begin
    # Each case pairs a group of names with the letter whose occurrence count is
    # expected to be the top-ranked feature. Only the 'i' case is enabled.
    cases = [
        # (["racerx", "americanmaid", "kodachi", "ladyjane"], 'a'),
        # (["brain", "judyjetson", "jonnyquest", "jeannette"], 'n'),
        (["kenshin", "lisasimpson", "michiganjfrog", "sheila"], 'i'),
        # (["bedtimebear", "sherman", "stimpy", "mrmagoo"], 'm'),
        # (["bettyboop", "sweetpollypurebred", "skeletor", "firefly"], 'e'),
    ]
    for (names, common_letter) in cases
        results = evaluate(model, names)
        # Dump the first ten results to see how far down the expected feature lands.
        @show results[1:10]
        @test description(first(results)) == "Number of occurrences of '$common_letter'"
    end
end


results[1:10] = Noticer.EvaluationResult[EvaluationResult(
	Scrabble score, p=4.8e-71,
	obs: _________▇____▇▇________________▇______________,
	exp: _____▁▂▃▄▅▆▇▇▇▆▅▄▄▃▂▂▁▁▁▁______________________), EvaluationResult(
	Number of reverse alphabetical bigrams, p=2.9e-09,
	obs: __▇▇_▇__▇____,
	exp: _▂▅▇▆▃▁______), EvaluationResult(
	Has a transaddition with letter 'j', p=1e-05,
	obs: ▇▂,
	exp: ▇_), EvaluationResult(
	Number of unique consonants, p=0.0013,
	obs: ___▇▇▇__▇___,
	exp: __▁▄▇▆▃▁____), EvaluationResult(
	Number of occurrences of 'j', p=0.0021,
	obs: ▇▂_,
	exp: ▇__), EvaluationResult(
	Has a 1-letter transdeletion, p=0.0079,
	obs: ▇▂,
	exp: ▂▇), EvaluationResult(
	Number of occurrences of 'h', p=0.021,
	obs: ▂▇__,
	exp: ▇▁__), EvaluationResult(
	Number of occurrences of 'g', p=0.022,
	obs: ▇_▂__,
	exp: ▇▂___), EvaluationResult(
	Has a transaddition with letter 't', p=0.027,
	obs: ▇▇,
	exp: ▇▁), EvaluationResult(
	Number of unique letters, p=0.048,
	obs: ______▇_▄__▄___,
	exp: ___▁▂

LoadError: [91mSome tests did not pass: 0 passed, 1 failed, 0 errored, 0 broken.[39m

In [35]:
# http://web.mit.edu/puzzle/www/2012/puzzles/william_s_bergman/behave/

@testset "behave" begin
    # Group 1: the unique-vowel count must appear among the top two features.
    results = evaluate(
        model,
        ["annieproulx", "commutative", "hugoweaving", "mountaindew", "mozambique", "sequoia"],
    )
    @test "Number of unique vowels" in description.(results[1:2])

    # Group 2: reverse alphabetical bigrams must be the top feature.
    results = evaluate(
        model,
        ["almost", "biopsy", "chimp", "films", "ghost", "tux"],
    )
    @test description(first(results)) == "Number of reverse alphabetical bigrams"

    # Group 3: reverse sequential bigrams must be the top feature.
    results = evaluate(
        model,
        ["balked", "barspoon", "highnoon", "klutzy", "onyx", "posted"],
    )
    @test description(first(results)) == "Number of reverse sequential bigrams"
end


[0m[1mTest Summary: | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
behave        | [32m   3  [39m[36m    3[39m


Test.DefaultTestSet("behave", Any[], 3, false, false)

In [59]:
# http://web.mit.edu/puzzle/www/2013/coinheist.com/get_smart/following_the_news/index.html
# The testset wrapper and its assertion are commented out, so this cell only
# evaluates the answers and displays the first ten results.
# @testset  "following the news" begin
results = evaluate(
    model,
    [
        "andrewlin",
        "betatests",
        "clockofthelongnow",
        "decompressor",
        "eugene",
        "fungusproofsword",
        "gleemen",
        "hansardise",
        "interpose",
    ],
)
#     @show results[1:5]
#     @test description(first(results)) == "Number of cardinal directions (NESW)"
results[1:10]
# end


Description,P value,Observed,Expected
Scrabble score,1.9e-124,_______▄__▄▇_▄▄____▄_________▄_____▄___________,_____▁▂▃▄▅▆▇▇▇▆▅▄▄▃▂▂▁▁▁▁______________________
Number of consonants,1.6e-69,__▂_▂▂▇_▂__▂▂__,__▁▃▆▇▅▂▁______
Number of alphabetical bigrams,2.4e-51,__▄▄▇▇▇___▄__,_▁▄▇▆▃▁______
Number of occurrences of 'o',2.6000000000000003e-44,▇▁▁▁▁_,▇▄▁___
Number of unique consonants,3.1e-19,__▂▂▂▇▂_▂▂__,__▁▄▇▆▃▁____
Number of cardinal directions (NESW),5.6e-10,____▇_________,▁▅▇▅▃▁________
Number of occurrences of 't',0.0015,▇▂_▁_,▇▄▁__
Number of reverse alphabetical bigrams,0.0016,__▂▄▇_▂_▂____,_▂▅▇▆▃▁______
Has a transdeletion with letter 's',0.015,▇_,▇▅
Number of occurrences of 'w',0.024,▇▄__,▇▁__


In [37]:
    # http://www.maths.usyd.edu.au/ub/sums/puzzlehunt/2016/puzzles/A2S1_Last_Resort.pdf
    # The reverse-alphabetical-bigram count is expected to be the top-ranked feature.
    results = evaluate(
        model,
        ["advent", "achilles", "binary", "norway", "bubbly", "yacht", "anchor"],
    )
    @show results[1:5]
    @test description(first(results)) == "Number of reverse alphabetical bigrams"


results[1:5] = Noticer.EvaluationResult[EvaluationResult(
	Number of occurrences of 'b', p=1.1e-36,
	obs: ▇▁_▁_,
	exp: ▇▁___), EvaluationResult(
	Number of reverse alphabetical bigrams, p=1.1e-15,
	obs: _▇___________,
	exp: _▂▅▇▆▃▁______), EvaluationResult(
	Has a transdeletion with letter 'y', p=2.2e-07,
	obs: ▇▅,
	exp: ▇_), EvaluationResult(
	Has a transaddition with letter 's', p=0.0014,
	obs: _▇,
	exp: ▇▅), EvaluationResult(
	Number of occurrences of 'y', p=0.0052,
	obs: ▅▇__,
	exp: ▇▁__)]
[91m[1mTest Failed[22m[39m at [39m[1mIn[37]:4[22m
  Expression: description(first(results)) == "Number of reverse alphabetical bigrams"
   Evaluated: "Number of occurrences of 'b'" == "Number of reverse alphabetical bigrams"


LoadError: [91mThere was an error during testing[39m

In [38]:
# Display the first five entries of the current `results` as a table.
results[1:5]

Description,P value,Observed,Expected
Number of occurrences of 'b',1.1e-36,▇▁_▁_,▇▁___
Number of reverse alphabetical bigrams,1.1e-15,_▇___________,_▂▅▇▆▃▁______
Has a transdeletion with letter 'y',2.2e-07,▇▅,▇_
Has a transaddition with letter 's',0.0014,_▇,▇▅
Number of occurrences of 'y',0.0052,▅▇__,▇▁__


In [39]:
# Inspect the first entry of `results` on its own.
results[1]

EvaluationResult(
	Number of occurrences of 'b', p=1.1e-36,
	obs: ▇▁_▁_,
	exp: ▇▁___)

In [40]:
# Show the hypothesis test stored in the first result's `test` field.
results[1].test

Pearson's Chi-square Test
-------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.853233, 0.134502, 0.0114339, 0.000810167, 2.11348e-5]
    point estimate:          [0.714286, 0.142857, 0.0, 0.142857, 0.0]
    95% confidence interval: [(0.5714, 1.0), (0.0, 0.5489), (0.0, 0.406), (0.0, 0.5489), (0.0, 0.406)]

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           <1e-35

Details:
    Sample size:        7
    statistic:          174.57831544797503
    degrees of freedom: 4
    residuals:          [-0.397983, 0.0602763, -0.282909, 13.2036, -0.0121632]
    std. residuals:     [-1.03884, 0.0647908, -0.28454, 13.209, -0.0121633]


In [41]:
# Show the hypothesis test stored in the second result's `test` field, for comparison.
results[2].test

Pearson's Chi-square Test
-------------------------
Population details:
    parameter of interest:   Multinomial Probabilities
    value under h_0:         [0.00555109, 0.0664088, 0.204369, 0.303521, 0.240972, 0.117165, 0.0436902, 0.0135889, 0.00386745, 0.000669231, 0.000140891, 3.52227e-5, 2.11336e-5]
    point estimate:          [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    95% confidence interval: [(0.0, 0.2344), (1.0, 1.0), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344), (0.0, 0.2344)]

Test summary:
    outcome with 95% confidence: reject h_0
    one-sided p-value:           <1e-14

Details:
    Sample size:        7
    statistic:          98.40765885223294
    degrees of freedom: 12
    residuals:          [-0.197123, 9.58502, -1.19607, -1.45762, -1.29877, -0.905623, -0.55302, -0.308419, -0.164536, -0.0684443, -0.0314044, -0.0157022, -0.0121629]
    std. 

In [42]:
# MIT Mystery Hunt 2013 puzzle Wordplay
# http://www.mit.edu/~puzzle/2013/coinheist.com/get_smart/wordplay/index.html

@testset "wordplay" begin
    # Set 1
    hill_words = ["ample", "adenoid", "music", "fifa"]
    @test description(first(evaluate(model, hill_words))) == "Is a hill word"

    # Set 2 (disabled; written against a `best_feature` helper)
    # @test best_feature(["peeped", "isseis", "fee", "acacia", "salsas", "arrear"]).description == "is a pyramid word"

    # Set 3
    valley_words = ["skort", "sporty", "yolks", "peccadillo", "unknot", "rosy"]
    @test description(first(evaluate(model, valley_words))) == "Is a valley word"

    # Set 4
    palindromes = ["testset", "lol", "tenet", "malayalam"]
    @test description(first(evaluate(model, palindromes))) == "Is a palindrome"

    # Set 5: only required to appear among the top three features.
    double_letter_words = [
        "hitchhiker", "kaashoek", "jellystone", "kierkegaard",
        "metallica", "maastrict", "menschheit",
    ]
    top_three = description.(evaluate(model, double_letter_words)[1:3])
    @test "Number of double letters" in top_three

    # Set 6
    vowel_heavy_words = ["aime", "eye", "eerie", "riaa", "oahu", "oeis"]
    @test description(first(evaluate(model, vowel_heavy_words))) == "Number of unique consonants"
end


[0m[1mTest Summary: | [22m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
wordplay      | [32m   5  [39m[36m    5[39m


Test.DefaultTestSet("wordplay", Any[], 5, false, false)

In [43]:
# Full ranking for the "hitchhiker ... menschheit" word group.
evaluate(
    model,
    [
        "hitchhiker", "kaashoek", "jellystone", "kierkegaard",
        "metallica", "maastrict", "menschheit",
    ],
)

Description,P value,Observed,Expected
Number of occurrences of 'h',2.8e-97,▇▂▂▂,▇▁__
Number of occurrences of 'k',4.1e-28,▇▂▄__,▇▁___
Number of double letters,3e-05,_▇___,▇▂___
Number of occurrences of 'a',0.0024,▅_▇___,▇▆▁___
Has a 1-letter transdeletion,0.019,▇▅,▂▇
Has a transdeletion with letter 's',0.033,▇_,▇▅
Number of occurrences of 'j',0.043,▇▁_,▇__
Number of occurrences of 'l',0.061,▇_▃__,▇▃▁__
Scrabble score,0.087,_____________▇_____▄▇▄___▄_____________________,_____▁▂▃▄▅▆▇▇▇▆▅▄▄▃▂▂▁▁▁▁______________________
Has a 1-letter transaddition,0.22,▇▅,▄▇


In [46]:
# http://web.mit.edu/puzzle/www/2007/puzzles/1_1_1/
# The testset wrapper is commented out, so `results` stays a top-level variable
# that later cells can inspect.
# @testset "1-1=1" begin
results = evaluate(
    model,
    split("""
        STRIFE
        SEAMAN
        NIX
        ETCH
        POST
        QUEERART
        FOO
        TALKS
        REPAYS
        STU
        HUMF
        UNDERHID
        SIXTEENS
        BOWMEN
        """),
)
@test description(first(results)) == "Has a 1-letter transdeletion"
# end


[91m[1mTest Failed[22m[39m at [39m[1mIn[46]:19[22m
  Expression: description(first(results)) == "Has a 1-letter transdeletion"
   Evaluated: "Has a transaddition with letter 'j'" == "Has a 1-letter transdeletion"


LoadError: [91mThere was an error during testing[39m

In [49]:
# Run the 1-1=1 word list through the package's own normalizer for the
# feature-level checks in the following cells.
answers = map(Noticer.normalize, split("""
    STRIFE
    SEAMAN
    NIX
    ETCH
    POST
    QUEERART
    FOO
    TALKS
    REPAYS
    STU
    HUMF
    UNDERHID
    SIXTEENS
    BOWMEN
    """))

14-element Vector{String}:
 "strife"
 "seaman"
 "nix"
 "etch"
 "post"
 "queerart"
 "foo"
 "talks"
 "repays"
 "stu"
 "humf"
 "underhid"
 "sixteens"
 "bowmen"

In [51]:
# List the entries of `Noticer.WORDS` whose letter tally equals that of "repays"
# plus one 'j'. The target tally does not depend on `w`, so it is built once in
# a `let` (which also keeps it out of the notebook's globals) instead of being
# rebuilt for every word.
let target = Noticer.LetterTallies("repays") + 'j'
    [w for w in Noticer.WORDS if target == Noticer.LetterTallies(w)]
end

1-element Vector{String}:
 "jaspery"

In [50]:
# For each normalized answer, report whether `has_transaddition` holds for 'j'.
map(answers) do w
    Noticer.has_transaddition(Noticer.LetterTallies(w), 'j')
end

14-element Vector{Bool}:
 0
 0
 1
 0
 0
 0
 0
 0
 1
 1
 0
 0
 0
 0

In [52]:
# Keep only the results accepted by `Noticer.is_identical`.
# NOTE(review): presumably these are features on which every input word has the
# same value; confirm against the definition in Noticer.
filter(Noticer.is_identical, results)

Description,P value,Observed,Expected
Has a 1-letter transdeletion,0.054,_▇,▂▇
Has a transdeletion with letter 'g',0.36,▇_,▇_
Number of occurrences of 'g',0.44,▇____,▇▂___
Has a transdeletion with letter 'w',0.53,▇_,▇_
Has a transdeletion with letter 'v',0.56,▇_,▇_
Has a transaddition with letter 'z',0.63,▇_,▇_
Has a transdeletion with letter 'z',0.71,▇_,▇_
Number of occurrences of 'v',0.76,▇___,▇▁__
Has a transdeletion with letter 'j',0.76,▇_,▇_
Has a transaddition with letter 'q',0.82,▇_,▇_


In [47]:
# Display the current `results` in full.
results

Description,P value,Observed,Expected
Has a transaddition with letter 'j',2.4e-12,▇▂,▇_
Has a transaddition with letter 'w',4.4e-09,▇▄,▇_
Has a transdeletion with letter 'x',2.7e-08,▇▁,▇_
Number of reverse alphabetical bigrams,1.2e-07,▄▅▇▅▄________,_▂▅▇▆▃▁______
Number of unique letters,2.1e-06,__▁▃▄▃▇▁_______,___▁▂▄▇▇▅▂▁____
Has a transaddition with letter 'l',9.7e-06,▇▇,▇▁
Has a transaddition with letter 'u',8.2e-05,▇▄,▇▁
Has a transaddition with letter 'h',9.4e-05,▇▄,▇▁
Has a transaddition with letter 'y',0.00039,▇▃,▇_
Number of unique vowels,0.00063,_▇▄▃___,_▂▇▆▂__
