# Wordle

### Strategy 1
* Find the most common letters in words with N letters
* Find words without any repeating letters made of the most common letters
* Submit the first guess, then plug what was learned from the interactive CLI into the next search

In [244]:
# Load the autoreload extension.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload

# Autoreload reloads modules before executing code
# 0: disable
# 1: reload modules imported with %aimport
# 2: reload all modules, except those excluded by %aimport
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [245]:
import os
import nltk

from nltk.corpus import words

# Parent of the current working directory, with symlinks resolved.
SRC_DIR = os.path.realpath(os.path.join(os.path.curdir, '..'))
# Directory added to nltk's search path below
# (presumably the corpora shipped with makewords; confirm against the repo layout).
NLTK_DIR = os.path.join(SRC_DIR, 'src/makewords/nltk_data')
# Guarded so that re-running this cell does not append the same directory again.
if NLTK_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DIR)

In [246]:
# Prepare for analysis

import string
from itertools import chain

import numpy as np
import pandas as pd

N = 5  # word length under analysis
# Per-position ("register") column names: z0 .. z{N-1}
zs = list(map('z{}'.format, range(N)))
# Per-word repeat-count column names: n1 .. n{N-1}
ns = list(map('n{}'.format, range(1, N)))

def get_clean_df():
    """Return an all-zero letter-statistics table.

    The frame has one row per lowercase ASCII letter (index named
    ``letters``) and the columns ``total``, followed by every name in the
    module-level ``zs`` list, followed by every name in ``ns``.
    """
    column_names = ['total', *zs, *ns]
    table = pd.DataFrame(
        0,
        index=list(string.ascii_lowercase),
        columns=column_names,
    )
    table.index.name = 'letters'
    return table

# Start from an all-zero table and preview its first rows.
df = get_clean_df()
df.head(4)

Unnamed: 0_level_0,total,z0,z1,z2,z3,z4,n1,n2,n3,n4
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
a,0,0,0,0,0,0,0,0,0,0
b,0,0,0,0,0,0,0,0,0,0
c,0,0,0,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0,0,0,0


In [247]:
# Begin analysis
import collections

# TODO Clean and cache this
# Lower-cased corpus entries whose length is exactly N.
nl_words = list(map(str.lower, (entry for entry in words.words() if len(entry) == N)))

def get_possible_words(df, letters=None, words=None):
    """Accumulate letter statistics for candidate words into ``df``.

    For every word considered, each letter occurrence adds one to that
    letter's ``total`` column and to the ``z<i>`` column of the position it
    occupies. Then, per distinct letter of the word, the ``n<k>`` column is
    incremented, where ``k`` is how many times the letter occurs in that word.

    Args:
        df: Table indexed by letter with ``total``, ``z<i>`` and ``n<k>``
            columns. It is updated in place.
        letters: Optional iterable of letters. When given, only words that
            contain every one of them are counted.
        words: Optional iterable of words to analyse. Defaults to the
            module-level ``nl_words`` list.

    Returns:
        The same ``df`` object, after updating.

    Raises:
        KeyError: If a word contains a character that is not in the index,
            or needs a column the table does not have.
    """
    candidates = nl_words if words is None else words
    required = () if letters is None else tuple(letters)
    for word in candidates:
        # A `continue` inside a per-letter loop would only advance that inner
        # loop, so all required letters are checked up front instead.
        if not all(letter in word for letter in required):
            continue
        for position, letter in enumerate(word):
            # `.at` writes straight into the frame; chained `df.loc[row][col]`
            # can end up updating a temporary row object instead.
            df.at[letter, 'total'] += 1                      # one per occurrence
            df.at[letter, 'z{}'.format(position)] += 1       # one per register
        for letter, repeats in collections.Counter(word).items():
            df.at[letter, 'n{}'.format(repeats)] += 1        # repeats within this word
    return df

# Count into a fresh table so that re-running this cell does not add the same
# words on top of counts already accumulated in `df`.
df = get_possible_words(get_clean_df())
df.head(4)

Unnamed: 0_level_0,total,z0,z1,z2,z3,z4,n1,n2,n3,n4
letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
a,5879,810,1837,1040,1162,1030,4167,805,34,0
b,1397,750,66,316,189,76,1185,100,4,0
c,1878,768,175,292,454,189,1664,104,2,0
d,1703,467,90,364,313,469,1499,96,4,0


In [248]:
def first_guess(df):
    """Assemble a first guess from the N most frequent letters.

    The N letters with the highest ``total`` are taken, and their counts per
    register (the module-level ``zs`` columns) are ranked from highest to
    lowest. Each register then receives the best-ranked letter that has not
    already been placed elsewhere, so no letter repeats. No check is made
    that the resulting string is an actual word.

    Args:
        df: Letter-statistics table with a ``total`` column and the ``zs``
            columns, indexed by letter.

    Returns:
        The chosen letters joined in register order.
    """
    # (letter, register) pairs for the top-N letters, most frequent first.
    ranked_pairs = (
        df.sort_values(by='total', ascending=False)
          .head(N)[zs]
          .stack()
          .sort_values(axis='index', ascending=False)
          .keys()
    )

    # Greedy assignment: the first pair seen for a free register wins, as long
    # as its letter has not been used by another register.
    guess = {}
    for letter, reg in ranked_pairs:
        if reg not in guess and letter not in guess.values():
            guess[reg] = letter

    # Follow the order of `zs` rather than sorting register names as strings,
    # which would place 'z10' before 'z2' for longer words.
    return ''.join(guess[reg] for reg in zs if reg in guess)

# Build the opening guess from the letter statistics and show it.
res = first_guess(df)
print(res)

oarie


In [254]:
import makewords

# Ask makewords for 5-letter candidates; judging by the output below, the call
# prints each word and also returns them as a list.
# NOTE(review): presumably the first argument is the pool of allowed letters and
# `exclude` holds letters ruled out by earlier guesses — confirm against makewords.
# NOTE(review): 'n' is in both arguments, yet every word shown below contains 'n' —
# confirm how makewords resolves that.
makewords.possible_words('nibcdefghijklmpqrstuvwxyz', exclude='aon', length=5)

[makewords] Using en wordlist sourced from nltk.
beden
bedin
begin
begun
behen
being
bench
bendy
benet
benjy
benne
benny
bensh
benty
bepen
bepun
berne
besin
binge
bingy
birny
biune
bixin
blend
blent
blind
blink
blunk
blunt
brent
brine
bring
brink
briny
bruin
brunt
bunce
bunch
bundy
bungy
bunny
bunty
burin
burnt
burny
bysen
cense
cerin
cetin
cheng
chien
chine
ching
chink
chint
chunk
churn
cinch
cinct
cinel
cline
cling
clink
clint
clung
clunk
creen
crine
crink
crunk
crunt
ctene
cumin
cunye
curin
cutin
cynic
deben
deign
deink
denim
dense
denty
detin
didnt
diene
diner
dinge
dingy
dinic
dinky
dinus
dizen
dreng
drink
drinn
drung
drunk
dukhn
dunce
dunch
dungy
dunne
dunny
dunst
dwine
dying
eigne
eking
eldin
elfin
elsin
emend
encup
ended
ender
endew
endue
enemy
engem
ennui
enrib
enrut
ensky
ensue
enter
entry
enure
enzym
eshin
evens
event
eying
feign
feint
fence
fendy
fenks
fenny
ferny
fiend
fient
finch
finer
finis
finny
fling
flint
flung
flunk
flurn
fundi
funds
fungi
funis
funky
funny
genep
gen

['beden',
 'bedin',
 'begin',
 'begun',
 'behen',
 'being',
 'bench',
 'bendy',
 'benet',
 'benjy',
 'benne',
 'benny',
 'bensh',
 'benty',
 'bepen',
 'bepun',
 'berne',
 'besin',
 'binge',
 'bingy',
 'birny',
 'biune',
 'bixin',
 'blend',
 'blent',
 'blind',
 'blink',
 'blunk',
 'blunt',
 'brent',
 'brine',
 'bring',
 'brink',
 'briny',
 'bruin',
 'brunt',
 'bunce',
 'bunch',
 'bundy',
 'bungy',
 'bunny',
 'bunty',
 'burin',
 'burnt',
 'burny',
 'bysen',
 'cense',
 'cerin',
 'cetin',
 'cheng',
 'chien',
 'chine',
 'ching',
 'chink',
 'chint',
 'chunk',
 'churn',
 'cinch',
 'cinct',
 'cinel',
 'cline',
 'cling',
 'clink',
 'clint',
 'clung',
 'clunk',
 'creen',
 'crine',
 'crink',
 'crunk',
 'crunt',
 'ctene',
 'cumin',
 'cunye',
 'curin',
 'cutin',
 'cynic',
 'deben',
 'deign',
 'deink',
 'denim',
 'dense',
 'denty',
 'detin',
 'didnt',
 'diene',
 'diner',
 'dinge',
 'dingy',
 'dinic',
 'dinky',
 'dinus',
 'dizen',
 'dreng',
 'drink',
 'drinn',
 'drung',
 'drunk',
 'dukhn',
 'dunce',


In [255]:
# Same query as the previous cell, with c, l, u and g dropped from the first
# argument and appended to `exclude`.
# NOTE(review): presumably these are letters ruled out by the next guess —
# confirm against the game feedback and the makewords API.
makewords.possible_words('nibdefhijkmpqrstvwxyz', exclude='aonclug', length=5)

[makewords] Using en wordlist sourced from nltk.
beden
bedin
behen
bendy
benet
benjy
benne
benny
bensh
benty
bepen
berne
besin
birny
bixin
brent
brine
brink
briny
bysen
deben
deink
denim
dense
denty
detin
didnt
diene
diner
dinky
dizen
drink
drinn
dwine
emend
ended
ender
endew
enemy
enrib
ensky
enter
entry
enzym
eshin
evens
event
feint
fendy
fenks
fenny
ferny
fiend
fient
finer
finis
finny
hemen
hemin
henny
henry
herne
hinny
hymen
hynde
imine
impen
index
indri
inept
inerm
inert
infer
infit
infix
inken
inker
inket
inner
innet
insee
inset
inter
inwit
irene
jenny
jinks
jinni
jinny
keten
kinky
kneed
knezi
knife
kvint
mends
mense
mensk
mesne
miner
mines
minim
minny
minty
mixen
mneme
nebby
neddy
needs
needy
neese
neeze
neffy
neist
neper
nerve
nervy
nesty
neter
netty
never
newsy
nibby
nieve
nifty
nimbi
ninny
ninth
nippy
nisei
nisse
niter
nitid
nitty
nixie
nymph
nyxis
peine
pekin
penis
penni
penny
pensy
phene
piend
pindy
pined
piner
piney
pinky
pinny
pinte
pirny
preen
prine
prink
print
reins
ren

['beden',
 'bedin',
 'behen',
 'bendy',
 'benet',
 'benjy',
 'benne',
 'benny',
 'bensh',
 'benty',
 'bepen',
 'berne',
 'besin',
 'birny',
 'bixin',
 'brent',
 'brine',
 'brink',
 'briny',
 'bysen',
 'deben',
 'deink',
 'denim',
 'dense',
 'denty',
 'detin',
 'didnt',
 'diene',
 'diner',
 'dinky',
 'dizen',
 'drink',
 'drinn',
 'dwine',
 'emend',
 'ended',
 'ender',
 'endew',
 'enemy',
 'enrib',
 'ensky',
 'enter',
 'entry',
 'enzym',
 'eshin',
 'evens',
 'event',
 'feint',
 'fendy',
 'fenks',
 'fenny',
 'ferny',
 'fiend',
 'fient',
 'finer',
 'finis',
 'finny',
 'hemen',
 'hemin',
 'henny',
 'henry',
 'herne',
 'hinny',
 'hymen',
 'hynde',
 'imine',
 'impen',
 'index',
 'indri',
 'inept',
 'inerm',
 'inert',
 'infer',
 'infit',
 'infix',
 'inken',
 'inker',
 'inket',
 'inner',
 'innet',
 'insee',
 'inset',
 'inter',
 'inwit',
 'irene',
 'jenny',
 'jinks',
 'jinni',
 'jinny',
 'keten',
 'kinky',
 'kneed',
 'knezi',
 'knife',
 'kvint',
 'mends',
 'mense',
 'mensk',
 'mesne',
 'miner',
