In [1]:
import pandas as pd
import numpy as np
import itertools

from string import ascii_lowercase
from collections import defaultdict

In [2]:
WORD_LEN = 5
LETTERS = set(ascii_lowercase)

In [3]:
def load_common_words():
    df = pd.read_csv('unigrams5_freq.csv')
    df = df[['word', 'count']]
    return df

def load_words():
    five_letter_words = []

    with open('words.txt') as fr:
        for line in fr.readlines():
            word = line.strip().lower()
            intersect = set(word).intersection(LETTERS)
            if len(word) == 5 and len(intersect) == len(set(word)):
                five_letter_words.append(word)

    df = pd.DataFrame(five_letter_words, columns=['word'])
    return df


def load_wordle_words():
    with open('wordle_words.txt') as fr:
        words = [line.strip() for line in fr.readlines()]

    df = pd.DataFrame(words, columns=['word'])
    return df

In [4]:
df = load_wordle_words()
common_df = load_common_words()
wordle_entropy_df = pd.read_csv('wordle_words_entropy.csv')
df = pd.merge(df, common_df, how='left', on='word').fillna(0)
df = pd.merge(df, wordle_entropy_df, how='inner', on='word')
df.drop_duplicates(inplace=True)

In [5]:
df['disqualified'] = 0

In [6]:
df.head()

Unnamed: 0,word,count,entropy,disqualified
0,aback,0.0,3.5159,0
1,abase,0.0,4.5873,0
2,abate,0.0,4.6336,0
3,abbey,4224864.0,3.9329,0
4,abbot,0.0,3.988,0


## Play Game

In [70]:
class Wordled:
    def __init__(self, df):
        self.disqualified = set([])
        self.pinned = [None] * WORD_LEN
        self.unpinned = [set([]) for _ in range(WORD_LEN)]
        self.guesses = []
        self.must_have = set([])
        self.df = df
        self.patterns = list(itertools.product([-1, 0, 1], repeat=5))
        self.calculate_score()
        
    @staticmethod
    def sigmoid(n):
        return 1 / (1 + np.exp(-n))
        
    @staticmethod
    def pattern_diff(w1, w2):
        pattern = [None] * 5
        remaining_count = 5
        remaining = defaultdict(int)

        for l in w2:
            remaining[l] += 1

        for i, l in enumerate(w1):
            if w1[i] == w2[i]:
                pattern[i] = 1
                remaining[w1[i]] -= 1
                remaining_count -= 1

        if remaining_count > 0:
            for i, l in enumerate(w1):
                if w1[i] != w2[i]:
                    if remaining[w1[i]] > 0:
                        pattern[i] = 0
                        remaining[w1[i]] -= 1
                    else:
                        pattern[i] = -1

        return tuple(pattern)
    
    @staticmethod
    def entropy(probs):
        return round(sum([p * np.log2(1 / p) for p in probs if p > 0]), 4)

    def get_word_bits(self, word):
        bits = []
        for i, l in enumerate(word):
            p = self.letter_freq_with_pos[i][l] / self.letter_freq_with_pos[i]['total']
            bits.append(p * np.log2(1 / p))

        return round(sum(bits), 4)
        
    def calculate_letter_frequency(self):
        letter_freq_with_pos = defaultdict(lambda: defaultdict(int))

        for word in self.df.word:
            for i, l in enumerate(word):
                letter_freq_with_pos[i][l] += 1
                letter_freq_with_pos[i]['total'] += 1
        
        return letter_freq_with_pos
    
    def calculate_entropy(self):
        words_with_pattern_probs = defaultdict(lambda: defaultdict(int))

        for w1 in self.df.word:
            for w2 in self.df.word:
                pattern = Wordled.pattern_diff(w1, w2)
                words_with_pattern_probs[w1][pattern] += 1

        num_words = self.df.shape[0]

        for word, pattern_dict in words_with_pattern_probs.items():
            for pattern, count in pattern_dict.items():
                words_with_pattern_probs[word][pattern] = round(count / num_words, 6)

        entropies = [Wordled.entropy(patts.values()) for patts in words_with_pattern_probs.values()]
            
        return entropies

            
    def calculate_score(self):
        if self.df['count'].sum() > 0:
            self.df.loc[:, 'pop_score'] = (round(self.df['count'] / self.df['count'].sum(), 6)) * 100
            self.df.loc[:, 'pop_score'] = self.df.pop_score.apply(Wordled.sigmoid)
        
        self.letter_freq_with_pos = self.calculate_letter_frequency()
        self.df.loc[:, 'bits'] = self.df.word.apply(self.get_word_bits)
        score = 1 * self.df['entropy'] + 1 * self.df['pop_score'] + 1 * self.df['bits']
        self.df.loc[:, 'score'] = round(score, 4)


        
    def disqualify(self, word):
        for idx in range(WORD_LEN):
            if self.pinned[idx] and word[idx] != self.pinned[idx]:
                return 1
            if word[idx] in self.disqualified:
                return 1
            if word[idx] in self.unpinned[idx]:
                return 1

        return len(self.must_have.intersection(word)) != len(self.must_have)
    
    
    def record_guess(self, word, feedback):
        pins = [(i, l) for i, l in enumerate(word) if feedback[i] == 1]
        unpins = [(i, l) for i, l in enumerate(word) if feedback[i] == 0]
        present_letters = set(self.pinned + [l for i, l in pins] + [l for i, l in unpins])
        disqualified = [
            word[i] for i in range(WORD_LEN)
            if feedback[i] == -1
            and word[i] not in present_letters
        ]
        self.disqualified.update(set(disqualified))
                
        for idx, pin in pins:
            self.pinned[idx] = pin
        
        for idx, unpin in unpins:
            self.unpinned[idx].add(unpin)
            self.must_have.add(unpin)
            
        self.guesses.append(word)
        self.df.loc[:, 'disqualified'] = self.df.word.apply(self.disqualify)
        self.df = self.df.loc[self.df.disqualified == 0, :]
        self.df.loc[:, 'entropy'] = self.calculate_entropy()
        self.calculate_score()
        
    def guess(self, common_only=False, score_col='score'):
        if common_only:
            results = self.df[self.df.pop_score > 0] \
                .sort_values(by=score_col, ascending=False)
        else:
            results = self.df.sort_values(by=score_col, ascending=False)
        
        if len(results) > 0:
            return results.iloc[0].word
        
        return None
    
    def show_all_candidates(self, common_only=False, score_col='score'):
        if common_only:
            return self.df[self.df.pop_score > 0].sort_values(by=score_col, ascending=False)

        return self.df.sort_values(by=score_col, ascending=False)

### Games

In [286]:
def word_score(word, letter_freq):
    return sum([letter_freq[l] for l in word])

def get_letter_freqs(candidates):
    letter_freq = defaultdict(int)

    for word in candidates:
        for l in word:
            letter_freq[l] += 1
            
    return letter_freq

def pick_word(candidates, games, feedbacks=None):
    best_word, best_score = None, 0
    candidates_with_scores = []
    
    if feedbacks is not None:
        candidates = [
            c for i, c in enumerate(candidates)
            if sum(feedbacks[i]) != 5
        ]
            
    letter_freq = get_letter_freqs(candidates)
    
    for i, candidate in enumerate(candidates):
        n = games[i].show_all_candidates().shape[0] or 1000
        score = word_score(candidate, letter_freq) + (1/n) * 10
        candidates_with_scores.append((candidate, score))
        
        if score > best_score:
            best_word, best_score = candidate, score
            
    for c, s in candidates_with_scores:
        print('{} ({:.2f})'.format(c, s), end=' | ')
    print()
    
    return best_word

In [312]:
tdf = df.copy()

games = [Wordled(tdf) for _ in range(4)]

In [313]:
candidates = [game.guess() or 'N/A' for game in games]
word = pick_word(candidates, games)
print(word)

slate (20.00) | slate (20.00) | slate (20.00) | slate (20.00) | 
slate


In [314]:
feedbacks = [
    [-1, -1, -1, 0, 1],
    [-1, -1, 0, -1, 0],
    [1, -1, -1, 0, -1],
    [-1, -1, 1, -1, 0]
]

for i, game in enumerate(games):
    game.record_guess(word, feedback=feedbacks[i])
    
candidates = [game.guess() or 'N/A' for game in games]
word = pick_word(candidates, games, feedbacks)
print(word)

trice (12.59) | cream (12.16) | short (10.26) | heard (12.83) | 
heard


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [315]:
feedbacks = [
    [-1, 0, -1, 0, -1],
    [-1, 0, 0, 0, -1],
    [0, -1, -1, -1, -1],
    [-1, 1, 1, -1, 0]
]

for i, game in enumerate(games):
    game.record_guess(word, feedback=feedbacks[i])
    
candidates = [game.guess() or 'N/A' for game in games]
word = pick_word(candidates, games, feedbacks)
print(word)

trice (10.67) | gamer (9.40) | shout (8.00) | beady (18.00) | 
beady


In [317]:
feedbacks = [
    [-1, 0, -1, -1, -1],
    [-1, 0, 0, -1, -1],
    [-1, -1, -1, -1, -1],
    [1, 1, 1, 1, 1]
]


for i, game in enumerate(games):
    game.record_guess(word, feedback=feedbacks[i])
    
candidates = [game.guess() or 'N/A' for game in games]
word = pick_word(candidates, games, feedbacks)
print(word)

trice (10.00) | wager (7.50) | shout (8.00) | 
trice


In [318]:
feedbacks = [
    [1, 1, -1, -1, 1],
    [-1, 0, -1, 0, 0],
    [0, -1, -1, -1, -1],
    [1, 1, 1, 1, 1]
]

for i, game in enumerate(games):
    game.record_guess(word, feedback=feedbacks[i])
    
candidates = [game.guess() or 'N/A' for game in games]
word = pick_word(candidates, games, feedbacks)
print(word)

trope (16.00) | racer (15.00) | shoot (13.33) | 
trope


In [319]:
feedbacks = [
    [1, 1, 1, -1, 1],
    [-1, 0, -1, 0, 0],
    [0, -1, 1, -1, -1],
    [1, 1, 1, 1, 1]
]


for i, game in enumerate(games):
    game.record_guess(word, feedback=feedbacks[i])
    
candidates = [game.guess() or 'N/A' for game in games]
word = pick_word(candidates, games, feedbacks)
print(word)

trove (20.00) | caper (17.00) | shoot (15.00) | 
trove


In [320]:
for i, game in enumerate(games):
    print('--- GAME {} ---'.format(i + 1))
    print(game.show_all_candidates().head(10))
    print()

--- GAME 1 ---
       word  count  entropy disqualified  pop_score  bits  score
2118  trove    0.0      0.0        False        0.5   0.0    0.5

--- GAME 2 ---
      word  count  entropy  disqualified  pop_score  bits  score
332  caper    0.0      0.0         False        0.5   0.0    0.5

--- GAME 3 ---
       word       count  entropy disqualified  pop_score  bits  score
1735  shoot  12695618.0      1.0        False        1.0   0.5    2.5
1739  shout   3965861.0      1.0        False        1.0   0.5    2.5

--- GAME 4 ---
Empty DataFrame
Columns: [word, count, entropy, disqualified, pop_score, bits, score]
Index: []

