In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from wordleanalysis.wordle_game import Wordle, WordleInfinite

In [2]:
# Load the word list together with its precomputed letter scores.
# The path is relative, so it resolves against the notebook's working directory.
words = pd.read_csv("../datasets/words_with_scores.csv")

In [3]:
# Preview the loaded table (one row per word, with per-letter and total scores).
words

Unnamed: 0,words,letter_1,letter_2,letter_3,letter_4,letter_5,letter_1_positional_score,letter_1_general_score,letter_2_positional_score,letter_2_general_score,letter_3_positional_score,letter_3_general_score,letter_4_positional_score,letter_4_general_score,letter_5_positional_score,letter_5_general_score,total_positional_letter_scores,total_general_letter_scores
0,enzym,e,n,z,y,m,330,7455,388,3478,165,503,124,2400,227,2414,1234,16250
1,ethyl,e,t,h,y,l,330,7455,256,3707,146,1993,124,2400,539,3780,1395,19335
2,othyl,o,t,h,y,l,352,5212,256,3707,146,1993,124,2400,539,3780,1417,17092
3,ewhow,e,w,h,o,w,330,7455,177,1127,146,1993,827,5212,68,1127,1548,16914
4,udyog,u,d,y,o,g,217,2927,108,2735,246,2400,827,5212,171,1864,1569,15138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14850,pares,p,a,r,e,s,1130,2436,2682,7128,1354,4714,2531,7455,4339,7319,12036,29052
14851,sones,s,o,n,e,s,1666,7319,2414,5212,1119,3478,2531,7455,4339,7319,12069,30783
14852,sales,s,a,l,e,s,1666,7319,2682,7128,973,3780,2531,7455,4339,7319,12191,33001
14853,sores,s,o,r,e,s,1666,7319,2414,5212,1354,4714,2531,7455,4339,7319,12304,32019


In [4]:
# Columns of `words` holding the letter at each of the five positions;
# used as a module-level global by the simulation function below.
letter_cols = ['letter_1', 'letter_2', 'letter_3', 'letter_4', 'letter_5']

In [5]:
def get_number_of_guesses_using_letter_score_strategy(target_words, guess_words, game_count=None):
    """Simulate Wordle games using a "first remaining candidate" strategy.

    A new game is started for each target word. On every turn the first row of
    ``guess_words`` that is still consistent with all feedback received so far
    is guessed, so the row order of ``guess_words`` defines the strategy.

    Parameters
    ----------
    target_words : pandas.Series
        Words to solve; accessed positionally with ``.iloc``.
    guess_words : pandas.DataFrame
        Candidate guesses in priority order. Must contain a ``"words"`` column
        and the columns named in the module-level ``letter_cols``.
    game_count : int, optional
        Number of games to play, taken from the start of ``target_words``.
        Defaults to every target word.

    Returns
    -------
    pandas.Series
        One entry per game: the number of guesses used, or 7 when the game was
        not solved within six guesses (or no candidate word remained).
    """
    max_guesses = 6

    # NOTE(review): WordleInfinite is a project class; make_guess presumably
    # returns one score per letter (0, 1 or 2) -- confirm against wordle_game.
    wordle = WordleInfinite()
    num_guesses = []

    letter_masks = calculate_letter_masks(guess_words, letter_cols)

    # One game is played per *target* word, so the default must come from
    # target_words; using the guess list's length breaks when the sizes differ.
    if game_count is None:
        game_count = len(target_words)

    for word_index in range(game_count):
        wordle.create_new_game(target_words.iloc[word_index])

        # Every candidate is possible at the start of a game.
        words_matching_guess_mask = pd.Series(data=True, index=guess_words.index)

        for i in range(max_guesses):
            candidates = guess_words[words_matching_guess_mask]
            if candidates.empty:
                # The feedback ruled out every candidate (e.g. the target is
                # not in guess_words): record the game as failed instead of
                # crashing on .iloc[0].
                num_guesses.append(max_guesses + 1)
                break

            guess_word = candidates.iloc[0]["words"]
            guess_score = wordle.make_guess(guess_word)

            num_guesses, updated = update_num_guesses(num_guesses, guess_score, i)
            if updated:
                break

            words_matching_guess_mask = update_mask(letter_masks, words_matching_guess_mask, guess_word, guess_score, letter_cols)

    return pd.Series(num_guesses)


def update_num_guesses(num_guesses, guess_score, current_guess, max_guesses=6):
    """Record the outcome of a game if the latest guess ended it.

    Parameters
    ----------
    num_guesses : list
        Running list of per-game results; appended to in place.
    guess_score : iterable of int
        Per-letter scores of the latest guess.
    current_guess : int
        Zero-based index of the latest guess within the game.
    max_guesses : int, optional
        Number of guesses allowed per game (default 6).

    Returns
    -------
    tuple
        ``(num_guesses, updated)`` where ``updated`` is True when a result was
        appended: ``current_guess + 1`` for a solved game, or
        ``max_guesses + 1`` when the last allowed guess did not solve it.
    """
    # A total of 10 marks the puzzle as solved (presumably five letters each
    # scored 2 -- confirm against the Wordle scorer).
    if sum(guess_score) == 10:
        num_guesses.append(current_guess + 1)
        return num_guesses, True

    # Last allowed guess used without solving: store the failure sentinel.
    if current_guess >= max_guesses - 1:
        num_guesses.append(max_guesses + 1)
        return num_guesses, True

    return num_guesses, False

def calculate_score(guess_score):
    """Return the sum of the per-letter scores in ``guess_score`` (0 if empty)."""
    return sum(guess_score)

def update_mask(letter_masks, words_matching_guess_mask, guess_word, guess_score, letter_cols):
    """Narrow the candidate mask using the feedback from one guess.

    Parameters
    ----------
    letter_masks : dict
        ``letter -> {column: boolean mask, "occurance_sum": per-word count}``
        as built by ``calculate_letter_masks``.
    words_matching_guess_mask : pandas.Series
        Boolean mask of the words still consistent with earlier feedback.
    guess_word : str
        The word that was guessed.
    guess_score : sequence of int
        Score of each guessed letter; 2 is handled as "right letter, right
        position", 1 as "letter occurs elsewhere", anything else as 0.
    letter_cols : list of str
        Position column names, aligned with the letters of ``guess_word``.

    Returns
    -------
    pandas.Series
        The mask restricted to words consistent with this guess as well.
    """
    remaining = words_matching_guess_mask

    for position_score, guess_letter, column in zip(guess_score, guess_word, letter_cols):
        masks_for_letter = letter_masks[guess_letter]
        in_position = masks_for_letter[column]

        if position_score == 2:
            # Exact hit: the word must have this letter in this position.
            remaining = remaining & in_position
            continue

        # How many copies of this letter the guess placed with a non-zero score.
        confirmed_copies = sum(
            1
            for other_letter, other_score in zip(guess_word, guess_score)
            if other_letter == guess_letter and other_score > 0
        )
        occurrences = masks_for_letter["occurance_sum"]

        if position_score == 1:
            # Letter is present elsewhere: at least the confirmed number of copies.
            count_ok = occurrences >= confirmed_copies
        else:
            # No further copies exist: exactly the confirmed number of copies.
            count_ok = occurrences == confirmed_copies

        # Not an exact hit, so the letter cannot sit in this position either.
        remaining = remaining & (~in_position & count_ok)

    return remaining


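# Filtering rules applied by update_mask for each guessed letter:
#   - score 2: keep only words with that letter in that position
#   - score 1 or 0: drop words with that letter in that position
#   - score 1: the letter must occur at least as many times as it scored 1 or 2 in the guess
#   - score 0: the letter must occur exactly as many times as it scored 1 or 2 in the guess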

def calculate_letter_masks(guess_words, letter_cols):
    """Precompute per-letter lookup masks over ``guess_words``.

    Parameters
    ----------
    guess_words : pandas.DataFrame
        Candidate words, one row each, with one column per letter position.
    letter_cols : list of str
        Names of the position columns to index.

    Returns
    -------
    dict
        Maps each of the 26 lowercase letters to a dict containing, for every
        column in ``letter_cols``, a boolean Series marking the rows whose
        letter in that column equals the key, plus ``"occurance_sum"``: the
        number of those columns in which the letter appears, per row.
    """
    alphabet = [
        'e', 's', 'a', 'o', 'r', 'i', 'l', 't', 'n', 'u', 'd', 'p', 'm',
        'y', 'c', 'h', 'g', 'b', 'k', 'f', 'w', 'v', 'z', 'j', 'x', 'q'
    ]

    masks_by_letter = {}
    for current_letter in alphabet:
        # Start the count at zero for every word, then add one per matching position.
        per_letter = {"occurance_sum": pd.Series(data=0, index=guess_words.index)}
        for column in letter_cols:
            is_letter_here = guess_words[column] == current_letter
            per_letter[column] = is_letter_here
            per_letter["occurance_sum"] = per_letter["occurance_sum"] + is_letter_here
        masks_by_letter[current_letter] = per_letter

    return masks_by_letter

In [6]:
# Seed NumPy's global generator so the shuffle below is repeatable.
np.random.seed(10)
# frac=1 samples every row, i.e. yields all words in shuffled order.
target_words = words["words"].sample(frac=1)

In [7]:
# Preview the shuffled target words.
target_words

590      azoic
8505     roomy
5893     fitly
14535    wages
7533     sozin
         ...  
11633    mixis
1344     wagyu
12815    minos
7293     corso
1289     light
Name: words, Length: 14855, dtype: object

In [8]:
# Baseline strategy: guess candidates in a (seeded) random order.
np.random.seed(25)
random_order = get_number_of_guesses_using_letter_score_strategy(target_words, words.sample(frac=1))

In [9]:
# Distribution of guesses needed by the random baseline (7 marks an unsolved game).
random_order.value_counts()

5    3952
4    3787
7    2645
6    2598
3    1679
2     193
1       1
Name: count, dtype: int64

In [10]:
# Strategies that order candidates by total positional letter score,
# lowest-first and highest-first respectively.
pos_score_ascending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values("total_positional_letter_scores"))
pos_score_descending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values("total_positional_letter_scores", ascending=False))

In [11]:
# Strategies that order candidates by total general letter score, with the
# positional score as the secondary sort key; lowest-first and highest-first.
gen_score_ascending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values(["total_general_letter_scores", "total_positional_letter_scores"]))
gen_score_descending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values(["total_general_letter_scores", "total_positional_letter_scores"], ascending=False))

In [12]:
def get_counts_df(guesses_required, type_val):
    """Tabulate how often each guess count occurs, labelled with a strategy name.

    Parameters
    ----------
    guesses_required : pandas.Series
        Number of guesses used in each game.
    type_val : str
        Label stored in the ``"type"`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns ``number_guesses``, ``occurance_count`` and ``type``, one row
        per distinct guess count, in ``value_counts`` order.
    """
    return (
        guesses_required
        .value_counts()
        .reset_index()
        .set_axis(["number_guesses", "occurance_count"], axis="columns")
        .assign(type=type_val)
    )

In [13]:
# Turn each strategy's per-game results into a labelled frequency table.
random_order_counts = get_counts_df(random_order, "Random Ordering Of Words")
pos_score_ascending_counts = get_counts_df(pos_score_ascending, "Positional Score Ascending")
pos_score_descending_counts = get_counts_df(pos_score_descending, "Positional Score Descending")
gen_score_ascending_counts = get_counts_df(gen_score_ascending, "Total Score Ascending")
gen_score_descending_counts = get_counts_df(gen_score_descending, "Total Score Descending")

# Stack the tables into one long frame; each table keeps its own 0-based
# index, so index values repeat across strategies.
guess_required = pd.concat([random_order_counts, pos_score_ascending_counts, pos_score_descending_counts, gen_score_ascending_counts, gen_score_descending_counts])

In [14]:
# Preview the combined frequency table.
guess_required

Unnamed: 0,number_guesses,occurance_count,type
0,5,3952,Random Ordering Of Words
1,4,3787,Random Ordering Of Words
2,7,2645,Random Ordering Of Words
3,6,2598,Random Ordering Of Words
4,3,1679,Random Ordering Of Words
5,2,193,Random Ordering Of Words
6,1,1,Random Ordering Of Words
0,7,3996,Positional Score Ascending
1,5,3879,Positional Score Ascending
2,6,3824,Positional Score Ascending


In [15]:
# Grouped bar chart comparing the strategies. A title and readable axis labels
# are set so the figure stands on its own, and the sentinel value 7 is explained.
px.bar(
    guess_required,
    x="number_guesses",
    y="occurance_count",
    color="type",
    barmode="group",
    title="Guesses needed per game by guess-word ordering (7 = not solved in 6 guesses)",
    labels={
        "number_guesses": "Number of guesses",
        "occurance_count": "Number of games",
        "type": "Guess-word ordering",
    },
)