In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from wordle_game import Wordle, WordleInfinite

# Load dataset

In [67]:
words = pd.read_csv("./datasets/words.txt", header=None)
words.columns = ["words"]
words

Unnamed: 0,words
0,rossa
1,jetty
2,wizzo
3,cuppa
4,cohoe
...,...
14850,dunny
14851,decal
14852,fungs
14853,cadgy


In [68]:
for i in range(5):
    words[f"letter_{i+1}"] = words["words"].apply(lambda x: x[i])

words

Unnamed: 0,words,letter_1,letter_2,letter_3,letter_4,letter_5
0,rossa,r,o,s,s,a
1,jetty,j,e,t,t,y
2,wizzo,w,i,z,z,o
3,cuppa,c,u,p,p,a
4,cohoe,c,o,h,o,e
...,...,...,...,...,...,...
14850,dunny,d,u,n,n,y
14851,decal,d,e,c,a,l
14852,fungs,f,u,n,g,s
14853,cadgy,c,a,d,g,y


In [69]:
letter_cols = [f"letter_{i+1}" for i in range(5)]

In [70]:
all_letters = pd.concat([pd.DataFrame({"letters": words[col], "position": col, "count": 1}) for col in letter_cols])
letter_counts = all_letters.groupby(["letters", "position"]).sum()
letter_counts.reset_index(inplace=True)
total_counts = letter_counts.groupby("letters")[["count"]].sum()
total_counts.columns = ["total_counts"]
total_counts.reset_index(inplace=True)
letter_counts = letter_counts.merge(total_counts, on="letters")
letter_counts.sort_values(["total_counts", "position"], ascending=[False, True], inplace=True)
letter_counts

Unnamed: 0,letters,position,count,total_counts
20,e,letter_1,330,7455
21,e,letter_2,1857,7455
22,e,letter_3,998,7455
23,e,letter_4,2531,7455
24,e,letter_5,1739,7455
...,...,...,...,...
80,q,letter_1,103,145
81,q,letter_2,16,145
82,q,letter_3,18,145
83,q,letter_4,3,145


In [71]:
px.bar(letter_counts, x="letters", y="count", color="position", title="Occurance counts of letters")

In [72]:
vowel_list = ["a", "e", "i", "o", "u", "y"]
vowel_counts = letter_counts[letter_counts["letters"].isin(vowel_list)]
px.bar(vowel_counts, x="letters", y="count", color="position", title="Occurance counts of vowels")

In [73]:
vowel_list = ["a", "e", "i", "o", "u", "y"]
vowel_counts = letter_counts[~letter_counts["letters"].isin(vowel_list)]
px.bar(vowel_counts, x="letters", y="count", color="position", title="Occurance counts of consonants")

# Scoring using counts

In [74]:
pos_score_cols = []
gen_score_cols = []
for letter_col in letter_cols:
    pos_score_col = f"{letter_col}_positional_score"
    pos_score_cols.append(pos_score_col)
    gen_score_col = f"{letter_col}_general_score"
    gen_score_cols.append(gen_score_col)

    counts_for_letter_at_postion = letter_counts.loc[letter_counts["position"] == letter_col, ["letters", "count", "total_counts"]]
    counts_for_letter_at_postion.rename(columns={"count": pos_score_col, "total_counts": gen_score_col}, inplace=True)

    words = words.merge(counts_for_letter_at_postion, left_on=letter_col, right_on="letters")
    words.drop("letters", axis=1, inplace=True)

words["total_positional_letter_scores"] = words[pos_score_cols].sum(axis=1)
words.sort_values("total_positional_letter_scores", inplace=True)

words["total_general_letter_scores"] = words[gen_score_cols].sum(axis=1)

In [75]:
words

Unnamed: 0,words,letter_1,letter_2,letter_3,letter_4,letter_5,letter_1_positional_score,letter_1_general_score,letter_2_positional_score,letter_2_general_score,letter_3_positional_score,letter_3_general_score,letter_4_positional_score,letter_4_general_score,letter_5_positional_score,letter_5_general_score,total_positional_letter_scores,total_general_letter_scores
10890,enzym,e,n,z,y,m,330,7455,388,3478,165,503,124,2400,227,2414,1234,16250
11882,ethyl,e,t,h,y,l,330,7455,256,3707,146,1993,124,2400,539,3780,1395,19335
11881,othyl,o,t,h,y,l,352,5212,256,3707,146,1993,124,2400,539,3780,1417,17092
14408,ewhow,e,w,h,o,w,330,7455,177,1127,146,1993,827,5212,68,1127,1548,16914
14637,udyog,u,d,y,o,g,217,2927,108,2735,246,2400,827,5212,171,1864,1569,15138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6647,pares,p,a,r,e,s,1130,2436,2682,7128,1354,4714,2531,7455,4339,7319,12036,29052
6736,sones,s,o,n,e,s,1666,7319,2414,5212,1119,3478,2531,7455,4339,7319,12069,30783
6575,sales,s,a,l,e,s,1666,7319,2682,7128,973,3780,2531,7455,4339,7319,12191,33001
6630,sores,s,o,r,e,s,1666,7319,2414,5212,1354,4714,2531,7455,4339,7319,12304,32019


In [76]:
mask = pd.Series(data=False, index=words.index)
for letter_col in letter_cols:
    for letter_col_2 in letter_cols:
        if letter_col != letter_col_2:
            mask = mask | (words[letter_col] == words[letter_col_2])

print(f"Dropping words with duplicated letters temp {mask.sum()}")
words.drop(words[mask].index, inplace=True)

Dropping words with duplicated letters temp 5490


In [77]:
words

Unnamed: 0,words,letter_1,letter_2,letter_3,letter_4,letter_5,letter_1_positional_score,letter_1_general_score,letter_2_positional_score,letter_2_general_score,letter_3_positional_score,letter_3_general_score,letter_4_positional_score,letter_4_general_score,letter_5_positional_score,letter_5_general_score,total_positional_letter_scores,total_general_letter_scores
10890,enzym,e,n,z,y,m,330,7455,388,3478,165,503,124,2400,227,2414,1234,16250
11882,ethyl,e,t,h,y,l,330,7455,256,3707,146,1993,124,2400,539,3780,1395,19335
11881,othyl,o,t,h,y,l,352,5212,256,3707,146,1993,124,2400,539,3780,1417,17092
14637,udyog,u,d,y,o,g,217,2927,108,2735,246,2400,827,5212,171,1864,1569,15138
14414,upbow,u,p,b,o,w,217,2927,249,2436,386,1849,827,5212,68,1127,1747,13551
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6752,panes,p,a,n,e,s,1130,2436,2682,7128,1119,3478,2531,7455,4339,7319,11801,27816
6646,mares,m,a,r,e,s,951,2414,2682,7128,1354,4714,2531,7455,4339,7319,11857,29030
6642,cares,c,a,r,e,s,970,2246,2682,7128,1354,4714,2531,7455,4339,7319,11876,28862
6644,bares,b,a,r,e,s,1003,1849,2682,7128,1354,4714,2531,7455,4339,7319,11909,28465


In [78]:
def get_number_of_guesses_using_letter_score_strategy(target_words, guess_words, game_count=1000):
    max_guesses = 6

    wordle = WordleInfinite()
    num_guesses = []

    for word_index in range(game_count):
        wordle.create_new_game(target_words.iloc[word_index])

        guess_score = [0, 0, 0, 0, 0]
        words_matching_guess_mask = pd.Series(data=True, index=guess_words.index)

        for i in range(max_guesses):
            guess_word = guess_words[words_matching_guess_mask].iloc[0]["words"]
            guess_score = wordle.make_guess(guess_word)

            total_score = 0
            for score in guess_score:
                total_score += score

            if total_score == 10:
                # print(f"Solved! The word '{guess_word}' was guessed after {i+1} attempts and the actual word was '{wordle.target_word}'")
                num_guesses.append(i+1)
                break
            elif i == 5:
                num_guesses.append(7)
                break

            for letter_score, letter, letter_col in zip(guess_score, guess_word, letter_cols):
                if letter_score == 2:
                    mask = guess_words[letter_col] == letter
                elif letter_score == 1:
                    mask = pd.Series(data=False, index=guess_words.index)
                    for letter_col_2 in letter_cols:
                        if letter_col_2 != letter_col:
                            iter_mask = guess_words[letter_col_2] == letter
                            mask = mask | (iter_mask)
                else:
                    mask = pd.Series(data=True, index=guess_words.index)
                    # if letter not in guess_word[:]
                    for letter_col_2 in letter_cols:
                        iter_mask = guess_words[letter_col_2] == letter
                        mask = mask & (~iter_mask)

                words_matching_guess_mask = words_matching_guess_mask & mask

    return pd.Series(num_guesses)


In [79]:
np.random.seed(10)
target_words = words["words"].sample(frac=1)

In [80]:
target_words

1086     bucko
2907     swive
6909     antes
14096    polar
4232     baldy
         ...  
1821     blate
12296    saved
1042     rubio
10588    tehrs
2435     amble
Name: words, Length: 9365, dtype: object

In [81]:
np.random.seed(10)
random_order = get_number_of_guesses_using_letter_score_strategy(target_words, words.sample(frac=1))

In [82]:
pos_score_ascending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values("total_positional_letter_scores"))
pos_score_descending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values("total_positional_letter_scores", ascending=False))

In [83]:
gen_score_ascending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values(["total_general_letter_scores", "total_positional_letter_scores"]))
gen_score_descending = get_number_of_guesses_using_letter_score_strategy(target_words, words.sort_values(["total_general_letter_scores", "total_positional_letter_scores"], ascending=False))

In [84]:
def get_counts_df(guesses_required, type_val):
    guesses_required_count = guesses_required.value_counts()
    guesses_required_count = guesses_required_count.reset_index()
    guesses_required_count.columns = ["number_guesses", "occurance_count"]
    guesses_required_count["type"] = type_val
    return guesses_required_count

In [85]:
random_order_counts = get_counts_df(random_order, "Random Ordering Of Words")
pos_score_ascending_counts = get_counts_df(pos_score_ascending, "Positional Score Ascending")
pos_score_descending_counts = get_counts_df(pos_score_descending, "Positional Score Descending")
gen_score_ascending_counts = get_counts_df(gen_score_ascending, "Total Score Ascending")
gen_score_descending_counts = get_counts_df(gen_score_descending, "Total Score Descending")

guess_required = pd.concat([random_order_counts, pos_score_ascending_counts, pos_score_descending_counts, gen_score_ascending_counts, gen_score_descending_counts])

In [86]:
guess_required

Unnamed: 0,number_guesses,occurance_count,type
0,4,440,Random Ordering Of Words
1,3,362,Random Ordering Of Words
2,5,118,Random Ordering Of Words
3,2,68,Random Ordering Of Words
4,6,10,Random Ordering Of Words
5,1,1,Random Ordering Of Words
6,7,1,Random Ordering Of Words
0,5,363,Positional Score Ascending
1,4,243,Positional Score Ascending
2,6,199,Positional Score Ascending


In [87]:
px.bar(guess_required, x="number_guesses", y="occurance_count", color="type", barmode="group")