In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from wordleanalysis.wordle_game import Wordle, WordleInfinite
from wordleanalysis.solvers.FilterSolver import FilterSolver

In [2]:
# Load the pre-scored word table; its columns are shown in the preview cell that follows.
words = pd.read_csv("../datasets/words_with_scores.csv")

In [3]:
# Preview the loaded table (the rendered output elides the middle rows).
words

Unnamed: 0,words,letter_1,letter_2,letter_3,letter_4,letter_5,letter_1_positional_score,letter_1_general_score,letter_2_positional_score,letter_2_general_score,letter_3_positional_score,letter_3_general_score,letter_4_positional_score,letter_4_general_score,letter_5_positional_score,letter_5_general_score,total_positional_letter_scores,total_general_letter_scores
0,enzym,e,n,z,y,m,330,7455,388,3478,165,503,124,2400,227,2414,1234,16250
1,ethyl,e,t,h,y,l,330,7455,256,3707,146,1993,124,2400,539,3780,1395,19335
2,othyl,o,t,h,y,l,352,5212,256,3707,146,1993,124,2400,539,3780,1417,17092
3,ewhow,e,w,h,o,w,330,7455,177,1127,146,1993,827,5212,68,1127,1548,16914
4,udyog,u,d,y,o,g,217,2927,108,2735,246,2400,827,5212,171,1864,1569,15138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14850,pares,p,a,r,e,s,1130,2436,2682,7128,1354,4714,2531,7455,4339,7319,12036,29052
14851,sones,s,o,n,e,s,1666,7319,2414,5212,1119,3478,2531,7455,4339,7319,12069,30783
14852,sales,s,a,l,e,s,1666,7319,2682,7128,973,3780,2531,7455,4339,7319,12191,33001
14853,sores,s,o,r,e,s,1666,7319,2414,5212,1354,4714,2531,7455,4339,7319,12304,32019


In [4]:
# Names of the five per-position letter columns of the words table.
letter_cols = [f"letter_{position}" for position in range(1, 6)]

In [5]:
def get_game_details_using_letter_score_strategy(target_words:pd.Series, filter_order_words:pd.DataFrame, game_count=None):
    """Play one Wordle game per target word with a FilterSolver and collect the outcomes.

    Args:
        target_words: Words to solve; read positionally (``.iloc``) from the start.
        filter_order_words: Frame handed to ``FilterSolver.calculate_letter_masks``
            together with the notebook-level ``letter_cols``; its ``"words"`` column
            is given to every solver instance. Presumably the row order is the order
            in which the solver tries candidates -- confirm against FilterSolver.
        game_count: How many leading target words to play. ``None`` plays all of them.

    Returns:
        A DataFrame with one row per game, built from each game's
        ``get_details_dict()`` plus a ``target_word`` column.
    """
    game = WordleInfinite()

    # The masks depend only on the filter set, so compute them once for all games.
    masks = FilterSolver.calculate_letter_masks(filter_order_words, letter_cols)

    n_games = target_words.shape[0] if game_count is None else game_count

    records = []
    for position in range(n_games):
        game.create_new_game(target_words.iloc[position])

        # A fresh solver is built for every game.
        solver = FilterSolver(filter_order_words["words"], masks)
        record = solver.attempt_solve(game).get_details_dict()
        record["target_word"] = game.target_word
        records.append(record)

    return pd.DataFrame(records)

In [6]:
# Seed NumPy's global RNG so the two shuffles below are reproducible.
np.random.seed(99)
game_results = get_game_details_using_letter_score_strategy(
    target_words=words["words"].sample(frac=1),
    filter_order_words=words.sample(frac=1),
    game_count=20,
)

In [7]:
# Show the per-game results: guesses made, per-guess scores, guess count and target word.
game_results

Unnamed: 0,guesses,guess_scores,final_number_guesses,target_word
0,"[shank, croon, zendo, pungo, lingo, jingo]","[[0, 0, 0, 1, 0], [0, 0, 1, 0, 1], [0, 0, 2, 0...",-1,bingo
1,"[shank, jiaos, roars, ofays, ovals, ogams]","[[1, 0, 2, 0, 0], [0, 0, 2, 1, 2], [0, 1, 2, 0...",6,ogams
2,"[shank, gurks, dykes, kibes, okies]","[[1, 0, 0, 0, 1], [0, 0, 0, 1, 2], [0, 0, 1, 2...",5,okies
3,"[shank, pubco, gripe, tepee]","[[0, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 0, 1...",4,tepee
4,"[shank, hosel, heths, herds, herbs]","[[1, 1, 0, 0, 0], [2, 0, 1, 1, 0], [2, 2, 0, 0...",5,herbs
5,"[shank, aorta, porae, goray, loral]","[[0, 0, 1, 0, 0], [1, 2, 2, 0, 0], [0, 2, 2, 2...",5,loral
6,"[shank, girrs, wefts, flobs, yoofs, doofs]","[[1, 0, 0, 0, 0], [0, 0, 0, 0, 2], [0, 0, 1, 0...",6,doofs
7,"[shank, trayf, guava, blaud, imaum]","[[0, 0, 2, 0, 0], [0, 0, 2, 0, 0], [0, 1, 2, 0...",5,imaum
8,"[shank, gurks, yeuks, zouks, bouks, jouks]","[[1, 0, 0, 0, 1], [0, 1, 0, 2, 2], [0, 0, 2, 2...",-1,pouks
9,"[shank, drank, crank, brank]","[[0, 0, 2, 2, 2], [0, 2, 2, 2, 2], [0, 2, 2, 2...",4,brank


In [8]:
def get_exploded_details(game_results:pd.DataFrame) -> pd.DataFrame:
    """Expand per-game results into one row per guess.

    The input frame is not modified: every step works on a new DataFrame, so the
    caller's ``game_results`` keeps its original columns and rows.

    Args:
        game_results: One row per game, with list-valued ``guesses`` and
            ``guess_scores`` columns of equal length per row. Each entry of
            ``guess_scores`` is expected to hold five per-letter scores.

    Returns:
        A new DataFrame with one row per guess. ``word_index`` holds the index label
        of the originating game, ``guess_number`` is the 1-based position of the
        guess within its game, and ``letter_1_score`` .. ``letter_5_score`` hold the
        unpacked per-letter scores.
    """
    # assign() returns a new frame, so the caller's DataFrame does not gain a column.
    guess_numbers = game_results["guesses"].apply(lambda guesses: list(range(1, len(guesses) + 1)))
    exploded = (
        game_results
        .assign(guess_number=guess_numbers)
        .explode(["guesses", "guess_scores", "guess_number"])
        .rename_axis("word_index")
        .reset_index()
    )

    # After exploding, each guess_scores cell is one list of per-letter scores;
    # spread it over dedicated columns aligned on the new row index.
    letter_score_cols = [f"letter_{i}_score" for i in range(1, 6)]
    exploded[letter_score_cols] = pd.DataFrame(exploded["guess_scores"].tolist(), index=exploded.index)

    return exploded

In [9]:
# Rebinds `game_results` to the one-row-per-guess frame, so this cell is meant to be
# run once per freshly computed `game_results` (re-running it feeds exploded rows back in).
game_results = get_exploded_details(game_results)

In [10]:
# Show the first 20 guess-level rows.
game_results.head(20)

Unnamed: 0,word_index,guesses,guess_scores,final_number_guesses,target_word,guess_number,letter_1_score,letter_2_score,letter_3_score,letter_4_score,letter_5_score
0,0,shank,"[0, 0, 0, 1, 0]",-1,bingo,1,0,0,0,1,0
1,0,croon,"[0, 0, 1, 0, 1]",-1,bingo,2,0,0,1,0,1
2,0,zendo,"[0, 0, 2, 0, 2]",-1,bingo,3,0,0,2,0,2
3,0,pungo,"[0, 0, 2, 2, 2]",-1,bingo,4,0,0,2,2,2
4,0,lingo,"[0, 2, 2, 2, 2]",-1,bingo,5,0,2,2,2,2
5,0,jingo,"[0, 2, 2, 2, 2]",-1,bingo,6,0,2,2,2,2
6,1,shank,"[1, 0, 2, 0, 0]",6,ogams,1,1,0,2,0,0
7,1,jiaos,"[0, 0, 2, 1, 2]",6,ogams,2,0,0,2,1,2
8,1,roars,"[0, 1, 2, 0, 2]",6,ogams,3,0,1,2,0,2
9,1,ofays,"[2, 0, 2, 0, 2]",6,ogams,4,2,0,2,0,2


# Comparing different orderings of the filter set

In [11]:
# Fix the seed so the shuffled target order is reproducible.
np.random.seed(10)
# frac=1 samples every row, i.e. a full shuffle of the word column.
target_words = words["words"].sample(frac=1)

In [12]:
# Display the shuffled targets; the original row labels are kept.
target_words

590      azoic
8505     roomy
5893     fitly
14535    wages
7533     sozin
         ...  
11633    mixis
1344     wagyu
12815    minos
7293     corso
1289     light
Name: words, Length: 14855, dtype: object

In [13]:
np.random.seed(25)
# Baseline: the filter set in a shuffled order, drawn after the seed set on the line above.
random_order = get_game_details_using_letter_score_strategy(
    target_words=target_words,
    filter_order_words=words.sample(frac=1),
)

In [14]:
# Display the results of the random-ordering run.
random_order

Unnamed: 0,guesses,guess_scores,final_number_guesses,target_word
0,"[gares, yowza, azoth, azoic]","[[0, 1, 0, 0, 0], [0, 1, 0, 1, 1], [2, 2, 2, 0...",4,azoic
1,"[gares, fruit, rhomb, roomy]","[[0, 0, 1, 0, 0], [0, 1, 0, 0, 0], [2, 0, 2, 2...",4,roomy
2,"[gares, hullo, twilt, fitly]","[[0, 0, 0, 0, 0], [0, 0, 0, 2, 0], [1, 0, 1, 2...",4,fitly
3,"[gares, wages]","[[1, 2, 0, 2, 2], [2, 2, 2, 2, 2]]",2,wages
4,"[gares, skivy, squiz, sozin]","[[0, 0, 0, 0, 1], [2, 0, 1, 0, 0], [2, 0, 0, 2...",4,sozin
...,...,...,...,...
14850,"[gares, sukhs, jibbs, lilos, divis, fifis]","[[0, 0, 0, 0, 2], [0, 0, 0, 0, 2], [0, 2, 0, 0...",-1,mixis
14851,"[gares, manga, fagot, wagyu]","[[1, 2, 0, 0, 0], [0, 2, 0, 1, 0], [0, 2, 2, 0...",4,wagyu
14852,"[gares, sukhs, jibbs, lilos, fidos, pivos]","[[0, 0, 0, 0, 2], [0, 0, 0, 0, 2], [0, 2, 0, 0...",-1,minos
14853,"[gares, skroo, torso, corso]","[[0, 0, 2, 0, 1], [1, 0, 2, 1, 2], [0, 2, 2, 2...",4,corso


In [15]:
# Filter set ordered by total positional letter score, lowest first and then highest first.
pos_score_ascending = get_game_details_using_letter_score_strategy(
    target_words=target_words,
    filter_order_words=words.sort_values(by="total_positional_letter_scores"),
)
pos_score_descending = get_game_details_using_letter_score_strategy(
    target_words=target_words,
    filter_order_words=words.sort_values(by="total_positional_letter_scores", ascending=False),
)

In [16]:
# Filter set ordered by total general letter score; the positional score breaks ties.
gen_score_ascending = get_game_details_using_letter_score_strategy(
    target_words=target_words,
    filter_order_words=words.sort_values(
        by=["total_general_letter_scores", "total_positional_letter_scores"],
    ),
)
gen_score_descending = get_game_details_using_letter_score_strategy(
    target_words=target_words,
    filter_order_words=words.sort_values(
        by=["total_general_letter_scores", "total_positional_letter_scores"],
        ascending=False,
    ),
)

In [17]:
def get_counts_df(guesses_required:pd.DataFrame, type_val:str):
    """Count how many games finished with each ``final_number_guesses`` value.

    Args:
        guesses_required: Per-game results containing a ``final_number_guesses`` column.
        type_val: Label written to every row of the ``ordering_type`` column.

    Returns:
        A DataFrame with columns ``number_guesses``, ``occurance_count`` and
        ``ordering_type``, in the order produced by ``value_counts``.
    """
    counts = (
        guesses_required["final_number_guesses"]
        .value_counts()
        .rename_axis("number_guesses")
        .reset_index(name="occurance_count")
    )
    counts["ordering_type"] = type_val
    return counts

In [18]:
# Tally the guess-count distribution of every run; the label becomes `ordering_type`.
random_order_counts = get_counts_df(random_order, "Random Ordering Of Words")
pos_score_ascending_counts = get_counts_df(pos_score_ascending, "Positional Score Ascending")
pos_score_descending_counts = get_counts_df(pos_score_descending, "Positional Score Descending")
gen_score_ascending_counts = get_counts_df(gen_score_ascending, "Total Score Ascending")
gen_score_descending_counts = get_counts_df(gen_score_descending, "Total Score Descending")

# ignore_index=True gives the stacked frame unique row labels. Without it every
# run contributes the same 0..n labels, and later label-aligned column
# assignments only work while the row order happens to match exactly.
guess_required = pd.concat(
    [
        random_order_counts,
        pos_score_ascending_counts,
        pos_score_descending_counts,
        gen_score_ascending_counts,
        gen_score_descending_counts,
    ],
    ignore_index=True,
)

In [19]:
# Games recorded with -1 guesses are moved to bucket 7 so they sort after the 6-guess games.
guess_required["number_guesses"] = guess_required["number_guesses"].replace(-1, 7)

In [20]:
# The running total is order-dependent: rows must be sorted by guess count within each ordering first.
guess_required = guess_required.sort_values(by=["ordering_type", "number_guesses"])
guess_required["occurance_count_sum"] = (
    guess_required
    .groupby("ordering_type", observed=False)["occurance_count"]
    .cumsum()
)

# Categorical dtype makes the final sort follow this explicit order rather than alphabetical order.
sorter_categories = ["Random Ordering Of Words", "Positional Score Ascending", "Positional Score Descending", "Total Score Ascending", "Total Score Descending"]
guess_required["ordering_type"] = pd.Categorical(guess_required["ordering_type"], categories=sorter_categories)
guess_required = guess_required.sort_values(by=["ordering_type", "number_guesses"])

In [21]:
# Express the counts and running totals as a percentage of the number of rows in `words`.
guess_required["occurance_percent"] = guess_required["occurance_count"].div(len(words)).mul(100).round(2)
guess_required["occurance_percent_sum"] = guess_required["occurance_count_sum"].div(len(words)).mul(100).round(2)

In [22]:
# Display the combined counts, running totals and percentages per ordering.
guess_required

Unnamed: 0,number_guesses,occurance_count,ordering_type,occurance_count_sum,occurance_percent,occurance_percent_sum
6,1,1,Random Ordering Of Words,1,0.01,0.01
5,2,193,Random Ordering Of Words,194,1.3,1.31
4,3,1679,Random Ordering Of Words,1873,11.3,12.61
1,4,3787,Random Ordering Of Words,5660,25.49,38.1
0,5,3952,Random Ordering Of Words,9612,26.6,64.71
3,6,2598,Random Ordering Of Words,12210,17.49,82.19
2,7,2645,Random Ordering Of Words,14855,17.81,100.0
6,1,1,Positional Score Ascending,1,0.01,0.01
5,2,84,Positional Score Ascending,85,0.57,0.57
4,3,720,Positional Score Ascending,805,4.85,5.42


In [23]:
# Grouped bars: one bar per ordering at each guess count, for side-by-side comparison.
# The title and readable labels let the figure stand alone when the notebook is skimmed.
px.bar(
    guess_required,
    x="number_guesses",
    y="occurance_count",
    color="ordering_type",
    barmode="group",
    title="Guesses needed per target word, by filter-set ordering",
    labels={
        "number_guesses": "Guesses needed (7 = not solved)",
        "occurance_count": "Number of target words",
        "ordering_type": "Filter-set ordering",
    },
)

  grouped = df.groupby(required_grouper, sort=False)  # skip one_group groupers


In [24]:
# Cumulative share of target words solved within a given number of guesses.
# `markers` is a boolean switch in plotly express, so pass True instead of a symbol name.
px.line(
    guess_required,
    x="number_guesses",
    y="occurance_percent_sum",
    color="ordering_type",
    markers=True,
    title="Cumulative share of target words by guesses needed",
    labels={
        "number_guesses": "Guesses needed (7 = not solved)",
        "occurance_percent_sum": "Cumulative % of target words",
        "ordering_type": "Filter-set ordering",
    },
)



