In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from wordleanalysis.wordle_game import Wordle, WordleInfinite
from wordleanalysis.solvers.FilterSolver import FilterSolver

In [2]:
# Load the scored word list: one row per five-letter word, with each letter split
# into its own column plus per-letter positional/general scores and their totals
# (the full column set is visible in the table rendered by the next cell).
words = pd.read_csv("../datasets/words_with_scores.csv")

In [3]:
# Preview the loaded table; pandas elides the middle rows in the rendered output.
words

Unnamed: 0,words,letter_1,letter_2,letter_3,letter_4,letter_5,letter_1_positional_score,letter_1_general_score,letter_2_positional_score,letter_2_general_score,letter_3_positional_score,letter_3_general_score,letter_4_positional_score,letter_4_general_score,letter_5_positional_score,letter_5_general_score,total_positional_letter_scores,total_general_letter_scores
0,enzym,e,n,z,y,m,330,7455,388,3478,165,503,124,2400,227,2414,1234,16250
1,ethyl,e,t,h,y,l,330,7455,256,3707,146,1993,124,2400,539,3780,1395,19335
2,othyl,o,t,h,y,l,352,5212,256,3707,146,1993,124,2400,539,3780,1417,17092
3,ewhow,e,w,h,o,w,330,7455,177,1127,146,1993,827,5212,68,1127,1548,16914
4,udyog,u,d,y,o,g,217,2927,108,2735,246,2400,827,5212,171,1864,1569,15138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14850,pares,p,a,r,e,s,1130,2436,2682,7128,1354,4714,2531,7455,4339,7319,12036,29052
14851,sones,s,o,n,e,s,1666,7319,2414,5212,1119,3478,2531,7455,4339,7319,12069,30783
14852,sales,s,a,l,e,s,1666,7319,2682,7128,973,3780,2531,7455,4339,7319,12191,33001
14853,sores,s,o,r,e,s,1666,7319,2414,5212,1354,4714,2531,7455,4339,7319,12304,32019


In [4]:
# Names of the per-position letter columns in `words` (positions 1 through 5).
letter_cols = [f"letter_{position}" for position in range(1, 6)]

In [5]:
def get_number_of_guesses_using_letter_score_strategy(target_words:pd.Series, filter_order_words:pd.DataFrame, game_count=None):
    """Play one Wordle game per target word with ``FilterSolver`` and record the guess counts.

    Parameters
    ----------
    target_words : pd.Series
        Words to solve for, played in the Series' positional order.
    filter_order_words : pd.DataFrame
        Candidate-word table. It must contain a ``"words"`` column and the
        columns named in the notebook-level ``letter_cols`` list. Its row order
        is handed to the solver unchanged (presumably the order in which
        ``FilterSolver`` tries candidates - confirm against ``FilterSolver``).
    game_count : int, optional
        Number of leading entries of ``target_words`` to play. ``None`` plays
        all of them. Values larger than ``len(target_words)`` are clamped to
        the available words instead of raising ``IndexError``; values of zero
        or less play no games.

    Returns
    -------
    pd.Series
        ``number_guesses`` reported by the solver for each game, in play order,
        on a fresh default index (not the index of ``target_words``).
        NOTE(review): runs in this notebook contain ``-1`` among the values;
        presumably that marks a game the solver did not finish - confirm
        against ``FilterSolver.attempt_solve``.
    """
    wordle = WordleInfinite()

    # The masks depend only on the candidate table, so build them once and
    # share them across every game.
    letter_masks = FilterSolver.calculate_letter_masks(filter_order_words, letter_cols)

    total_targets = len(target_words)
    if game_count is None:
        game_count = total_targets
    # Clamp so that an oversized request plays every available word rather
    # than indexing past the end of the Series.
    game_count = max(0, min(game_count, total_targets))

    num_guesses = []
    for target_word in target_words.iloc[:game_count]:
        wordle.create_new_game(target_word)

        # A new solver is constructed for every game, as in the original loop.
        filter_solver = FilterSolver(filter_order_words["words"], letter_masks)
        game_details = filter_solver.attempt_solve(wordle)

        num_guesses.append(game_details.number_guesses)

    return pd.Series(num_guesses)

In [16]:
# Shuffle every word into a reproducible play order. An integer `random_state`
# seeds a private RandomState, which yields the same permutation as calling
# np.random.seed(10) first, but without mutating NumPy's global RNG state.
target_words = words["words"].sample(frac=1, random_state=10)

In [17]:
# Inspect the shuffled play order; each word keeps its original row label as the index.
target_words

590      azoic
8505     roomy
5893     fitly
14535    wages
7533     sozin
         ...  
11633    mixis
1344     wagyu
12815    minos
7293     corso
1289     light
Name: words, Length: 14855, dtype: object

In [18]:
# Seed NumPy's global RNG so the shuffled candidate order below is repeatable
# (`sample` without `random_state` draws from that global generator).
np.random.seed(25)
# Baseline run: the solver receives the word table in a random row order.
random_order = get_number_of_guesses_using_letter_score_strategy(target_words, words.sample(frac=1))

In [19]:
# Distribution of guess counts for the random-order baseline.
# NOTE(review): -1 appears among the recorded values; presumably an unfinished game - confirm.
random_order.value_counts()

 5    3952
 4    3787
-1    2645
 6    2598
 3    1679
 2     193
 1       1
Name: count, dtype: int64

In [20]:
# Replay the same targets with candidates sorted by total positional letter
# score: first lowest-score-first, then highest-score-first.
pos_score_ascending, pos_score_descending = (
    get_number_of_guesses_using_letter_score_strategy(
        target_words,
        words.sort_values("total_positional_letter_scores", ascending=low_scores_first),
    )
    for low_scores_first in (True, False)
)

In [21]:
# Replay the same targets with candidates sorted by total general letter score,
# using the total positional score as the secondary sort key: first
# lowest-score-first, then highest-score-first.
gen_score_ascending, gen_score_descending = (
    get_number_of_guesses_using_letter_score_strategy(
        target_words,
        words.sort_values(
            ["total_general_letter_scores", "total_positional_letter_scores"],
            ascending=low_scores_first,
        ),
    )
    for low_scores_first in (True, False)
)

In [39]:
def get_counts_df(guesses_required, type_val):
    """Tabulate how often each guess count occurs and tag the rows with a label.

    Parameters
    ----------
    guesses_required : pd.Series
        One guess count per game.
    type_val : str
        Label written to every row of the ``ordering_type`` column.

    Returns
    -------
    pd.DataFrame
        Columns ``number_guesses``, ``occurance_count`` and ``ordering_type``,
        one row per distinct guess count, in ``value_counts`` order.
    """
    column_names = ["number_guesses", "occurance_count"]

    # value_counts puts the distinct guess counts in the index; reset_index
    # moves them into a regular column next to their frequencies.
    counts_table = guesses_required.value_counts().reset_index()
    counts_table.columns = column_names

    counts_table["ordering_type"] = type_val
    return counts_table

In [65]:
# Build one counts table per candidate ordering, pairing each result Series
# with the label shown in the charts, then stack the tables into a single
# long-format frame.
(
    random_order_counts,
    pos_score_ascending_counts,
    pos_score_descending_counts,
    gen_score_ascending_counts,
    gen_score_descending_counts,
) = (
    get_counts_df(guesses, label)
    for guesses, label in [
        (random_order, "Random Ordering Of Words"),
        (pos_score_ascending, "Positional Score Ascending"),
        (pos_score_descending, "Positional Score Descending"),
        (gen_score_ascending, "Total Score Ascending"),
        (gen_score_descending, "Total Score Descending"),
    ]
)

guess_required = pd.concat(
    [
        random_order_counts,
        pos_score_ascending_counts,
        pos_score_descending_counts,
        gen_score_ascending_counts,
        gen_score_descending_counts,
    ]
)

In [66]:
# Recode the -1 guess count as 7 so that it sorts and plots after 6 rather than before 1.
# NOTE(review): -1 presumably marks games the solver did not finish - confirm against FilterSolver.attempt_solve.
guess_required.loc[guess_required["number_guesses"] == -1, "number_guesses"] = 7

In [91]:
# Order rows by ordering label and guess count so that the running total
# below accumulates from the fewest guesses upwards within each ordering.
sort_keys = ["ordering_type", "number_guesses"]
guess_required = guess_required.sort_values(sort_keys)
running_totals = guess_required.groupby("ordering_type", observed=False)["occurance_count"].cumsum()
guess_required["occurance_count_sum"] = running_totals

# Give the labels an explicit category order, then re-sort so rows (and plot
# legends) follow that order instead of alphabetical order.
sorter_categories = [
    "Random Ordering Of Words",
    "Positional Score Ascending",
    "Positional Score Descending",
    "Total Score Ascending",
    "Total Score Descending",
]
guess_required["ordering_type"] = pd.Categorical(guess_required["ordering_type"], sorter_categories)
guess_required = guess_required.sort_values(sort_keys)

In [92]:
# Express counts as a share of the games actually played under each ordering.
# The denominator comes from the counts themselves rather than len(words), so
# the percentages stay correct (and the cumulative column still ends at 100)
# when only a subset of the words was played via `game_count`.
games_per_ordering = guess_required.groupby("ordering_type", observed=False)["occurance_count"].transform("sum")
guess_required["occurance_percent"] = (guess_required["occurance_count"] / games_per_ordering * 100).round(2)
guess_required["occurance_percent_sum"] = (guess_required["occurance_count_sum"] / games_per_ordering * 100).round(2)

In [93]:
# Show the combined summary: per ordering and guess count, the raw count,
# running count, and both expressed as percentages.
guess_required

Unnamed: 0,number_guesses,occurance_count,ordering_type,occurance_count_sum,occurance_percent,occurance_percent_sum
6,1,1,Random Ordering Of Words,1,0.01,0.01
5,2,193,Random Ordering Of Words,194,1.3,1.31
4,3,1679,Random Ordering Of Words,1873,11.3,12.61
1,4,3787,Random Ordering Of Words,5660,25.49,38.1
0,5,3952,Random Ordering Of Words,9612,26.6,64.71
3,6,2598,Random Ordering Of Words,12210,17.49,82.19
2,7,2645,Random Ordering Of Words,14855,17.81,100.0
6,1,1,Positional Score Ascending,1,0.01,0.01
5,2,84,Positional Score Ascending,85,0.57,0.57
4,3,720,Positional Score Ascending,805,4.85,5.42


In [94]:
# Grouped bars: number of games finished at each guess count, one bar per
# candidate ordering. Labels replace the raw column names on the axes/legend.
px.bar(
    guess_required,
    x="number_guesses",
    y="occurance_count",
    color="ordering_type",
    barmode="group",
    labels={
        "number_guesses": "Number of guesses (7 = recorded as -1)",
        "occurance_count": "Number of games",
        "ordering_type": "Candidate word ordering",
    },
    title="Games by number of guesses, per candidate ordering",
)





In [95]:
# Cumulative share of games finished within a given number of guesses, one
# line per candidate ordering. `markers` is a boolean switch in Plotly Express,
# so pass True rather than a symbol name.
px.line(
    guess_required,
    x="number_guesses",
    y="occurance_percent_sum",
    color="ordering_type",
    markers=True,
    labels={
        "number_guesses": "Number of guesses (7 = recorded as -1)",
        "occurance_percent_sum": "Cumulative % of games",
        "ordering_type": "Candidate word ordering",
    },
    title="Cumulative share of games by number of guesses",
)



