## Imports

In [38]:
import numpy as np
import random
import operator
import time
import pandas as pd
from wordle_functions import *

## Importing datasets

### official words
- official wordle word list

In [2]:
### Official list
# Read the processed official Wordle list, one word per line.
# NOTE: split("\n") keeps an empty string for any blank line, so this raw list
# can contain a blank entry; the later "Official list" cell reloads the file
# with blank entries filtered out.
with open("data/official_words_processed.txt", "r", encoding = "utf-8") as f:
    official_words = f.read().split("\n")
# no explicit f.close(): the `with` block has already closed the file

print(len(official_words))
official_words[:5]

2310


['wince', 'thyme', 'mower', 'horde', 'heard']

### alternative list 1
- an alternate list of 5-letter words found on the web

In [3]:
### Alternative list 1
# Read the alternative word list, one word per line.
# NOTE: split("\n") keeps an empty string for any blank line in the file.
with open("data/alt_words_1.txt", "r", encoding = "utf-8") as f:
    alt_words_1 = f.read().split("\n")
# no explicit f.close(): the `with` block has already closed the file

print(len(alt_words_1))
alt_words_1[:5]

14856


['rossa', 'jetty', 'wizzo', 'cuppa', 'cohoe']

### nltk grand corpus
- Amalgamation of all words in various NLTK corpora to have as big a dataset as possible
- Developed manually

In [36]:
### grand corpus tokens
# Read the corpus tokens, one token per line (repeated tokens are kept as-is).
# NOTE: split("\n") keeps an empty string for any blank line in the file.
with open("data/nltk_grand_corpus_tokens_5.txt", "r", encoding = "utf-8") as f:
    nltk_tokens = f.read().split("\n")
# no explicit f.close(): the `with` block has already closed the file

print(len(nltk_tokens))
nltk_tokens[:5]

535189


['years', 'board', 'dutch', 'group', 'agnew']

### nltk grand corpus types and counts

In [5]:
### grand corpus types and counts
# Build a word -> count mapping from a tab-separated "<word>\t<count>" file.
# Lines that do not split into exactly two fields (e.g. blank lines) are skipped.
nltk_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_5.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        fields = line.split("\t") # split once and reuse the result
        if len(fields) == 2:
            word, count = fields
            nltk_counts[word] = count # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(nltk_counts))
nltk_counts['which']

8043


'15760'

In [6]:
### Official list
# Reload the official list, this time discarding the empty string that
# split("\n") produces for a blank line in the file.
with open("data/official_words_processed.txt", "r", encoding = "utf-8") as f:
    official_words = [word for word in f.read().split("\n") if len(word) > 0]
# no explicit f.close(): the `with` block has already closed the file

print(len(official_words))
official_words[:10]

2309


['wince',
 'thyme',
 'mower',
 'horde',
 'heard',
 'tenor',
 'zonal',
 'parry',
 'shied',
 'fizzy']

## Wordle functions + Testing

### Testing `wordle_wizard()`

In [7]:
# Play the same fixed game ('arose' -> 'syrup') twice:
# once with terse output (verbose = False), then with the full trace (verbose = True).
for val in (False, True):
    wordle_wizard(
        word_list = official_words,
        max_guesses = 6,
        guess = "arose",
        target = "syrup",
        bias = 'entropy',
        random_guess = False,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'arose'
Guess 2: 'shirt'
Guess 3: 'surly'
Guess 4: 'syrup'

Congratulations! The Wordle has been solved in 4 guesses!
There were still 2 guesses remaining.

The target word was 'syrup'.

-----------------------------
-----------------------------

Guess 1: 'arose'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('r', 1), ('s', 3)]

Letters to guess again:
	['r', 's']

Letters to not guess again:
	['a', 'e', 'o']

At this point:
	2288, 99.09% of total words have been eliminated, and
	21, 0.91% of total words remain possible.

All potential next guesses:
	[('shirt', 100.0), ('strip', 97.58), ('rusty', 82.49), ('skirt', 81.4), ('surly', 81.16), ('swirl', 78.38), ('spurt', 76.33), ('slurp', 75.0), ('spurn', 60.02), ('sprig', 53.86), ('risky', 49.52), ('virus', 47.83), ('shirk', 45.65), ('scrum', 44.57), ('syrup', 44.44), ('scrub', 40.82), ('smirk', 38.29), ('strut', 38.16), ('shrug', 34.78), ('shrub', 31.64), ('usurp', 0.0)]

Wor

### Testing on 3-letter words

In [8]:
### 3 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_3_letters = []
words_3_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_3.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_3_letters.append(word)
                words_3_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_3_letters))
print(words_3_letters[:5])
words_3_types_counts['the']

1531
['the', 'and', 'for', 'his', 'was']


'286732'

In [9]:
# Play the same fixed game ('the' -> 'his') on the 3-letter list twice:
# once with terse output, then with the full trace.
for val in (False, True):
    wordle_wizard(
        word_list = words_3_letters,
        max_guesses = 6,
        guess = "the",
        target = "his",
        bias = 'entropy',
        random_guess = False,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'the'
Guess 2: 'hoa'
Guess 3: 'his'

Congratulations! The Wordle has been solved in 3 guesses!
There were still 3 guesses remaining.

The target word was 'his'.

-----------------------------
-----------------------------

Guess 1: 'the'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('h', 1)]

Letters to guess again:
	['h']

Letters to not guess again:
	['e', 't']

At this point:
	1447, 94.51% of total words have been eliminated, and
	84, 5.49% of total words remain possible.

The top 40 potential next guesses are:
	[('hoa', 100.0), ('hai', 93.36), ('ash', 80.38), ('has', 80.38), ('hua', 77.56), ('anh', 77.56), ('han', 77.56), ('nah', 77.56), ('har', 76.41), ('ham', 73.89), ('mah', 73.89), ('hal', 72.06), ('hap', 71.6), ('pah', 71.6), ('dah', 70.92), ('had', 70.92), ('hab', 68.24), ('bah', 68.24), ('agh', 64.66), ('hag', 64.66), ('hos', 64.43), ('haw', 63.13), ('wah', 63.13), ('hay', 62.98), ('hon', 61.6), ('hou', 61.6), ('

### Testing on 4-letter words

In [10]:
### 4 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_4_letters = []
words_4_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_4.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_4_letters.append(word)
                words_4_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_4_letters))
print(words_4_letters[:5])
words_4_types_counts['that']

4266
['that', 'with', 'this', 'they', 'have']


'57994'

In [11]:
# Play the same fixed game ('have' -> 'this') on the 4-letter list twice:
# once with terse output, then with the full trace.
for val in (False, True):
    wordle_wizard(
        word_list = words_4_letters,
        max_guesses = 6,
        guess = "have",
        target = "this",
        bias = 'entropy',
        random_guess = False,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'have'
Guess 2: 'shor'
Guess 3: 'this'

Congratulations! The Wordle has been solved in 3 guesses!
There were still 3 guesses remaining.

The target word was 'this'.

-----------------------------
-----------------------------

Guess 1: 'have'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('h', 0)]

Letters to guess again:
	['h']

Letters to not guess again:
	['a', 'e', 'v']

At this point:
	4101, 96.13% of total words have been eliminated, and
	165, 3.87% of total words remain possible.

The top 40 potential next guesses are:
	[('rosh', 100.0), ('shor', 100.0), ('shon', 97.4), ('sohn', 97.4), ('soth', 95.92), ('shot', 95.92), ('iohn', 93.08), ('itoh', 91.54), ('shin', 88.41), ('shou', 88.29), ('thor', 88.11), ('thro', 88.11), ('roth', 88.11), ('orth', 88.11), ('shod', 87.94), ('sith', 86.87), ('tish', 86.87), ('this', 86.87), ('shit', 86.87), ('loth', 86.58), ('mosh', 86.34), ('thon', 85.51), ('shop', 83.38), ('posh', 83.38

### Testing on 6-letter words

In [12]:
### 6 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_6_letters = []
words_6_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_6.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_6_letters.append(word)
                words_6_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_6_letters))
print(words_6_letters[:5])
words_6_types_counts[words_6_letters[0]]

11290
['little', 'before', 'people', 'should', 'things']


'5543'

In [13]:
# Play the same fixed game ('little' -> 'before') on the 6-letter list twice:
# once with terse output, then with the full trace.
for val in (False, True):
    wordle_wizard(
        word_list = words_6_letters,
        max_guesses = 6,
        guess = "little",
        target = "before",
        bias = 'entropy',
        random_guess = False,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'little'
Guess 2: 'sarone'
Guess 3: 'upmore'
Guess 4: 'decore'
Guess 5: 'before'

Congratulations! The Wordle has been solved in 5 guesses!
There were still 1 guesses remaining.

The target word was 'before'.

-----------------------------
-----------------------------

Guess 1: 'little'
Letters in correct positions:
	[('e', 5)]

Letters in incorrect positions:
	[]

Letters to guess again:
	['e']

Letters to not guess again:
	['i', 'l', 't']

At this point:
	10880, 96.37% of total words have been eliminated, and
	410, 3.63% of total words remain possible.

The top 40 potential next guesses are:
	[('sarone', 100.0), ('arouse', 92.3), ('hoarse', 91.81), ('ashore', 91.81), ('coarse', 91.12), ('romane', 86.05), ('orange', 84.29), ('groane', 84.29), ('organe', 84.29), ('scorne', 82.78), ('sundae', 82.69), ('phrase', 81.14), ('sharpe', 81.14), ('scrape', 80.44), ('dosage', 79.56), ('grande', 79.02), ('coahse', 78.65), ('graspe', 78.56), ('drouse', 78.5

### Testing on 7-letter words

In [14]:
### 7 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_7_letters = []
words_7_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_7.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_7_letters.append(word)
                words_7_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_7_letters))
print(words_7_letters[:5])
words_7_types_counts[words_7_letters[0]]

12566
['because', 'through', 'against', 'another', 'himself']


'4809'

In [15]:
# Two games on the 7-letter list: terse output first, then the full trace.
# NOTE(review): random_guess and random_target are both True here. In the
# recorded run the first guess and the target differ from the `guess` /
# `target` arguments and also differ between the two iterations, so the two
# runs are not the same game. Presumably the arguments are ignored when the
# random flags are set -- confirm in wordle_functions.
for val in (False, True):
    wordle_wizard(
        word_list = words_7_letters,
        max_guesses = 6,
        guess = "because",
        target = "through",
        bias = 'entropy',
        random_guess = True,
        random_target = True,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'cutteth'
Guess 2: 'citroen'
Guess 3: 'catered'

Congratulations! The Wordle has been solved in 3 guesses!
There were still 3 guesses remaining.

The target word was 'catered'.

-----------------------------
-----------------------------

Guess 1: 'mallice'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('i', 4), ('e', 6)]

Letters to guess again:
	['e', 'i']

Letters to not guess again:
	['a', 'c', 'l', 'm']

At this point:
	12024, 95.69% of total words have been eliminated, and
	542, 4.31% of total words remain possible.

The top 40 potential next guesses are:
	[('editors', 100.0), ('steroid', 100.0), ('ignores', 98.26), ('regions', 98.26), ('edtions', 97.18), ('pierson', 96.48), ('horites', 96.25), ('goriest', 95.52), ('insured', 94.43), ('hinders', 93.7), ('shrined', 93.7), ('kristen', 93.08), ('stinker', 93.08), ('kirsten', 93.08), ('tinkers', 93.08), ('winters', 92.27), ('erikson', 91.81), ('striven', 91.38), ('girdest

### Testing on 8-letter words

In [16]:
### 8 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_8_letters = []
words_8_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_8.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_8_letters.append(word)
                words_8_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_8_letters))
print(words_8_letters[:5])
words_8_types_counts[words_8_letters[0]]

11650
['children', 'together', 'director', 'anything', 'american']


'3012'

In [17]:
# Play the same fixed game ('trinidad' -> 'together') on the 8-letter list twice:
# once with terse output, then with the full trace.
for val in (False, True):
    wordle_wizard(
        word_list = words_8_letters,
        max_guesses = 6,
        guess = "trinidad",
        target = "together",
        bias = 'entropy',
        random_guess = False,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'trinidad'
Guess 2: 'tumblers'
Guess 3: 'together'

Congratulations! The Wordle has been solved in 3 guesses!
There were still 3 guesses remaining.

The target word was 'together'.

-----------------------------
-----------------------------

Guess 1: 'trinidad'
Letters in correct positions:
	[('t', 0)]

Letters in incorrect positions:
	[('r', 1)]

Letters to guess again:
	['r', 't']

Letters to not guess again:
	['a', 'd', 'i', 'n']

At this point:
	11624, 99.78% of total words have been eliminated, and
	26, 0.22% of total words remain possible.

All potential next guesses:
	[('tumbrels', 100.0), ('tumblers', 100.0), ('theorems', 91.84), ('throuble', 91.84), ('torquers', 82.1), ('terrours', 81.36), ('tortures', 81.36), ('throttle', 70.07), ('teleport', 67.5), ('threwest', 61.5), ('thereout', 61.5), ('together', 60.07), ('textures', 58.97), ('therfore', 53.47), ('therefor', 53.47), ('tweezers', 49.92), ('tourette', 48.69), ('torturer', 48.69), ('

### Testing on 9-letter words

In [18]:
### 9 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_9_letters = []
words_9_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_9.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_9_letters.append(word)
                words_9_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_9_letters))
print(words_9_letters[:5])
words_9_types_counts[words_9_letters[0]]

9716
['something', 'character', 'therefore', 'according', 'different']


'2621'

In [19]:
# Two games on the 9-letter list with a fixed target ('character'):
# terse output first, then the full trace.
# NOTE(review): random_guess is True here. In the recorded run the first guess
# is not "something" and differs between the two iterations, so presumably
# `guess` is ignored when the flag is set -- confirm in wordle_functions.
for val in (False, True):
    wordle_wizard(
        word_list = words_9_letters,
        max_guesses = 6,
        guess = "something",
        target = "character",
        bias = 'entropy',
        random_guess = True,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'laughless'
Guess 2: 'anchorite'
Guess 3: 'ermatched'
Guess 4: 'charecter'
Guess 5: 'character'

Congratulations! The Wordle has been solved in 5 guesses!
There were still 1 guesses remaining.

The target word was 'character'.

-----------------------------
-----------------------------

Guess 1: 'audacious'
Letters in correct positions:
	[]

Letters in incorrect positions:
	[('a', 0), ('a', 3), ('c', 4)]

Letters to guess again:
	['a', 'c']

Letters to not guess again:
	['d', 'i', 'o', 's', 'u']

At this point:
	9629, 99.1% of total words have been eliminated, and
	87, 0.9% of total words remain possible.

The top 40 potential next guesses are:
	[('parchment', 100.0), ('rectangle', 99.73), ('centrally', 94.96), ('clergyman', 92.02), ('carpentry', 87.13), ('placement', 84.93), ('verplanck', 84.82), ('enchanter', 84.28), ('charlayne', 83.87), ('carpenter', 83.13), ('caprenter', 83.13), ('benchmark', 81.08), ('centenary', 79.54), ('vtterance', 78.7

### Testing on 10-letter words

In [20]:
### 10 letters
# Build the word list and the word -> count mapping from a tab-separated
# "<word>\t<count>" file, keeping ASCII-only words.
words_10_letters = []
words_10_types_counts = {}

with open("data/nltk_grand_corpus_types_and_counts_10.txt", "r", encoding = "utf-8") as f:
    for line in f.read().split("\n"):
        word_freq = line.split("\t")
        if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line
            word, freq = word_freq
            if word.isascii(): # skip words containing non-ASCII characters
                words_10_letters.append(word)
                words_10_types_counts[word] = freq # the count is kept as the string read from the file
# no explicit f.close(): the `with` block has already closed the file

print(len(words_10_letters))
print(words_10_letters[:5])
words_10_types_counts[words_10_letters[0]]

7200
['characters', 'themselves', 'everything', 'especially', 'understand']


'1929'

In [21]:
# Two games on the 10-letter list with a fixed target ('theologies'):
# terse output first, then the full trace.
# NOTE(review): random_guess is True here. In the recorded run the first guess
# is not "characters" and differs between the two iterations, so presumably
# `guess` is ignored when the flag is set -- confirm in wordle_functions.
for val in (False, True):
    wordle_wizard(
        word_list = words_10_letters,
        max_guesses = 6,
        guess = "characters",
        target = "theologies",
        bias = 'entropy',
        random_guess = True,
        random_target = False,
        verbose = val,
        drama = 0,
        return_stats = False,
        record = True,
    )

-----------------------------

Guess 1: 'chronology'
Guess 2: 'theologies'

Congratulations! The Wordle has been solved in 2 guesses!
There were still 4 guesses remaining.

The target word was 'theologies'.

-----------------------------
-----------------------------

Guess 1: 'seethingly'
Letters in correct positions:
	[('e', 2)]

Letters in incorrect positions:
	[('s', 0), ('e', 1), ('t', 3), ('h', 4), ('i', 5), ('g', 7), ('l', 8)]

Letters to guess again:
	['e', 'g', 'h', 'i', 'l', 's', 't']

Letters to not guess again:
	['n', 'y']

At this point:
	7199, 99.99% of total words have been eliminated, and
	1, 0.01% of total words remain possible.

The only remaining possible word is:
	'theologies'

Next guess:
	'theologies'

-----------------------------

Guess 2: 'theologies'

Congratulations! The Wordle has been solved in 2 guesses!
There were still 4 guesses remaining.

The target word was 'theologies'.

-----------------------------


### `compare_wordle()` testing

In [22]:
# Load the recorded human games (one row per game) and preview them.
humans_csv_path = "inputs/wordle_humans - Sheet1.csv"
df = pd.read_csv(humans_csv_path)
print(df.shape)
df

(33, 8)


Unnamed: 0,first_guess,second_guess,third_guess,fourth_guess,fifth_guess,sixth_guess,target,player
0,arose,plate,cache,mauve,vague,none,vague,diane
1,douce,lairy,slave,algae,apple,none,apple,aidan
2,douce,lairy,gimps,ninth,none,none,ninth,aidan
3,douce,lairy,snail,flail,none,none,flail,aidan
4,douce,lairy,phase,stage,none,none,stage,aidan
5,douce,lairy,ready,beady,heady,none,heady,aidan
6,douce,dairy,dunes,debug,none,none,none,aidan
7,douce,lairy,aunts,swamp,usage,none,usage,aidan
8,douce,pound,found,bound,mound,sound,sound,aidan
9,douce,lairy,palms,salts,ghost,salsa,salsa,aidan


In [23]:
### TESTING DF INTERPRETATION

df = pd.read_csv("inputs/wordle_humans - Sheet1.csv")

row = 5

# Convert the row once and reuse the result instead of calling convert_row()
# separately for the print and for each field.
# NOTE(review): assumes convert_row() returns the same value on every call for
# the same (df, row) -- confirm in wordle_functions.
converted_row = convert_row(df, row)
print(converted_row)
guess_list = converted_row[0]  # the player's guesses, in order
target_word = converted_row[1] # the word the player was trying to find
player = converted_row[2]      # the player's name

# Replay the player's game and the wizard's game for the same target,
# returning the statistics for both.
compare_wordle(word_list = official_words, max_guesses = 6, 
                    guess_list = guess_list, player = player, target = target_word,
                    verbose = False, return_stats = True, record = False)

(['douce', 'lairy', 'ready', 'beady', 'heady'], 'heady', 'aidan')


{'first_guess': ['douce', 'douce'],
 'target_word': ['heady', 'heady'],
 'first_guess_vowels': [3.0, 3.0],
 'first_guess_consonants': [2.0, 2.0],
 'target_vowels': [3.0, 3.0],
 'target_consonants': [2.0, 2.0],
 'target_entropy': [66.43, 66.43],
 'first_guess_entropy': [63.74, 63.74],
 'target_guessed': [True, True],
 'mid_guesses_avg_vows': [3.0, 2.5],
 'mid_guesses_avg_cons': [2.0, 2.5],
 'avg_perf_letters': [9.0, 0.0],
 'avg_wrong_pos_letters': [11.0, 15.0],
 'avg_wrong_letters': [22.0, 15.0],
 'avg_remaining': [16.0, 21.67],
 'avg_intermediate_guess_entropy': [92.75, 90.94],
 'valid_success': [True, True],
 'player': ['aidan', 'wizard'],
 'num_guesses': [5.0, 4.0],
 'expected_guesses': [4.0, 4.0],
 'luck': [-0.16, 0]}

## Comparing player solutions against wizard solutions

In [37]:
# Replay every recorded human game through compare_wordle() and gather the
# statistics it returns into one table, which is de-duplicated and written to CSV.
df = pd.read_csv("compared_data/wordle_humans - Sheet1.csv")

stats_master = {}  # metric name -> list of values accumulated over all games
excepts = []       # guess lists of the rows that compare_wordle() could not process
except_errors = [] # repr() of the error raised for the matching entry in `excepts`
for row in df.index:
    # Convert the row once and reuse the result.
    # NOTE(review): assumes convert_row() returns the same value on every call
    # for the same (df, row) -- confirm in wordle_functions.
    converted_row = convert_row(df, row)
    guess_list = converted_row[0]
    target_word = converted_row[1]
    player = converted_row[2]
    try:
        complete = compare_wordle(word_list = official_words, max_guesses = 6, 
                    guess_list = guess_list, player = player, target = target_word,
                    verbose = True, return_stats = True, record = False)
        for metric, results in complete.items():
            stats_master.setdefault(metric, []).extend(results)
    except Exception as err:
        # The original bare `except:` (followed by a stray, no-op `AttributeError`
        # expression) also swallowed KeyboardInterrupt / SystemExit. Catching
        # Exception keeps the best-effort skipping of bad rows while letting the
        # run be interrupted, and the error is recorded rather than discarded.
        excepts.append(guess_list)
        except_errors.append(repr(err))

df_master = pd.DataFrame(stats_master)
print(df_master.columns.tolist())
# Reorder the columns for readability.
df_master = df_master[['first_guess', 'target_word', 'player', 'num_guesses', 'expected_guesses', 'luck', 'first_guess_vowels', 'first_guess_consonants',
                     'target_vowels', 'target_consonants', 'first_guess_entropy', 'target_entropy',
                     'target_guessed', 'mid_guesses_avg_vows', 'mid_guesses_avg_cons', 'avg_perf_letters',
                     'avg_wrong_pos_letters', 'avg_wrong_letters', 'avg_remaining', 'avg_intermediate_guess_entropy',
                     'valid_success']]

print(excepts)
print(df_master.shape) # check shape before deleting dups

# Delete duplicate rows (some created by process); the first occurrence of each
# row is kept and the surviving rows keep their original index labels.
df_master = df_master.drop_duplicates()

df_master.to_csv('data/players_compared.csv') # write new data to csv

print(df_master.shape) # check shape after deleting dups
df_master

['first_guess', 'target_word', 'first_guess_vowels', 'first_guess_consonants', 'target_vowels', 'target_consonants', 'first_guess_entropy', 'target_entropy', 'target_guessed', 'mid_guesses_avg_vows', 'mid_guesses_avg_cons', 'avg_perf_letters', 'avg_wrong_pos_letters', 'avg_wrong_letters', 'avg_remaining', 'avg_intermediate_guess_entropy', 'valid_success', 'player', 'num_guesses', 'expected_guesses', 'luck']
[['douce', 'dairy', 'dunes', 'debug'], ['douce', 'lairy', 'cache'], []]
(60, 21)
(56, 21)


Unnamed: 0,first_guess,target_word,player,num_guesses,expected_guesses,luck,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,...,target_entropy,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,avg_remaining,avg_intermediate_guess_entropy,valid_success
0,arose,vague,diane,5.0,5.0,0.0,3.0,2.0,3.0,2.0,...,58.31,True,2.6,2.4,6.0,9.0,26.0,19.0,99.9,True
1,arose,vague,wizard,5.0,5.0,0.0,3.0,2.0,3.0,2.0,...,58.31,True,2.6,2.4,7.0,4.0,30.0,19.25,99.9,True
2,douce,apple,aidan,5.0,6.0,0.23,3.0,2.0,2.0,3.0,...,62.88,True,2.6,2.4,5.0,11.0,30.0,49.5,92.74,True
3,douce,apple,wizard,6.0,6.0,0.0,3.0,2.0,2.0,3.0,...,62.88,True,2.33,2.67,12.0,4.0,43.0,40.0,93.95,True
4,douce,ninth,aidan,4.0,5.0,0.27,3.0,2.0,1.0,4.0,...,35.41,True,2.0,3.0,1.0,2.0,27.0,123.67,90.92,True
5,douce,ninth,wizard,5.0,5.0,0.0,3.0,2.0,1.0,4.0,...,35.41,True,1.8,3.2,3.0,12.0,35.0,95.5,92.74,True
6,douce,flail,aidan,4.0,4.0,0.09,3.0,2.0,2.0,3.0,...,42.27,True,2.5,2.5,3.0,6.0,21.0,123.0,90.92,True
7,douce,flail,wizard,4.0,4.0,0.0,3.0,2.0,2.0,3.0,...,42.27,True,2.25,2.75,6.0,0.0,21.0,120.67,90.92,True
8,douce,stage,aidan,4.0,4.0,0.09,3.0,2.0,2.0,3.0,...,81.44,True,2.5,2.5,4.0,3.0,22.0,65.67,90.92,True
9,douce,stage,wizard,4.0,4.0,0.0,3.0,2.0,2.0,3.0,...,81.44,True,2.25,2.75,9.0,0.0,15.0,59.67,90.92,True


In [32]:
# Games recorded for player 'aidan': mean number of guesses, subset shape, first rows.
aidan_games = df_master.query("player == 'aidan'")
print(aidan_games['num_guesses'].mean())
print(aidan_games.shape)
aidan_games.head()

4.5
(18, 21)


Unnamed: 0,first_guess,target_word,player,num_guesses,expected_guesses,luck,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,...,target_entropy,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,avg_remaining,avg_intermediate_guess_entropy,valid_success
2,douce,apple,aidan,5.0,5.0,0.07,3.0,2.0,2.0,3.0,...,62.83,True,2.6,2.4,5.0,11.0,30.0,49.5,92.75,True
4,douce,ninth,aidan,4.0,5.0,0.27,3.0,2.0,1.0,4.0,...,35.38,True,2.0,3.0,1.0,2.0,27.0,123.0,90.92,True
6,douce,flail,aidan,4.0,4.0,0.09,3.0,2.0,2.0,3.0,...,42.3,True,2.5,2.5,3.0,6.0,21.0,122.33,90.92,True
8,douce,stage,aidan,4.0,4.0,0.09,3.0,2.0,2.0,3.0,...,81.29,True,2.5,2.5,4.0,3.0,22.0,65.67,90.92,True
10,douce,heady,aidan,5.0,4.0,-0.16,3.0,2.0,3.0,2.0,...,66.43,True,3.0,2.0,9.0,11.0,22.0,16.0,92.74,True


In [33]:
# Games recorded for player 'dad': mean number of guesses, subset shape, first rows.
dad_games = df_master.query("player == 'dad'")
print(dad_games['num_guesses'].mean())
print(dad_games.shape)
dad_games.head()

4.2
(5, 21)


Unnamed: 0,first_guess,target_word,player,num_guesses,expected_guesses,luck,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,...,target_entropy,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,avg_remaining,avg_intermediate_guess_entropy,valid_success
38,audio,syrup,dad,4.0,3.0,-0.21,4.0,1.0,2.0,3.0,...,49.32,True,2.5,2.5,1.0,10.0,18.0,37.0,90.5,True
40,audio,worse,dad,4.0,4.0,0.09,4.0,1.0,2.0,3.0,...,76.4,True,2.5,2.5,5.0,9.0,15.0,95.33,90.5,True
46,audio,polka,dad,4.0,3.0,-0.21,4.0,1.0,2.0,3.0,...,55.0,True,2.5,2.5,2.0,11.0,17.0,32.0,90.51,True
48,audio,moose,dad,6.0,6.0,0.06,4.0,1.0,3.0,2.0,...,53.53,True,2.83,2.17,13.0,9.0,37.0,59.8,93.67,True
58,audio,squat,dad,3.0,3.0,0.13,4.0,1.0,2.0,3.0,...,50.67,True,3.0,2.0,1.0,6.0,8.0,17.0,87.34,True


In [34]:
# Games recorded for player 'diane': mean number of guesses, subset shape, first rows.
diane_games = df_master.query("player == 'diane'")
print(diane_games['num_guesses'].mean())
print(diane_games.shape)
diane_games.head()

3.6666666666666665
(6, 21)


Unnamed: 0,first_guess,target_word,player,num_guesses,expected_guesses,luck,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,...,target_entropy,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,avg_remaining,avg_intermediate_guess_entropy,valid_success
0,arose,vague,diane,5.0,5.0,0.0,3.0,2.0,3.0,2.0,...,58.25,True,2.6,2.4,6.0,9.0,26.0,18.75,99.86,True
42,audio,worse,diane,4.0,4.0,0.09,4.0,1.0,2.0,3.0,...,76.4,True,2.5,2.5,4.0,8.0,17.0,96.67,90.5,True
44,audio,polka,diane,3.0,3.0,0.13,4.0,1.0,2.0,3.0,...,54.91,True,2.67,2.33,3.0,5.0,7.0,38.5,87.34,True
50,audio,moose,diane,4.0,6.0,0.4,4.0,1.0,3.0,2.0,...,53.53,True,2.75,2.25,5.0,5.0,18.0,98.0,90.51,True
54,audio,above,diane,3.0,4.0,0.34,4.0,1.0,3.0,2.0,...,65.89,True,3.33,1.67,4.0,2.0,8.0,16.5,87.35,True


In [35]:
# Games recorded for player 'wizard': mean number of guesses, subset shape, first rows.
wizard_games = df_master.query("player == 'wizard'")
print(wizard_games['num_guesses'].mean())
print(wizard_games.shape)
wizard_games.head()

3.857142857142857
(28, 21)


Unnamed: 0,first_guess,target_word,player,num_guesses,expected_guesses,luck,first_guess_vowels,first_guess_consonants,target_vowels,target_consonants,...,target_entropy,target_guessed,mid_guesses_avg_vows,mid_guesses_avg_cons,avg_perf_letters,avg_wrong_pos_letters,avg_wrong_letters,avg_remaining,avg_intermediate_guess_entropy,valid_success
1,arose,vague,wizard,5.0,5.0,0.0,3.0,2.0,3.0,2.0,...,58.25,True,2.6,2.4,7.0,4.0,30.0,19.0,99.86,True
3,douce,apple,wizard,5.0,5.0,0.0,3.0,2.0,2.0,3.0,...,62.83,True,2.4,2.6,8.0,7.0,28.0,48.0,92.75,True
5,douce,ninth,wizard,5.0,5.0,0.0,3.0,2.0,1.0,4.0,...,35.38,True,1.8,3.2,3.0,12.0,35.0,95.0,92.74,True
7,douce,flail,wizard,4.0,4.0,0.0,3.0,2.0,2.0,3.0,...,42.3,True,2.25,2.75,6.0,0.0,21.0,120.0,90.92,True
9,douce,stage,wizard,4.0,4.0,0.0,3.0,2.0,2.0,3.0,...,81.29,True,2.5,2.5,7.0,2.0,17.0,59.33,90.92,True
