# **[Term.ooo](https://term.ooo) "*Random*" Simulations**

This notebook performs **1,397** "*random*" simulations for each one of the **1,442** possible valid answers of the game, totalling **2,014,474** simulations. These simulations aren't completely random; the strategy works as follows:

+ After each attempt, the words are filtered based on the hits and misses of each element, so, the next attempt is going to be a word randomly selected from the filtered ones.
+ The first **2** attempts apply a condition to pick only available words that have the maximum number of unique elements, maximizing the chances of getting good results.
+ It simulates "*hard mode*" where the next attempt always uses the correct and misplaced elements from the current attempt.

The output file utilizes the following chars to represent the hits and misses:

+ **c** : Correctly placed element (🟩 - Green)
+ **m** : Misplaced element (🟨 - Yellow)
+ _ : Incorrect element (⬛ - Black)

# Imports and Constants

In [16]:
from multiprocessing import Pool
from tqdm import tqdm
import pandas as pd


# Game parameters
NUMBER_OF_ATTEMPTS = 6  # attempts available in one game
NUMBER_OF_ELEMENTS = 5  # elements (letters) in one word

# Elements: the marks used in the hits strings, plus the alphabet
ELEMENT_EMPTY = " "
ELEMENT_CORRECT = "c"
ELEMENT_MISPLACED = "m"
ELEMENT_INCORRECT = "_"
ELEMENTS = [*"abcdefghijklmnopqrstuvwxyz"]

# Attempts: hits strings of a fully correct and of a fully incorrect attempt
ATTEMPT_CORRECT = NUMBER_OF_ELEMENTS * ELEMENT_CORRECT
ATTEMPT_INCORRECT = NUMBER_OF_ELEMENTS * ELEMENT_INCORRECT

# Blank slots of one game: an attempt slot and a hits slot for every attempt
EMPTY_GAME_SIMULATION = [
    NUMBER_OF_ELEMENTS * ELEMENT_EMPTY for _ in range(2 * NUMBER_OF_ATTEMPTS)
]


# CSV header: the solution followed by one (attempt, hits) column pair per attempt
CSV_HEADER = ["solution"] + [
    f"{column}_{attempt}"
    for attempt in range(NUMBER_OF_ATTEMPTS)
    for column in ("attempt", "hits")
]

In [17]:
# Simulation parameters

# Number of simulation passes; every pass plays one game per valid answer.
# NOTE(review): the notebook introduction mentions 1,397 simulations per
# answer, which does not match this value - confirm which one is intended.
SIMS_COUNT = 1_000
# Number of opening attempts restricted to the words with the most unique elements
UNIQUE_ATTEMPTS = 2

# Plain string literals: the paths have no placeholders, so no f-string is needed
FILEPATH_IN = "./data/1.raw/valid_answers.csv"
FILEPATH_OUT = "./data/2.simulations/simulations_random.csv"

# Load data

*(And setting the index)*

In [18]:
# Read the valid answers and index the rows by the word itself, while
# keeping "word" available as a regular column for the pre-processing step
df_words = pd.read_csv(FILEPATH_IN).set_index("word", drop=False)
df_words

Unnamed: 0_level_0,word
word,Unnamed: 1_level_1
abano,abano
abono,abono
abril,abril
abrir,abrir
abuso,abuso
...,...
zerar,zerar
zinco,zinco
ziper,ziper
zonas,zonas


# Pre processing

In [19]:
# Build one temporary DataFrame per feature group; the dictionary keys become
# the top-level column labels after the concatenation below
word_column = df_words["word"]

dfs = {
    # Splitting the words into one categorical column per position
    "positions": pd.DataFrame(
        {
            f"p{i}": word_column.str[i].astype("category")
            for i in range(NUMBER_OF_ELEMENTS)
        }
    ),
    # Counting the number of unique elements per word
    "unique": pd.DataFrame(
        {"count": word_column.apply(lambda word: len(set(word))).astype("uint8")}
    ),
    # Counting the amount of each element in each word
    "elements": pd.DataFrame(
        {
            element: word_column.apply(lambda word: word.count(element)).astype("uint8")
            for element in ELEMENTS
        }
    ),
}


# Concatenating the DataFrames into a single multi-indexed one
df_words = pd.concat(dfs, axis=1)
df_words

Unnamed: 0_level_0,positions,positions,positions,positions,positions,unique,elements,elements,elements,elements,elements,elements,elements,elements,elements,elements,elements,elements,elements,elements,elements
Unnamed: 0_level_1,p0,p1,p2,p3,p4,count,a,b,c,d,...,q,r,s,t,u,v,w,x,y,z
word,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
abano,a,b,a,n,o,4,2,1,0,0,...,0,0,0,0,0,0,0,0,0,0
abono,a,b,o,n,o,4,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
abril,a,b,r,i,l,5,1,1,0,0,...,0,1,0,0,0,0,0,0,0,0
abrir,a,b,r,i,r,4,1,1,0,0,...,0,2,0,0,0,0,0,0,0,0
abuso,a,b,u,s,o,5,1,1,0,0,...,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zerar,z,e,r,a,r,4,1,0,0,0,...,0,2,0,0,0,0,0,0,0,1
zinco,z,i,n,c,o,5,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
ziper,z,i,p,e,r,5,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1
zonas,z,o,n,a,s,5,1,0,0,0,...,0,0,1,0,0,0,0,0,0,1


# **Simulation functions**

### Compute the hits of the attempt

In [20]:
def compute_hits(solution: str, attempt: str):
    """Score an attempt against the solution, one mark per position.

    Correctly placed elements are marked first. The occurrences of each
    element that are still unaccounted for in the solution are then handed
    out, left to right, as misplaced marks. Every other position keeps the
    incorrect mark.

    Args:
        solution: The word to be guessed.
        attempt: The guessed word.

    Returns:
        The hits string, made of ``ELEMENT_CORRECT``, ``ELEMENT_MISPLACED``
        and ``ELEMENT_INCORRECT`` marks.
    """
    # Occurrences in the solution of every element used by the attempt
    available = {letter: solution.count(letter) for letter in set(attempt)}

    # Every position starts as incorrect
    marks = list(ATTEMPT_INCORRECT)

    # First pass: exact matches, each one consuming an occurrence
    for position in range(NUMBER_OF_ELEMENTS):
        letter = attempt[position]
        if letter != solution[position]:
            continue
        available[letter] -= 1
        marks[position] = ELEMENT_CORRECT

    # Second pass: still-unmarked positions are misplaced while occurrences remain
    for position in range(NUMBER_OF_ELEMENTS):
        letter = attempt[position]
        if marks[position] != ELEMENT_INCORRECT or available[letter] <= 0:
            continue
        available[letter] -= 1
        marks[position] = ELEMENT_MISPLACED

    return "".join(marks)

In [21]:
# Perform tests

# Each case pairs the (solution, attempt) arguments with the expected hits
TESTS = [
    (("abano", "perua"), "____m"),
    (("advir", "haver"), "_mc_c"),
    (("agudo", "lugar"), "_mmm_"),
    (("autor", "gueto"), "_c_mm"),
    (("azedo", "azedo"), "ccccc"),
]

for arguments, expected in TESTS:
    assert compute_hits(*arguments) == expected

# Only reached when no assertion failed
print("All 'compute_hits()' tests passed")

All 'compute_hits()' tests passed


### Count the number of each element in the attempt based on the attempt hits

In [22]:
def count_elements(attempt: str, hits: str):
    """Bound how many times each attempted element can occur in the solution.

    For every distinct element of the attempt:

    * ``min`` is the number of positions where it was marked correct or
      misplaced.
    * ``max`` is ``0`` when it received no such mark, ``NUMBER_OF_ELEMENTS``
      when none of its positions was marked incorrect (no upper bound was
      revealed), and equal to ``min`` otherwise (the exact count is known).

    Args:
        attempt: The guessed word.
        hits: The hits string computed for that attempt.

    Returns:
        A dictionary mapping each element to ``{"min": int, "max": int}``.
    """
    # Per element: positions marked correct/misplaced vs. marked incorrect
    confirmed = {letter: 0 for letter in set(attempt)}
    rejected = dict(confirmed)

    for position in range(NUMBER_OF_ELEMENTS):
        letter = attempt[position]
        mark = hits[position]
        if mark == ELEMENT_MISPLACED or mark == ELEMENT_CORRECT:
            confirmed[letter] += 1
        else:
            rejected[letter] += 1

    # Turn the tallies into the min and max possible amounts in the solution
    bounds = {}
    for letter, found in confirmed.items():
        if found == 0:
            upper = 0
        elif rejected[letter] == 0:
            upper = NUMBER_OF_ELEMENTS
        else:
            upper = found
        bounds[letter] = {"min": found, "max": upper}

    return bounds

In [23]:
# Perform tests

# Each case pairs the (attempt, hits) arguments with the expected counter
TESTS = [
    (("abano", "____m"), {'n': {'min': 0, 'max': 0}, 'b': {'min': 0, 'max': 0}, 'a': {'min': 0, 'max': 0}, 'o': {'min': 1, 'max': 5}}),
    (("advir", "_mc_c"), {'d': {'min': 1, 'max': 5}, 'i': {'min': 0, 'max': 0}, 'a': {'min': 0, 'max': 0}, 'v': {'min': 1, 'max': 5}, 'r': {'min': 1, 'max': 5}}),
    (("agudo", "_mmm_"), {'d': {'min': 1, 'max': 5}, 'u': {'min': 1, 'max': 5}, 'a': {'min': 0, 'max': 0}, 'g': {'min': 1, 'max': 5}, 'o': {'min': 0, 'max': 0}}),
    (("autor", "_c_mm"), {'t': {'min': 0, 'max': 0}, 'u': {'min': 1, 'max': 5}, 'a': {'min': 0, 'max': 0}, 'o': {'min': 1, 'max': 5}, 'r': {'min': 1, 'max': 5}}),
    (("azedo", "ccccc"), {'d': {'min': 1, 'max': 5}, 'a': {'min': 1, 'max': 5}, 'z': {'min': 1, 'max': 5}, 'e': {'min': 1, 'max': 5}, 'o': {'min': 1, 'max': 5}}),
]

for arguments, expected in TESTS:
    assert count_elements(*arguments) == expected

# Only reached when no assertion failed
print("All 'count_elements()' tests passed")

All 'count_elements()' tests passed


### Filter the candidate words based on the attempt hits and attempt counter

In [24]:
def filter_words(words: pd.DataFrame, word: str, hits: str, counter: dict):
    """Keep only the candidate words compatible with an attempt's feedback.

    Args:
        words: Candidate words, with the ``"elements"`` (per-element counts)
            and ``"positions"`` (``p0``, ``p1``, ... columns) groups.
        word: The attempted word.
        hits: The hits string computed for that attempt.
        counter: Per-element ``{"min": ..., "max": ...}`` bounds, as returned
            by ``count_elements``.

    Returns:
        The rows of ``words`` that satisfy every count and position constraint.
    """
    element_counts = words["elements"]
    letters_at = words["positions"]

    # Start by keeping every candidate, then narrow the selection down
    keep = pd.Series(True, index=words.index, dtype=bool)

    # Each attempted element must occur between its min and max amounts
    for letter, bounds in counter.items():
        occurrences = element_counts[letter]
        keep &= (occurrences >= bounds["min"]) & (occurrences <= bounds["max"])

    # A correct position must hold the same element; any other mark
    # (misplaced or incorrect) rules that element out at that position
    for position in range(NUMBER_OF_ELEMENTS):
        column = letters_at[f"p{position}"]
        if hits[position] == ELEMENT_CORRECT:
            keep &= column == word[position]
        else:
            keep &= column != word[position]

    return words[keep]

### Simulate all of the attempts of a game

In [25]:
def play_game(words: pd.DataFrame, solution: str, unique_attempts: int):
    """Simulate one game against ``solution`` with randomly picked attempts.

    Each attempt is sampled from the words still compatible with the feedback
    received so far. During the first ``unique_attempts`` attempts the pick is
    restricted to the candidates with the highest number of unique elements.

    Args:
        words: Pre-processed candidate words, indexed by the word itself.
        solution: The word to be guessed.
        unique_attempts: Number of opening attempts that only consider the
            candidates with the most unique elements.

    Returns:
        A list holding the solution followed by alternating attempt and hits
        entries; slots of attempts that were never played keep their blank
        placeholder.
    """
    # Row layout: [solution, attempt_0, hits_0, attempt_1, hits_1, ...]
    simulation = [solution, *EMPTY_GAME_SIMULATION]

    candidates = words
    for turn in range(NUMBER_OF_ATTEMPTS):

        # Opening attempts only draw from the words with the most unique elements
        pool = candidates
        if turn < unique_attempts:
            unique_counts = candidates[("unique", "count")]
            pool = candidates[unique_counts == unique_counts.max()]

        # Draw a random word (the index label is the word) and score it
        guess = pool.sample(1).index[0]
        feedback = compute_hits(solution, guess)

        # Record the attempt and its hits in their pair of slots
        slot = 2 * turn + 1
        simulation[slot:slot + 2] = [guess, feedback]

        # Stop as soon as the solution is found
        if feedback == ATTEMPT_CORRECT:
            break

        # Narrow the candidates down with what this attempt revealed
        candidates = filter_words(
            candidates, guess, feedback, count_elements(guess, feedback)
        )

    return simulation

### Simulate a game for every solution

In [26]:
def simulate_all_solutions(words: pd.DataFrame):
    """Play one game for every word in ``words`` and append the rows to the CSV.

    Every index label of ``words`` is used once as the solution of a game
    played with ``UNIQUE_ATTEMPTS`` opening attempts. The resulting rows are
    appended, without a header, to ``FILEPATH_OUT``.

    Args:
        words: Pre-processed candidate words, indexed by the word itself.

    Returns:
        None. The simulations are only written to the output file.
    """
    # One simulation row per solution
    simulations = [
        play_game(words, solution, UNIQUE_ATTEMPTS) for solution in words.index
    ]

    # Render the whole batch in memory first. This function runs in several
    # pool workers that all append to the same file; streaming the rows
    # through the file buffer flushes them in arbitrary chunks, so rows coming
    # from different processes could interleave mid-line.
    payload = pd.DataFrame(simulations, columns=CSV_HEADER).to_csv(
        header=False, index=False
    )

    # Append the batch with a single write call.
    # NOTE(review): this narrows the window for interleaved appends but is not
    # a hard guarantee on every platform/filesystem; writing from the parent
    # process (or using a lock) would be the strict solution.
    with open(FILEPATH_OUT, "a", newline="") as f:
        f.write(payload)

# Run simulations

In [None]:
# Init output file (truncates any previous run and writes only the header)
with open(FILEPATH_OUT, "w", newline="") as f:
    pd.DataFrame(columns=CSV_HEADER).to_csv(f, index=False)


def _reseed_worker():
    """Give the current pool worker its own NumPy random state."""
    # Local import: NumPy is only needed here (pandas already depends on it)
    import numpy as np

    # Workers created with the "fork" start method inherit a copy of the
    # parent's global NumPy random state, which `DataFrame.sample` draws from
    # when no `random_state` is given. Without reseeding, the workers replay
    # the same sequence of "random" words and produce duplicated simulations.
    np.random.seed()


# Starting the multiprocessing Pool, reseeding the random state of every worker
with Pool(initializer=_reseed_worker) as p:
    # One task per simulation pass; the list is consumed only to drive the progress bar
    list(tqdm(p.imap(simulate_all_solutions, [df_words] * SIMS_COUNT), total=SIMS_COUNT))