# SUTOM starter

### Determine the optimal first word to enter for a given SUTOM challenge 

In [1]:
%load_ext lab_black

In [2]:
from collections import defaultdict
from pathlib import Path

import pandas as pd
from tqdm import tqdm

from sutom import Dictionary

### Download the latest word list here and save it as `listeMotsProposables.ts` in the working directory: https://framagit.org/JonathanMM/sutom/-/blob/main/ts/mots/listeMotsProposables.ts

In [3]:
# Read the TypeScript word list. The first two and last two lines are skipped
# (presumably the array declaration wrapper — TODO confirm against the file);
# on every remaining line, keep the text before the first comma and drop its
# first and last character (the surrounding quotes).
with open("listeMotsProposables.ts", encoding="utf-8", mode="r") as word_file:
    word_lines = word_file.readlines()[2:-2]
wds = {raw_line.strip().split(",")[0][1:-1] for raw_line in word_lines}
sutom_dictionary = Dictionary(wds)
sutom_dictionary

Dictionary including 149861 words (size 6 to 9) with alphabet of 26 letters

### Determine best starters for a SUTOM challenge

In [4]:
# Challenge parameters: they select the candidate pool and name the output CSV.
nb_letters = 7  # number of letters in the word to find
first_letter = "V"  # letter every candidate word starts with

In [5]:
# Narrow the dictionary down to the words matching the challenge parameters:
# first by word length, then by initial letter.
words_of_length = sutom_dictionary.sutom_words[nb_letters]
candidates = words_of_length[first_letter]
summary = f"{len(candidates)} words with {nb_letters} letters starting with {first_letter} in SUTOM dictionary"
print(summary)

836 words with 7 letters starting with V in SUTOM dictionary


In [6]:
# For every possible opening word, accumulate over all possible targets the
# size of the list returned by `solve` for the answer that opening would get
# (presumably the words still compatible with that answer — TODO confirm).
# Guessing the target itself counts as 1.
score = defaultdict(int)

for candidate in tqdm(candidates):
    for target in candidates:
        if target == candidate:
            score[candidate] += 1
            continue

        sutom_answers = {candidate: sutom_dictionary.get_answer(candidate, target)}
        # A maximum letter occurrence of 1 means the candidate has no repeated
        # letter; only then is the `no_repetition` flag passed to the solver.
        has_unique_letters = (
            sutom_dictionary.sutom_max_letter_occurence[candidate] == 1
        )
        if has_unique_letters:
            remaining = sutom_dictionary.solve(
                nb_letters, first_letter, sutom_answers, no_repetition=True
            )
        else:
            remaining = sutom_dictionary.solve(nb_letters, first_letter, sutom_answers)
        score[candidate] += len(remaining)

100%|████████████████████████████████████████████████████████████████████████████████| 836/836 [04:34<00:00,  3.04it/s]


### Save best starters list

In [7]:
# Turn each candidate's accumulated score into a mean per possible target.
a = {k: v / len(candidates) for k, v in score.items()}
df = pd.DataFrame.from_dict(a, orient="index")
file_name = "./scores/scores_" + str(nb_letters) + "_" + first_letter + ".csv"
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists; otherwise the long scoring run above would end
# in an OSError on a fresh checkout.
Path(file_name).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(file_name, header=False)

### Top ten words to start this SUTOM challenge

In [8]:
# Rank the opening words by mean score in ascending order and show the first ten.
ranked_starters = sorted(a.items(), key=lambda word_and_score: word_and_score[1])
ranked_starters[:10]

[('VIRALES', 5.937799043062201),
 ('VIRATES', 5.947368421052632),
 ('VARIEES', 6.2631578947368425),
 ('VARIONS', 6.705741626794258),
 ('VARIETE', 6.837320574162679),
 ('VASIERE', 6.892344497607655),
 ('VALINES', 6.9330143540669855),
 ('VITALES', 7.045454545454546),
 ('VALISER', 7.119617224880383),
 ('VETIRAS', 7.157894736842105)]