# Nerdle Solver - Initial Guess Assessment Optimization
We prove (by brute force) that you can always solve mini-Nerdle in at most $4$ guesses regardless of the starting expression, provided you use the optimal strategy. The worst starts have repeating numbers and thus yield less information, e.g. `10-5=5`. The best start has all different numbers: `28/7=4`, which needs at most $3$ guesses and $2.65 \pm 0.5$ guesses on average.

To find the best initial guess, we map out the game tree.

In [300]:
%load_ext autoreload
%autoreload 2

import collections
import ctypes
import itertools
import multiprocessing
import numpy as np
import matplotlib.pyplot as plt

import analysis
import nerdle
import score as s
import generator
sgo = ctypes.CDLL(s.SCORE_GUESS_OPT_SO)
from nerdle import Hint

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Initial Guess Optimization

In [None]:
# For each board size (6, 7 and 8 slots): load the solver data, build the game
# tree and report the distribution of the number of guesses over all leaves.
for num_slots in range(6, 9):
    print("num_slots", num_slots)
    db_file = "db/nerdle{}.db".format(num_slots)
    solver_data = nerdle.create_solver_data(num_slots, db_file)
    # Not read in this cell; kept so the notebook-global `d` is still assigned.
    d = solver_data.score_db

    print("Building tree")
    builder = analysis.GameTreeBuilder(solver_data)
    tree = builder.build()
    # NOTE(review): TreeDepthCalculator is not imported in the visible cells;
    # presumably it lives in `analysis` or was defined earlier in the session
    # -- confirm.
    tdc = TreeDepthCalculator(tree)

    # Leaves are the nodes without children; #guesses for a leaf is depth + 1.
    leaf_depths = [depth for node, depth in tdc.depth.items() if not node.children]
    num_guesses = np.array(leaf_depths) + 1
    num_leaves = len(leaf_depths)
    freq = collections.Counter(num_guesses)

    print("Best initial guess", tree.key[1], "#distinct games", len(tdc.depth), "#leaves", num_leaves)
    # Percentage of leaves per guess count, in ascending guess-count order.
    for guess_count in sorted(freq):
        share = 100 * freq[guess_count] / num_leaves
        print("{} guesses: {:6.2f}%".format(guess_count, share))
    # NOTE(review): no new figure is created per iteration, so the histograms
    # of all board sizes are drawn onto the same axes.
    plt.hist(num_guesses)

num_slots 6
Building tree
Best initial guess 3*8=24 #distinct games 250 #leaves 206
2 guesses:  15.05%
3 guesses:  83.98%
4 guesses:   0.97%
num_slots 7
Building tree
Best initial guess 24-16=8 #distinct games 9363 #leaves 7561
2 guesses:   1.44%
3 guesses:  57.23%
4 guesses:  41.33%
num_slots 8
Building tree


In [350]:
# For each board size, pick the key whose most frequent value in its row of
# the score database occurs the fewest times (min over keys of the max count),
# then print that count and how much it shrinks the candidate set.
# Presumably d[k] holds the score of guess k against every answer, so the max
# count is the worst-case bucket size -- confirm against `nerdle`.
for num_slots in range(6, 9):
    print("num_slots", num_slots)
    db_file = "db/nerdle{}.db".format(num_slots) 
    solver_data = nerdle.create_solver_data(num_slots, db_file)
    d = solver_data.score_db
    answers = solver_data.all_keys
    # Tuples compare element by element: largest count first, then the
    # `k not in answers` flag, then k itself as the final tie-break.
    # NOTE(review): `answers` is `solver_data.all_keys`, the same collection
    # being iterated, so `k not in answers` looks like it is always False and
    # ties effectively resolve to the smallest k -- confirm whether a separate
    # set of answer keys was intended here.
    # `%time` is an IPython magic that times this single statement.
    %time bucket_size, _, k = min((max(collections.Counter(d[k]).values()), k not in answers, k) for k in solver_data.all_keys)
    # Reduction factor = number of columns of d divided by the bucket size.
    print("{} bucket size {} reduction factor {:.1f}".format(
        solver_data.answers[k], bucket_size, d.shape[1] / bucket_size))

num_slots 6
CPU times: user 12.7 ms, sys: 395 µs, total: 13.1 ms
Wall time: 13.4 ms
3*8=24 bucket size 10 reduction factor 20.6
num_slots 7
CPU times: user 7.3 s, sys: 12.8 ms, total: 7.31 s
Wall time: 7.32 s
24-16=8 bucket size 158 reduction factor 47.9
num_slots 8
CPU times: user 41.1 s, sys: 92.4 ms, total: 41.2 s
Wall time: 41.3 s
58-46=12 bucket size 101 reduction factor 175.5


In [352]:
# Uses `answers`, `d` and `k` as left by the last iteration of the loop above.
# Presumably groups the answers by the score they get against guess k; the
# result is used below as a mapping whose values have a length -- confirm
# against analysis._score_dict.
b = analysis._score_dict(answers, d, k) 

In [360]:
# Number of entries in b and the length of its largest value.
len(b), max(map(len, b.values()))

(1650, 101)

In [359]:
# Show the entry of solver_data.answers stored under key k.
solver_data.answers[k]

'58-46=12'