In [None]:
import pandas as pd
import time
import rtsvg
rt = rtsvg.RACETrack()
from xwords import XWords, XWordsSolver
import copy
import os
# Locate the puzzle's four input files in the data directory by substring match on the
# file name, then build the XWords object that every later cell copies or displays.
_dir_    = '../../../data/crossword_puzzle_screenshots/'
# Sorted so that, if several files match the same role, the winner does not depend on
# the order in which the operating system happens to list the directory.
_files_  = sorted(os.listdir(_dir_))
entries_file    = None
geometries_file = None
blockers_file   = None
answers_file    = None
for _file_ in _files_:
    # Later cells save '<date>_xwords_answers.parquet' and '<date>_xwords_results.parquet'
    # into this same directory.  Without this guard the 'answers' substring test below
    # would also match those solver outputs and could select one as the puzzle's answers file.
    if _file_.endswith(('_xwords_answers.parquet', '_xwords_results.parquet')): continue
    if   'entries'    in _file_: entries_file    = _dir_ + _file_
    elif 'geometries' in _file_: geometries_file = _dir_ + _file_
    elif 'blockers'   in _file_: blockers_file   = _dir_ + _file_
    elif 'answers'    in _file_: answers_file    = _dir_ + _file_
xwords = XWords(rt, entries_file, geometries_file, blockers_file, answers_file)
# WARNING: re-running this cell empties results_lu and discards every solver result
# collected in this kernel session that has not yet been saved to parquet.
results_lu = {}
xwords

In [None]:
import os
import importlib.util
import inspect
import requests
def getClassesFromFile(file_path):
    """Execute the Python file at `file_path` as a module and list the classes it defines.

    The module is named after the file's base name (extension stripped).  Only classes
    whose `__module__` equals that module name are returned, so classes the file merely
    imports from elsewhere are left out.  The result follows the name-sorted order of
    `inspect.getmembers`.

    Note: the file's top-level code is executed as a side effect, and the module is
    not registered in `sys.modules`.
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))
    spec   = importlib.util.spec_from_file_location(stem, file_path)
    loaded = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(loaded)
    return [member
            for _name, member in inspect.getmembers(loaded, inspect.isclass)
            if member.__module__ == loaded.__name__]
# https://stackoverflow.com/questions/79372940/how-to-get-a-list-of-models-available-in-ollama-using-langchain
OLLAMA_URL = "http://127.0.0.1:11434"
def getInstalledModels(timeout: float = 10.0) -> list:
    """Return the names of the models reported by the Ollama server at OLLAMA_URL.

    Queries `/api/tags` and collects each entry's "model" field, dropping a trailing
    ":latest" tag so the bare model name is returned.

    Parameters:
        timeout: seconds to wait for the server before giving up.  Without a timeout
                 `requests.get` can block forever if the server never answers.

    Raises:
        requests.RequestException: on connection failure, timeout, or a non-success
                                   HTTP status (via `raise_for_status`).
    """
    response = requests.get(OLLAMA_URL + "/api/tags", timeout=timeout)
    # Fail with the HTTP error itself rather than a confusing JSON/KeyError further down.
    response.raise_for_status()
    suffix = ':latest'
    result = []
    for model in response.json()["models"]:
        name = model["model"]
        if name.endswith(suffix): name = name[:-len(suffix)]
        result.append(name)
    return result
# Installed models, minus the ones that cannot take part in the runs below.
all_models = {
    _name_ for _name_ in getInstalledModels()
    if  _name_ != 'nomic-embed-text'   # remove embedding models
    and _name_ != 'qwen3:0.6b'         # this model won't finish...
}
print('total models: ', len(all_models)) # all_models

In [None]:
def loadResultsAndAnswers(directory=None):
    """Load every saved results/answers parquet file and return them as two frames.

    Files ending in '_xwords_results.parquet' are concatenated into the first frame,
    files ending in '_xwords_answers.parquet' into the second.

    Parameters:
        directory: folder to scan; defaults to the notebook-level `_dir_`.

    Returns:
        (df_results, df_answers).  If no file of a kind exists yet, that frame is
        empty but carries the columns the save cell writes, so callers such as
        `df_answers['model']` keep working on a fresh data directory instead of
        failing inside `pd.concat` on an empty list.
    """
    if directory is None: directory = _dir_
    results_frames, answers_frames = [], []
    # Sorted so the row order of the concatenated frames is reproducible.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if   name.endswith('_xwords_answers.parquet'): answers_frames.append(pd.read_parquet(path))
        elif name.endswith('_xwords_results.parquet'): results_frames.append(pd.read_parquet(path))
    results_columns = ['algorithm', 'model', 'char_level_accuracy', 'num_of_llm_requests',
                       'time', 'prompt_tokens', 'output_tokens']
    answers_columns = ['algorithm', 'model', 'clue_number', 'orientation', 'answer']
    df_results_all = pd.concat(results_frames) if results_frames else pd.DataFrame(columns=results_columns)
    df_answers_all = pd.concat(answers_frames) if answers_frames else pd.DataFrame(columns=answers_columns)
    return df_results_all, df_answers_all
# Load everything saved so far, then work out which installed models still have no answers.
df_results, df_answers = loadResultsAndAnswers()
print(f'{len(df_results)=} {len(df_answers)=}')
_models_done_ = set(df_answers['model'])
models = all_models.difference(_models_done_)
print('remaining models: ', len(models), models)

In [None]:
# Run every XWordsSolver subclass found in the working directory's .py files against
# every remaining model, storing each outcome in results_lu[algorithm name][model].
for _model_ in models:
    print(_model_)
    for _filename_ in os.listdir():
        if not _filename_.endswith('.py'): continue
        for _class_ in getClassesFromFile(_filename_):
            if not issubclass(_class_, XWordsSolver): continue
            _algo_name_ = _class_.__name__
            _by_model_  = results_lu.setdefault(_algo_name_, {})
            print(_class_)
            # Already solved in this session: keep the earlier result.
            if _model_ in _by_model_:
                print('skipping ', _model_)
                continue
            # Each run gets its own puzzle copy so the shared xwords object stays untouched.
            xwords_copy = copy.deepcopy(xwords)
            _instance_  = _class_(xwords=xwords_copy, model=_model_)
            answer_lu, request_stats, num_of_llm_requests = _instance_.solve()
            char_level_acc = xwords_copy.characterLevelAccuracy()
            _by_model_[_model_] = (answer_lu, request_stats, num_of_llm_requests, char_level_acc, xwords_copy)
            print(f'\n{_algo_name_} {_model_} {char_level_acc}')

In [None]:
# One tile per (algorithm, model): the solved puzzle when a result exists in results_lu,
# otherwise a copy of the base puzzle.  Each algorithm fills one row of the table.
_tiles_ = []
for _algo_, _by_model_ in results_lu.items():
    for _model_ in models:
        _entry_  = _by_model_.get(_model_)
        # Element 4 of a stored result tuple is the puzzle object that run worked on.
        _xwords_ = copy.deepcopy(xwords) if _entry_ is None else _entry_[4]
        _tiles_.append(_xwords_.smallMultipleSVG())
rt.table(_tiles_, per_row=len(models), spacer=10)

In [None]:
# Flatten results_lu (algorithm -> model -> result tuple) into two column-oriented tables:
#   answers: one row per (algorithm, model, clue)
#   results: one row per (algorithm, model) with accuracy and request totals
_df_answers_ = {'algorithm':[], 'model':[], 'clue_number':[], 'orientation':[], 'answer':[]}
_df_results_ = {'algorithm':[], 'model':[], 'char_level_accuracy':[], 'num_of_llm_requests':[], 'time':[], 'prompt_tokens':[], 'output_tokens':[]}
for _algo_ in results_lu.keys():
    for _model_ in results_lu[_algo_].keys():
        _tuple_ = results_lu[_algo_][_model_]
        # Tuple layout as stored by the solver loop:
        # (answer_lu, request_stats, num_of_llm_requests, char_level_acc, xwords_copy)
        _answer_lu_, _request_stats_, _num_of_llm_requests_, _char_level_accuracy_ = _tuple_[:4]
        # Positions 2, 3 and 4 of each request-stats record are summed into the 'time',
        # 'prompt_tokens' and 'output_tokens' columns respectively.
        _time_sum_, _prompt_sum_, _output_sum_ = 0.0, 0, 0
        for x in _request_stats_:
            _time_sum_   += x[2]
            _prompt_sum_ += x[3]
            _output_sum_ += x[4]
        # answer_lu is keyed by (clue_number, orientation).
        for _clue_ in _answer_lu_.keys():
            _df_answers_['algorithm'].append(_algo_)
            _df_answers_['model'].append(_model_)
            _df_answers_['clue_number'].append(_clue_[0])
            _df_answers_['orientation'].append(_clue_[1])
            _df_answers_['answer'].append(_answer_lu_[_clue_])
        _df_results_['algorithm'].append(_algo_)
        _df_results_['model'].append(_model_)
        _df_results_['char_level_accuracy'].append(_char_level_accuracy_)
        _df_results_['num_of_llm_requests'].append(_num_of_llm_requests_)
        _df_results_['time'].append(_time_sum_)
        _df_results_['prompt_tokens'].append(_prompt_sum_)
        _df_results_['output_tokens'].append(_output_sum_)
df_answers_new = pd.DataFrame(_df_answers_)
df_results_new = pd.DataFrame(_df_results_)

if len(df_answers_new) > 0:
    # Hard-coded date stamp: change it before saving a new batch, otherwise the
    # existence check below refuses to overwrite the earlier files.
    _date_stamp_       = '20250505'
    _answers_filename_ = _dir_ + _date_stamp_ + '_xwords_answers.parquet'
    _results_filename_ = _dir_ + _date_stamp_ + '_xwords_results.parquet'
    # Check both targets before writing either one, so a clash on the results file
    # cannot leave an answers file on disk without its matching results file.
    for _filename_ in (_answers_filename_, _results_filename_):
        if os.path.exists(_filename_): raise FileExistsError('file already exists')
    df_answers_new.to_parquet(_answers_filename_)
    df_results_new.to_parquet(_results_filename_)

In [None]:
# Reload from disk so the charts cover every saved parquet file in the data directory.
df_results, df_answers = loadResultsAndAnswers()

# Sorted so each algorithm gets the same colour on every run; iterating a bare set of
# strings can yield a different order from one kernel to the next.
_algorithms_ = sorted(set(df_results['algorithm']))
_colors_     = rt.co_mgr.brewerColors(scale_type='qualitative', n=len(_algorithms_), alt=1)
for i, _algorithm_ in enumerate(_algorithms_): rt.co_mgr.str_to_color_lu[_algorithm_] = _colors_[i]

# Accuracy against total time, then accuracy summed per algorithm and per (model, algorithm).
parms = {'color_by':'algorithm', 'w':384, 'h':384}
rt.tile([rt.xy       (df_results, x_field='time', y_field='char_level_accuracy', dot_size='large', **parms),
         rt.histogram(df_results, bin_by='algorithm',           count_by='char_level_accuracy', color_by='algorithm', h=384, w=256),
         rt.histogram(df_results, bin_by=['model','algorithm'], count_by='char_level_accuracy', **parms)], spacer=10)

In [None]:
# Lower-case the answers so differently-cased spellings land in the same histogram bin.
df_answers['answer_lower'] = df_answers['answer'].str.lower()
# The clue to inspect; both names are read by the query string below through '@'.
_clue_num_    = 65
_orientation_ = 'down'
_df_clue_ = df_answers.query('orientation == @_orientation_ and clue_number == @_clue_num_')
rt.tile([rt.histogram(_df_clue_, bin_by='answer_lower', color_by='algorithm', bar_h=20, w=384, h=670),
         xwords])

In [None]:
#
# Best possible w/ the small models... if the correct answer were chosen...
# ... 0.81 character level accuracy
#
# For each model: fill in every entry for which at least one of that model's saved
# answers (from any algorithm) matches the true answer case-insensitively, then
# measure the character-level accuracy of the resulting grid.
_sorter_ = []
# Work on a private copy: clearing and guessing on the shared `xwords` object would
# change what the other cells deep-copy and display.
_xwords_scratch_ = copy.deepcopy(xwords)
for _model_ in set(df_answers['model']):
    _xwords_scratch_.clearAll()
    # Filter once per model rather than once per clue.
    _df_model_ = df_answers.query('model == @_model_')
    for _tuple_ in _xwords_scratch_.entries:
        _clue_num_, _orientation_ = _tuple_
        _df_ = _df_model_.query('clue_number == @_clue_num_ and orientation == @_orientation_')
        if len(_df_) == 0: continue
        _truth_ = _xwords_scratch_.answer(_clue_num_, _orientation_)
        # Lower-case here instead of relying on an 'answer_lower' column added by another cell.
        if _truth_.lower() in set(_df_['answer'].str.lower()):
            _xwords_scratch_.guess(_clue_num_, _orientation_, _truth_)
    _sorter_.append((_xwords_scratch_.characterLevelAccuracy(), _model_))
_sorter_.sort(reverse=True)
for _tuple_ in _sorter_:
    print(f'{_tuple_[1]:>24} | {_tuple_[0]:.3f}')

In [None]:
# Peek at three randomly chosen rows of the combined answers table.
df_answers.sample(n=3)