# In [29]:
"""
https://leetcode.com/problems/word-ladder/


A transformation sequence from word beginWord to word endWord using a dictionary wordList is
a sequence of words beginWord -> s1 -> s2 -> ... -> sk such that:

Every adjacent pair of words differs by a single letter.
Every si for 1 <= i <= k is in wordList. Note that beginWord does not need to be in wordList.
sk == endWord
Given two words, beginWord and endWord, and a dictionary wordList, 
return the number of words in the shortest transformation sequence from beginWord to endWord, 
or 0 if no such sequence exists.

Constraints:
1 <= beginWord.length <= 10
endWord.length == beginWord.length
1 <= wordList.length <= 5000
wordList[i].length == beginWord.length
beginWord, endWord, and wordList[i] consist of lowercase English letters.
beginWord != endWord
All the words in wordList are unique.
"""

# N: num words
# L: word length

# search: depth-first or breadth-first?
# a path could/should not be deeper than the total number of words
# nodes
# edges:   pair of words in beginWord, endWord, words
#          undirected
#            pre-compute?  len(words) == 5000
#             bruteforce takes too long
#             any tricks to speed this up?

#     dog -> set of wildcard patterns (one letter masked): do* d*g *og
#     cog ->  co* c*g *og

#  dict of search string -> words O((N + 2) * L)
#    *og -> cog, dog, fog, hog, ...
# 

from collections import defaultdict, deque

def wordLadder(beginWord, endWord, words):
    """Return the number of words in the shortest ladder from beginWord to endWord.

    A ladder is a sequence beginWord -> s1 -> ... -> endWord in which adjacent
    words differ in exactly one position and every word after beginWord is
    taken from ``words``.

    Args:
        beginWord: starting word; it does not have to appear in ``words``.
        endWord: target word; it must appear in ``words`` to be reachable.
        words: collection of candidate words. Entries whose length differs
            from ``beginWord`` cannot be one substitution away from any
            ladder word, so they are ignored.

    Returns:
        The count of words in a shortest ladder, including both beginWord and
        endWord, or 0 when no ladder exists (also when beginWord is empty,
        equals endWord, or ``words`` is empty).
    """
    if not beginWord or beginWord == endWord or not words:
        return 0
    wordSet = set(words)
    if endWord not in wordSet:
        return 0
    L = len(beginWord)

    # Index every candidate under each of its one-wildcard patterns, e.g.
    # "dog" -> "*og", "d*g", "do*". Two words are adjacent in the ladder
    # graph exactly when they share one of these patterns.
    searchDict = defaultdict(set)
    for w in wordSet:
        if len(w) != L:
            continue
        for cIdx in range(L):
            searchDict[w[:cIdx] + '*' + w[cIdx + 1:]].add(w)

    # Breadth-first search: the first time a word is reached is via a
    # shortest ladder, so a single shared visited set is sufficient and each
    # word is enqueued at most once. The only bound on ladder length is the
    # number of distinct words, which the visited set enforces implicitly.
    visited = {beginWord}
    q = deque([(beginWord, 1)])  # (word, number of words in the ladder so far)
    while q:
        curW, ladderLen = q.popleft()
        for cIdx in range(L):
            pattern = curW[:cIdx] + '*' + curW[cIdx + 1:]
            # .get avoids inserting empty entries into the defaultdict.
            for nextW in searchDict.get(pattern, ()):
                if nextW in visited:
                    continue
                if nextW == endWord:
                    return ladderLen + 1
                visited.add(nextW)
                q.append((nextW, ladderLen + 1))
    return 0


# Each case is (beginWord, endWord, wordList, expected ladder length).
tests = [
    # One shortest transformation sequence is
    # "hit" -> "hot" -> "dot" -> "dog" -> "cog", which is 5 words long.
    ("hit", "cog", ["hot", "dot", "dog", "lot", "log", "cog"], 5),
    # endWord is missing from the word list, so no ladder exists.
    ("hit", "cog", ["hot", "dot", "dog", "lot", "log"], 0),
]

for begin, end, candidates, expected in tests:
    assert wordLadder(begin, end, candidates) == expected
