In [5]:
import pandas as pd

In [6]:
state_pops = pd.read_csv('./states_population.csv')
state_pops['NAME'] = state_pops['NAME'].str.lower()
state_pops['NAME'] = state_pops['NAME'].str.replace(' ', '')
state_pops.head()

Unnamed: 0,NAME,POPULATION
0,alabama,5024279
1,alaska,733391
2,arizona,7151502
3,arkansas,3011524
4,california,39538223


In [7]:
pop_dict = {state: pop for state, pop in zip(state_pops['NAME'], state_pops['POPULATION'])}
del state_pops

In [8]:
spelling_dict = dict()
valid_subsets = dict()

for state_name in pop_dict.keys():
    valid_subsets[state_name] = set()
    # replacing each character with any other alphabet character
    for i in range(len(state_name)):
        for c in 'abcdefghijklmnopqrstuvwxyz':
            altered_name = state_name[:i] + c + state_name[i+1:]
            spelling_dict[altered_name] = state_name
            
            for j in range(1, len(altered_name) + 1):
                valid_subsets[state_name].add(altered_name[:j])

In [9]:
single_dict = dict()
double_dict = dict()
triple_dict = dict()
quad_dict = dict()

for state in pop_dict.keys():
    for i in range(len(state)):
        # Single character sequences
        single = state[i]
        single_dict[single] = single_dict.get(single, 0) + 1

        # Double character sequences
        if i < len(state) - 1:
            double = state[i:i+2]
            double_dict[double] = double_dict.get(double, 0) + 1

        # Triple character sequences
        if i < len(state) - 2:
            triple = state[i:i+3]
            triple_dict[triple] = triple_dict.get(triple, 0) + 1

        # Quadruple character sequences
        if i < len(state) - 3:
            quad = state[i:i+4]
            quad_dict[quad] = quad_dict.get(quad, 0) + 1

In [10]:
def is_valid_subset(curr_str, covered):
    for state, subsets in valid_subsets.items():
        if curr_str in subsets and state not in covered:
            return True
        
    return False

def get_valid_moves(grid, curr_str, i, j, covered):
    dirs = [(1, 1), (1, -1), (-1, 1), (-1, -1), (1, 0), (-1, 0), (0, 1), (0, -1)]
    moves = [(i + i_, j + j_) for i_, j_ in dirs if 0 <= i + i_ < len(grid) and 0 <= j + j_ < len(grid[0])]
    return [(i, j) for i, j in moves if is_valid_subset(curr_str + grid[i][j], covered)]

def get_names(grid, i, j, covered):
    
    def dfs(curr_i, curr_j, curr_str):
        if curr_str in spelling_dict:
            covered.add(spelling_dict[curr_str])
            return

        valid_moves = get_valid_moves(grid, curr_str, curr_i, curr_j, covered)
        for next_i, next_j in valid_moves:
            dfs(next_i, next_j, curr_str + grid[next_i][next_j])
            
    dfs(i, j, grid[i][j])
    return covered

def get_score(grid):
    covered = set()
    score = 0
    for i in range(len(grid)):
        for j in range(len(grid[0])):
            covered = get_names(grid, i, j, covered)
            
    for state in covered:
        score += pop_dict[state]
        
    return score, covered

In [11]:
print(sorted(double_dict.items(), key=lambda x: x[1], reverse=True))

[('in', 11), ('ne', 10), ('an', 9), ('or', 8), ('as', 7), ('on', 7), ('ia', 7), ('is', 7), ('la', 6), ('ar', 6), ('na', 6), ('ma', 5), ('da', 5), ('ss', 5), ('mi', 5), ('ta', 5), ('ka', 4), ('ns', 4), ('li', 4), ('ni', 4), ('co', 4), ('nn', 4), ('ut', 4), ('wa', 4), ('ou', 4), ('si', 4), ('hi', 4), ('so', 4), ('ew', 4), ('th', 4), ('al', 3), ('ri', 3), ('sa', 3), ('ca', 3), ('ol', 3), ('lo', 3), ('ic', 3), ('re', 3), ('rg', 3), ('gi', 3), ('ah', 3), ('ho', 3), ('no', 3), ('nd', 3), ('en', 3), ('nt', 3), ('se', 3), ('es', 3), ('ot', 3), ('ir', 3), ('am', 2), ('sk', 2), ('rk', 2), ('ra', 2), ('ad', 2), ('de', 2), ('aw', 2), ('id', 2), ('ha', 2), ('ai', 2), ('io', 2), ('yl', 2), ('ch', 2), ('mo', 2), ('va', 2), ('sh', 2), ('er', 2), ('ex', 2), ('wy', 2), ('yo', 2), ('rt', 2), ('hc', 2), ('ro', 2), ('hd', 2), ('ak', 2), ('ko', 2), ('om', 2), ('te', 2), ('vi', 2), ('ng', 2), ('ab', 1), ('ba', 1), ('iz', 1), ('zo', 1), ('if', 1), ('fo', 1), ('rn', 1), ('do', 1), ('ec', 1), ('ct', 1), ('ti', 

In [13]:
print(sorted(single_dict.items(), key=lambda x: x[1], reverse=True))

[('a', 61), ('i', 44), ('n', 43), ('o', 36), ('s', 32), ('e', 28), ('r', 22), ('t', 19), ('l', 15), ('h', 15), ('m', 14), ('c', 12), ('d', 11), ('w', 11), ('k', 10), ('u', 8), ('g', 8), ('y', 6), ('v', 5), ('p', 4), ('b', 2), ('f', 2), ('x', 2), ('z', 1), ('j', 1)]


In [14]:
print(sorted(pop_dict.items(), key=lambda x: x[1], reverse=True))

[('california', 39538223), ('texas', 29145505), ('florida', 21538187), ('newyork', 20201249), ('pennsylvania', 13002700), ('illinois', 12812508), ('ohio', 11799448), ('georgia', 10711908), ('northcarolina', 10439388), ('michigan', 10077331), ('newjersey', 9288994), ('virginia', 8631393), ('washington', 7693612), ('arizona', 7151502), ('massachusetts', 7029917), ('tennessee', 6910840), ('indiana', 6785528), ('maryland', 6177224), ('missouri', 6154913), ('wisconsin', 5893718), ('colorado', 5773714), ('minnesota', 5706494), ('southcarolina', 5118425), ('alabama', 5024279), ('louisiana', 4657757), ('kentucky', 4505836), ('oregon', 4237256), ('oklahoma', 3959353), ('connecticut', 3605944), ('utah', 3271616), ('iowa', 3190369), ('nevada', 3104614), ('arkansas', 3011524), ('mississippi', 2961279), ('kansas', 2937880), ('newmexico', 2117522), ('nebraska', 1961504), ('idaho', 1839106), ('westvirginia', 1793716), ('hawaii', 1455271), ('newhampshire', 1377529), ('maine', 1362359), ('rhodeisland',

In [16]:
grid = [['p', 'd', 's', 'm', 'a'],
        ['e', 'a', 'i', 'r', 'k'],
        ['a', 'n', 'o', 'l', 'a'],
        ['t', 'e', 'r', 'c', 's'],
        ['u', 'g', 'w', 'm', 'y']]

max_score = 0
max_grid = None
max_non_cali_grid = None

test_points = [(0, 0), (4, 3), ]

In [17]:
max_grid = [['p', 'd', 's', 'm', 'a'],
            ['e', 'a', 'i', 'r', 'k'],
            ['a', 'n', 'o', 'l', 'a'],
            ['w', 'e', 'r', 'c', 't'],
            ['m', 't', 'g', 'i', 'u']]

_, states = get_score(max_grid)

max_grid = [['d', 's', 's', 'd', 'a'],
            ['e', 'a', 'i', 'r', 'k'],
            ['n', 'n', 'o', 'l', 'a'],
            ['w', 'e', 'r', 'c', 't'],
            ['m', 't', 'g', 'i', 'u']]

score, new_states = get_score(max_grid)
diff = states - new_states
states = new_states

print(f'{score:,}'), states, len(states), diff

213,271,394


(None,
 {'alaska',
  'arizona',
  'arkansas',
  'california',
  'colorado',
  'florida',
  'georgia',
  'idaho',
  'illinois',
  'indiana',
  'iowa',
  'kansas',
  'louisiana',
  'maine',
  'nevada',
  'newmexico',
  'newyork',
  'northcarolina',
  'ohio',
  'oregon',
  'tennessee',
  'texas',
  'utah'},
 23,
 {'alabama'})

In [None]:
c4 = {'utah', 'nevada', 'newmexico', 'arizona'}

print(c4 - states)

In [None]:
len(states)