# Premature Optimization is the root of all evil

In [1]:
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
from numba import jit
import pstats
import heapq
import time
import cProfile
import pandas as pd

In [2]:
def getInversePerm(arr):
    # gets the inverse move for a certain move
    res = [0 for i in range(len(arr))]
    for i in range(len(arr)):
        res[arr[i]] = i
    return res

In [3]:
# File paths
puzzle_info_path = 'puzzle_info.csv'
puzzles_path = 'puzzles.csv'
sample_submission_path = 'submission.csv' # change name across board

# Loading the data
puzzle_info_df = pd.read_csv(puzzle_info_path)
puzzles_df = pd.read_csv(puzzles_path)
sample_submission_df = pd.read_csv(sample_submission_path)

# Displaying the first few rows of each dataframe
puzzles_df.describe()

Unnamed: 0,id,num_wildcards
count,398.0,398.0
mean,198.5,1.809045
std,115.036951,10.047931
min,0.0,0.0
25%,99.25,0.0
50%,198.5,0.0
75%,297.75,0.0
max,397.0,176.0


In [4]:
puzzles_df.head()

Unnamed: 0,id,puzzle_type,solution_state,initial_state,num_wildcards
0,0,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,D;E;D;A;E;B;A;B;C;A;C;A;D;C;D;F;F;F;E;E;B;F;B;C,0
1,1,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,D;E;C;B;B;E;F;A;F;D;B;F;F;E;B;D;A;A;C;D;C;E;A;C,0
2,2,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,E;F;C;C;F;A;D;D;B;B;A;F;E;B;C;A;A;B;D;F;E;E;C;D,0
3,3,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,A;C;E;C;F;D;E;D;A;A;F;A;B;D;B;F;E;D;B;F;B;C;C;E,0
4,4,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,E;D;E;D;A;E;F;B;A;C;F;D;F;D;C;A;F;B;C;C;B;E;B;A,0


In [5]:
# Parsing the initial_state and solution_state columns
# Converting the semicolon-separated string values into lists of colors
puzzles_df['parsed_initial_state'] = puzzles_df['initial_state'].apply(lambda x: x.split(';'))
seen = {}

for i in range(len(puzzles_df['parsed_initial_state'])):
    for j in range(len(puzzles_df['parsed_initial_state'][i])):
        if puzzles_df['parsed_initial_state'][i][j] not in seen:
            seen[puzzles_df['parsed_initial_state'][i][j]] = len(seen)
        puzzles_df['parsed_initial_state'][i][j] = seen[puzzles_df['parsed_initial_state'][i][j]]

puzzles_df['parsed_solution_state'] = puzzles_df['solution_state'].apply(lambda x: x.split(';'))

for i in range(len(puzzles_df['parsed_solution_state'])):
    for j in range(len(puzzles_df['parsed_solution_state'][i])):
        puzzles_df['parsed_solution_state'][i][j] = seen[puzzles_df['parsed_solution_state'][i][j]]

# Displaying the modified dataframe with parsed states
puzzles_df[['id', 'puzzle_type', 'parsed_initial_state', 'parsed_solution_state']].head()

Unnamed: 0,id,puzzle_type,parsed_initial_state,parsed_solution_state
0,0,cube_2/2/2,"[0, 1, 0, 2, 1, 3, 2, 3, 4, 2, 4, 2, 0, 4, 0, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
1,1,cube_2/2/2,"[0, 1, 4, 3, 3, 1, 5, 2, 5, 0, 3, 5, 5, 1, 3, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
2,2,cube_2/2/2,"[1, 5, 4, 4, 5, 2, 0, 0, 3, 3, 2, 5, 1, 3, 4, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
3,3,cube_2/2/2,"[2, 4, 1, 4, 5, 0, 1, 0, 2, 2, 5, 2, 3, 0, 3, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
4,4,cube_2/2/2,"[1, 0, 1, 0, 2, 1, 5, 3, 2, 4, 5, 0, 5, 0, 4, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."


In [6]:
import json

# Converting the string representation of allowed_moves to dictionary
puzzle_info_df['allowed_moves'] = puzzle_info_df['allowed_moves'].apply(lambda x: json.loads(x.replace("'", '"')))

# Selecting an example puzzle type and displaying its allowed moves
example_puzzle_type = puzzle_info_df['puzzle_type'].iloc[0]
example_allowed_moves = puzzle_info_df[puzzle_info_df['puzzle_type'] == example_puzzle_type]['allowed_moves'].iloc[0]

example_puzzle_type

'cube_2/2/2'

In [7]:
# type : (np.array(move_perm_i), np.array(name_i))
puz_info = {}

for i in range(len(puzzle_info_df)):
    puz_info[puzzle_info_df['puzzle_type'][i]] = [[], []]
    for j in puzzle_info_df['allowed_moves'][i].keys():
        puz_info[puzzle_info_df['puzzle_type'][i]][1].append(j)
        puz_info[puzzle_info_df['puzzle_type'][i]][0].append(np.array(puzzle_info_df['allowed_moves'][i][j]))

        puz_info[puzzle_info_df['puzzle_type'][i]][1].append(str('-' + j)) # might be the opposite
        puz_info[puzzle_info_df['puzzle_type'][i]][0].append(np.array(getInversePerm(puzzle_info_df['allowed_moves'][i][j])))

# Step 5: Develop search algorithm

In [8]:
@jit(nopython=True, parallel = True, fastmath = True)
def hash(perm):
    base = 9973
    modb = 1000000007
    modc = 1000000009

    B, C = 0, 0
    for i in perm:
        B = (B * base) % modb + i
        C = (C * base) % modc + i

    return (B, C)

def dist(a, b):
    return np.count_nonzero(a != b)

In [26]:
# 1e6 ~ 2 seconds
mx_mem = int(1e6)
mem_idx = 0

last_state = np.zeros(mx_mem, dtype=int)
last_move = np.zeros(mx_mem, dtype=int)

print(last_state)

[0 0 0 ... 0 0 0]


In [27]:
%%time

def a_star_search(idx):
    global mem_idx
    
    initial_state = np.array(puzzles_df['parsed_initial_state'][idx])
    goal_state = np.array(puzzles_df['parsed_solution_state'][idx])
    max_dist = puzzles_df['num_wildcards'][idx]
    move_perm = np.array(puz_info[puzzles_df['puzzle_type'][idx]][0])

    n = len(initial_state)
    m = len(move_perm)
    
    pq = []
    heapq.heappush(pq, (0, mem_idx, initial_state))  # (priority, mem_idx, state)
    mem_idx += 1
    
    considered = set()
    considered.add(hash(initial_state))

    while mem_idx + m < mx_mem:
        cur_p, cur_idx, cur_state = heapq.heappop(pq) 
        cur_dist = dist(cur_state, goal_state)
        cur_p -= cur_dist 

        if cur_dist <= max_dist:
            return cur_idx
        
        for i in range(m):
            new_state = cur_state[move_perm[i]]
            cur_hash = hash(new_state)
            
            if cur_hash not in considered:
                considered.add(cur_hash)
                
                last_state[mem_idx] = cur_idx
                last_move[mem_idx] = i
                
                priority = cur_p + 1 + dist(new_state, goal_state)
                
                heapq.heappush(pq, (priority, mem_idx, new_state))
                mem_idx += 1
    return None


cProfile.run('a_star_search(37)')
# print(a_star_search(20))

         7408732 function calls in 3.784 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
  1102393    0.434    0.000    0.434    0.000 2450248532.py:1(hash)
  1061238    0.695    0.000    0.981    0.000 2450248532.py:14(dist)
        1    0.477    0.477    3.784    3.784 <string>:1(<module>)
        1    1.587    1.587    3.307    3.307 <timed exec>:1(a_star_search)
        4    0.000    0.000    0.000    0.000 __init__.py:33(using_copy_on_write)
        4    0.000    0.000    0.000    0.000 base.py:5299(__contains__)
        8    0.000    0.000    0.000    0.000 common.py:367(apply_if_callable)
        4    0.000    0.000    0.000    0.000 frame.py:3856(__getitem__)
        4    0.000    0.000    0.000    0.000 frame.py:4405(_get_item_cache)
        8    0.000    0.000    0.000    0.000 indexing.py:2678(check_dict_or_set_indexers)
        4    0.000    0.000    0.000    0.000 inference.py:334(is_hashable)
        4    0.00

In [28]:
def dig_through_memory(idx):
    res = []
    while idx != 0:
        res.append(last_move[idx])
        idx = last_state[idx]

    res = list(reversed(res))
    return res

def solve(idx):
    global mem_idx
    
    mem_idx = 0
    
    move_name = np.array(puz_info[puzzles_df['puzzle_type'][idx]][1])

    res = a_star_search(idx)
    
    if res == None:
        return sample_submission_df['moves'][idx]

    print("OK :", idx)
    res = dig_through_memory(res)
    for i in range(len(res)):
        res[i] = move_name[res[i]]

    return res

cProfile.run("solve(2)")

         6012304 function calls in 2.793 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 1180216188.py:1(dig_through_memory)
        1    0.269    0.269    2.792    2.792 1180216188.py:10(solve)
   954925    0.207    0.000    0.207    0.000 2450248532.py:1(hash)
   856125    0.545    0.000    0.771    0.000 2450248532.py:14(dist)
        1    0.000    0.000    2.792    2.792 <string>:1(<module>)
        1    1.273    1.273    2.524    2.524 <timed exec>:1(a_star_search)
        5    0.000    0.000    0.000    0.000 __init__.py:33(using_copy_on_write)
        5    0.000    0.000    0.000    0.000 base.py:5299(__contains__)
       10    0.000    0.000    0.000    0.000 common.py:367(apply_if_callable)
        5    0.000    0.000    0.000    0.000 frame.py:3856(__getitem__)
        5    0.000    0.000    0.000    0.000 frame.py:4405(_get_item_cache)
       10    0.000    0.000    0

# Step 6: Build submission format function

In [29]:
def format_solution_for_submission(puzzle_id, solution_moves):
    """
    Format the solution to a puzzle for submission.

    :param puzzle_id: The unique identifier of the puzzle.
    :param solution_moves: List of tuples representing the solution moves.
    :return: Formatted string suitable for submission.
    """
    formatted_moves = []
    for move in solution_moves:
        formatted_moves.append(move)

    # Joining the moves into a single string separated by periods
    return {'id': puzzle_id, 'moves': '.'.join(formatted_moves)}


# Step 7: Define solve function

In [30]:
from tqdm import tqdm

all_ok = []
res_df = []

for i in tqdm(range(len(puzzles_df))):
    res = solve(i)
    # print(res)
    if res == None:
        res = format_solution_for_submission(i, sample_submission_df['moves'][i].split('.'))
    else:
        all_ok.append(i)
        res = format_solution_for_submission(i, res)
    res_df.append(res)

 71%|████████████████████████████▉            | 281/398 [13:55<41:01, 21.03s/it]Exception ignored in: <generator object tqdm.__iter__ at 0x1064ba050>
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.11/site-packages/tqdm/std.py", line 1197, in __iter__
    self.close()
  File "/opt/homebrew/lib/python3.11/site-packages/tqdm/std.py", line 1303, in close
    self.display(pos=0)
  File "/opt/homebrew/lib/python3.11/site-packages/tqdm/std.py", line 1496, in display
    self.sp(self.__str__() if msg is None else msg)
            ^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/tqdm/std.py", line 1152, in __str__
    return self.format_meter(**self.format_dict)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/tqdm/std.py", line 618, in format_meter
    l_bar += '{0:3.0f}%|'.format(percentage)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt: 

KeyboardInterrupt



In [31]:
for i in tqdm(range(len(res_df), len(puzzles_df))):
    res = None
    # print(res)
    if res == None:
        res = format_solution_for_submission(i, sample_submission_df['moves'][i].split('.'))
    else:
        all_ok.append(i)
        res = format_solution_for_submission(i, res)
    res_df.append(res)

100%|███████████████████████████████████████| 117/117 [00:00<00:00, 1196.62it/s]


In [34]:
print(all_ok)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [32]:
res_df = pd.DataFrame(res_df)
res_df

Unnamed: 0,id,moves
0,0,r1.-f1
1,1,f0.r1.f1.-d0.-d0.f1.-d0.-f1.d0
2,2,-f1.-r0.-f0.-r1.-f0.d0.-r1.-d0.f0.d0.d0.-f0
3,3,-f0.d0.-r0.f0.-d0.-r0.d0.-f0.-r0.-f0
4,4,f0.-r0.-f1.-r0.-d1.-r0.-d1.-d1.r0.d0.f1.-d0
...,...,...
393,393,f19.f21.-f39.f20.f2.-f5.f7.-r3.f55.-f12.f65.-f...
394,394,-f31.-f22.f16.-f17.-f13.-f24.-f14.f2.f21.f44.f...
395,395,-r0.-f42.-f8.f16.-f49.f14.-f1.f56.f26.f35.f62....
396,396,f25.-f29.f46.f49.-f8.f27.f26.-f20.f2.-f20.f6.f...


# Step 9: Save submission

In [35]:
# Define the file path for the output CSV file
output_csv_path = 'submission.csv'

# Save the output DataFrame to a CSV file
res_df.to_csv(output_csv_path, index=False)

# Return the path of the saved file
output_csv_path

'submission.csv'