# Premature Optimization is the root of all evil

In [71]:
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
from numba import jit
import pstats
import heapq
import time
import cProfile
import pandas as pd
import json

In [72]:
def getInversePerm(arr):
    """Return the inverse of the permutation ``arr`` as a list.

    ``arr`` is read as a mapping ``position -> arr[position]``; the result
    satisfies ``res[arr[position]] == position`` for every position.
    """
    res = [0] * len(arr)
    for position, target in enumerate(arr):
        res[target] = position
    return res


def describe_array(arr):
    """Print the values of ``arr`` followed by basic summary statistics.

    One ``label value`` line is printed per statistic: count, mean, median,
    standard deviation, variance, minimum, maximum, range, the 25th/50th/75th
    percentiles and the interquartile range. Nothing is returned.
    """
    # Each statistic is wrapped in a callable so that it is evaluated right
    # before its line is printed, one after another in the listed order.
    report = (
        ("Array:", lambda: arr),
        ("Count:", lambda: len(arr)),
        ("Mean:", lambda: np.mean(arr)),
        ("Median:", lambda: np.median(arr)),
        ("Standard Deviation:", lambda: np.std(arr)),
        ("Variance:", lambda: np.var(arr)),
        ("Minimum:", lambda: np.min(arr)),
        ("Maximum:", lambda: np.max(arr)),
        ("Range:", lambda: np.max(arr) - np.min(arr)),
        ("25th Percentile:", lambda: np.percentile(arr, 25)),
        ("50th Percentile (Median):", lambda: np.percentile(arr, 50)),
        ("75th Percentile:", lambda: np.percentile(arr, 75)),
        ("Interquartile Range:", lambda: np.percentile(arr, 75) - np.percentile(arr, 25)),
    )
    for label, compute in report:
        print(label, compute())

In [73]:
# File paths (relative to the notebook's working directory)
puzzle_info_path = 'puzzle_info.csv'
puzzles_path = 'puzzles.csv'
sample_submission_path = 'sample_submission.csv' # TODO (original note): change name across board
my_submission_path = 'submission.csv'

# Loading the data: one DataFrame per CSV file above
puzzle_info_df = pd.read_csv(puzzle_info_path)
puzzles_df = pd.read_csv(puzzles_path)
sample_submission_df = pd.read_csv(sample_submission_path)
my_submission_df = pd.read_csv(my_submission_path)
# Bare last expression: the notebook renders the first rows of the puzzles table
puzzles_df.head()

Unnamed: 0,id,puzzle_type,solution_state,initial_state,num_wildcards
0,0,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,D;E;D;A;E;B;A;B;C;A;C;A;D;C;D;F;F;F;E;E;B;F;B;C,0
1,1,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,D;E;C;B;B;E;F;A;F;D;B;F;F;E;B;D;A;A;C;D;C;E;A;C,0
2,2,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,E;F;C;C;F;A;D;D;B;B;A;F;E;B;C;A;A;B;D;F;E;E;C;D,0
3,3,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,A;C;E;C;F;D;E;D;A;A;F;A;B;D;B;F;E;D;B;F;B;C;C;E,0
4,4,cube_2/2/2,A;A;A;A;B;B;B;B;C;C;C;C;D;D;D;D;E;E;E;E;F;F;F;F,E;D;E;D;A;E;F;B;A;C;F;D;F;D;C;A;F;B;C;C;B;E;B;A,0


In [74]:
# Parse the semicolon-separated `initial_state` / `solution_state` strings into
# lists and replace every symbol by an integer id.
#
# `seen` maps symbol -> id. Ids are handed out in first-seen order while
# scanning the initial states, and the single mapping is shared by all puzzles.
seen = {}

puzzles_df['parsed_initial_state'] = puzzles_df['initial_state'].apply(lambda x: x.split(';'))

# Walk the list objects stored in the column and rewrite them in place.
# Iterating over the values (instead of re-indexing the DataFrame by label for
# every single element) avoids several column lookups per symbol and does not
# rely on the frame having the default 0..n-1 index.
for state in puzzles_df['parsed_initial_state']:
    for pos, symbol in enumerate(state):
        if symbol not in seen:
            seen[symbol] = len(seen)
        state[pos] = seen[symbol]

puzzles_df['parsed_solution_state'] = puzzles_df['solution_state'].apply(lambda x: x.split(';'))

# Solution states reuse the ids assigned above; a symbol that never occurred in
# any initial state raises KeyError here.
for state in puzzles_df['parsed_solution_state']:
    for pos, symbol in enumerate(state):
        state[pos] = seen[symbol]

# Displaying the modified dataframe with parsed states
puzzles_df[['id', 'puzzle_type', 'parsed_initial_state', 'parsed_solution_state']].head()

Unnamed: 0,id,puzzle_type,parsed_initial_state,parsed_solution_state
0,0,cube_2/2/2,"[0, 1, 0, 2, 1, 3, 2, 3, 4, 2, 4, 2, 0, 4, 0, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
1,1,cube_2/2/2,"[0, 1, 4, 3, 3, 1, 5, 2, 5, 0, 3, 5, 5, 1, 3, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
2,2,cube_2/2/2,"[1, 5, 4, 4, 5, 2, 0, 0, 3, 3, 2, 5, 1, 3, 4, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
3,3,cube_2/2/2,"[2, 4, 1, 4, 5, 0, 1, 0, 2, 2, 5, 2, 3, 0, 3, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."
4,4,cube_2/2/2,"[1, 0, 1, 0, 2, 1, 5, 3, 2, 4, 5, 0, 5, 0, 4, ...","[2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 0, 0, 0, ..."


In [75]:
# Convert the string representation of `allowed_moves` into a dictionary.
# The stored text uses single quotes, so they are swapped for double quotes
# before the text is handed to the JSON parser. Values that are not strings are
# passed through untouched: the column is overwritten in place, and without
# this guard re-running the cell would fail on the already-parsed dicts.
puzzle_info_df['allowed_moves'] = puzzle_info_df['allowed_moves'].apply(
    lambda x: json.loads(x.replace("'", '"')) if isinstance(x, str) else x
)

# Pick the first puzzle type as an example and keep its allowed moves around
# for inspection (nothing is displayed by this cell).
example_puzzle_type = puzzle_info_df['puzzle_type'].iloc[0]
example_allowed_moves = puzzle_info_df[puzzle_info_df['puzzle_type'] == example_puzzle_type]['allowed_moves'].iloc[0]

In [76]:
# puz_info:     puzzle_type -> [list of permutation arrays, list of move names]
#               (the two inner lists are index-aligned)
# move_to_perm: puzzle_type -> {move name: permutation array}
puz_info = {}
move_to_perm = {}

for puzzle_type, allowed in zip(puzzle_info_df['puzzle_type'], puzzle_info_df['allowed_moves']):
    entry = puz_info[puzzle_type] = [[], []]
    perm_by_name = move_to_perm[puzzle_type] = {}
    perms, names = entry

    for move_name, forward in allowed.items():
        # NOTE (original author): the '-' prefix is taken to mean the inverse
        # move, but the assignment "might be the opposite".
        inverse_name = str('-' + move_name)

        # The forward move, immediately followed by its inverse.
        names.append(move_name)
        perms.append(np.array(forward))
        names.append(inverse_name)
        perms.append(np.array(getInversePerm(forward)))

        # The lookup table gets its own array objects, independent of the ones
        # stored in puz_info.
        perm_by_name[inverse_name] = np.array(getInversePerm(forward))
        perm_by_name[move_name] = np.array(forward)

# Example lookup: move_to_perm['cube_2/2/2']

In [77]:
# optimize reference solution
# The sample submission file is read again here under its own name,
# `reference_solution`, separate from `sample_submission_df`.
reference_solution = pd.read_csv(sample_submission_path)
# Bare last expression so the notebook renders the frame.
reference_solution

Unnamed: 0,id,moves
0,0,r1.-f1
1,1,f1.d0.-r0.-f1.-d0.-f1.d0.-r0.f0.-f1.-r0.f1.-d1...
2,2,f1.d0.-d1.r0.-d1.-f0.f1.-r0.-f0.-r1.-f0.r0.-d0...
3,3,-f0.-r0.-f0.-d0.-f0.f1.r0.-d1.-r0.-r1.-r0.-f1....
4,4,d1.-f1.d1.r1.-f0.d1.-d0.-r1.d1.d1.-f1.d1.-d0.-...
...,...,...
393,393,f19.f21.-f39.f20.f2.-f5.f7.-r3.f55.-f12.f65.-f...
394,394,-f31.-f22.f16.-f17.-f13.-f24.-f14.f2.f21.f44.f...
395,395,-r0.-f42.-f8.f16.-f49.f14.-f1.f56.f26.f35.f62....
396,396,f25.-f29.f46.f49.-f8.f27.f26.-f20.f2.-f20.f6.f...


# Submission Stats

In [78]:
# Number of moves per puzzle in the sample submission; moves within one
# solution are separated by '.'.
arr = [len(moves.split('.')) for moves in sample_submission_df['moves']]
# The score is the total number of moves over all puzzles.
tot = sum(arr)

describe_array(arr)
print("Score:", tot)

Array: [2, 63, 62, 92, 70, 54, 68, 83, 98, 76, 66, 63, 72, 131, 96, 68, 63, 62, 89, 82, 112, 96, 63, 53, 99, 61, 93, 73, 83, 82, 300, 392, 268, 239, 304, 440, 267, 299, 237, 408, 259, 296, 208, 229, 602, 584, 327, 358, 242, 258, 238, 370, 479, 355, 230, 309, 285, 344, 282, 415, 251, 236, 385, 225, 429, 200, 250, 331, 298, 225, 205, 266, 327, 392, 408, 320, 382, 358, 257, 254, 372, 221, 251, 300, 260, 227, 265, 310, 209, 268, 277, 303, 216, 214, 317, 287, 425, 323, 292, 249, 263, 356, 274, 398, 221, 231, 207, 340, 342, 268, 257, 265, 399, 219, 262, 273, 329, 315, 385, 360, 331, 295, 339, 346, 301, 311, 268, 264, 260, 299, 362, 401, 215, 268, 395, 372, 227, 376, 257, 241, 209, 287, 412, 474, 399, 267, 358, 263, 233, 258, 700, 610, 498, 593, 665, 544, 642, 637, 443, 697, 598, 538, 789, 816, 542, 616, 597, 465, 420, 670, 558, 519, 466, 736, 483, 472, 737, 493, 534, 486, 670, 592, 491, 675, 499, 888, 595, 734, 701, 451, 458, 731, 737, 659, 495, 594, 947, 550, 422, 517, 623, 633, 439, 586, 5

In [79]:
print("## MY SUBMISSION INFO ##")

# Number of moves per puzzle in my submission; moves within one solution are
# separated by '.'.
arr = [len(moves.split('.')) for moves in my_submission_df['moves']]
# The score is the total number of moves over all puzzles.
tot = sum(arr)

describe_array(arr)
print()
print("Score:", tot)

## MY SUBMISSION INFO ##
Array: [2, 9, 12, 10, 12, 12, 12, 11, 12, 12, 10, 11, 12, 11, 12, 10, 11, 12, 11, 12, 12, 14, 13, 11, 11, 13, 13, 13, 13, 10, 300, 392, 268, 239, 304, 440, 267, 299, 237, 408, 259, 296, 208, 229, 602, 584, 327, 358, 242, 258, 238, 370, 479, 355, 230, 309, 285, 344, 282, 415, 251, 236, 385, 225, 429, 200, 250, 331, 298, 225, 205, 266, 327, 392, 408, 320, 382, 358, 257, 254, 372, 221, 251, 300, 260, 227, 265, 310, 209, 268, 277, 303, 216, 214, 317, 287, 425, 323, 292, 249, 263, 356, 274, 398, 221, 231, 207, 340, 342, 268, 257, 265, 399, 219, 262, 273, 329, 315, 385, 360, 331, 295, 339, 346, 301, 311, 268, 264, 260, 299, 362, 401, 215, 268, 395, 372, 227, 376, 257, 241, 209, 287, 412, 474, 399, 267, 358, 263, 233, 258, 700, 610, 498, 593, 665, 544, 642, 637, 443, 697, 598, 538, 789, 816, 542, 616, 597, 465, 420, 670, 558, 519, 466, 736, 483, 472, 737, 493, 534, 486, 670, 592, 491, 675, 499, 888, 595, 734, 701, 451, 458, 731, 737, 659, 495, 594, 947, 550, 422, 517,

# Puzzle Type Stats

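Each line below is printed as `type --> nodes degree count`, where *nodes* is the length of a move permutation (the number of positions in the puzzle state), *degree* is the number of available moves (inverse moves included), and *count* is the number of puzzles of that type in `puzzles_df`.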

In [80]:
# puz_info: puzzle_type -> [list of permutation arrays, list of move names]
# For every puzzle type print the permutation length ("nodes"), the number of
# stored permutations ("degree") and how many rows of puzzles_df have that type.
for puzzle_type, entry in puz_info.items():
    perms = entry[0]
    node_count = len(perms[0])
    degree = len(perms)
    puzzle_count = len(puzzles_df[puzzles_df['puzzle_type'] == puzzle_type])
    print(puzzle_type, '-->', node_count, degree, puzzle_count)

cube_2/2/2 --> 24 12 30
cube_3/3/3 --> 54 18 120
cube_4/4/4 --> 96 24 60
cube_5/5/5 --> 150 30 35
cube_6/6/6 --> 216 36 12
cube_7/7/7 --> 294 42 5
cube_8/8/8 --> 384 48 5
cube_9/9/9 --> 486 54 5
cube_10/10/10 --> 600 60 5
cube_19/19/19 --> 2166 114 4
cube_33/33/33 --> 6534 198 3
wreath_6/6 --> 10 4 20
wreath_7/7 --> 12 4 15
wreath_12/12 --> 22 4 10
wreath_21/21 --> 40 4 5
wreath_33/33 --> 64 4 3
wreath_100/100 --> 198 4 1
globe_1/8 --> 32 36 10
globe_1/16 --> 64 68 5
globe_2/6 --> 36 30 5
globe_3/4 --> 32 24 15
globe_6/4 --> 56 30 5
globe_6/8 --> 112 46 5
globe_6/10 --> 140 54 5
globe_3/33 --> 264 140 4
globe_33/3 --> 264 140 4
globe_8/25 --> 450 118 2


In [85]:
# First puzzle of type 'wreath_21/21', shown with its parsed (integer-encoded) states.
puzzles_df.loc[puzzles_df['puzzle_type'] == 'wreath_21/21'].head(1)

Unnamed: 0,id,puzzle_type,solution_state,initial_state,num_wildcards,parsed_initial_state,parsed_solution_state
329,329,wreath_21/21,C;A;A;A;A;A;C;A;A;A;A;A;A;A;A;A;A;A;A;A;A;B;B;...,B;B;A;B;A;A;A;B;A;B;A;B;B;A;A;B;A;B;A;A;B;A;C;...,0,"[3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 2, 3, 3, 2, 2, ...","[4, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, ..."


In [86]:
# First puzzle of type 'cube_3/3/3', shown with its parsed (integer-encoded) states.
puzzles_df.loc[puzzles_df['puzzle_type'] == 'cube_3/3/3'].head(1)

Unnamed: 0,id,puzzle_type,solution_state,initial_state,num_wildcards,parsed_initial_state,parsed_solution_state
30,30,cube_3/3/3,A;A;A;A;A;A;A;A;A;B;B;B;B;B;B;B;B;B;C;C;C;C;C;...,A;A;E;C;F;F;C;C;C;D;B;D;D;B;A;B;F;F;A;E;B;D;E;...,0,"[2, 2, 1, 4, 5, 5, 4, 4, 4, 0, 3, 0, 0, 3, 2, ...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ..."


In [87]:
# First puzzle of type 'globe_1/8'. Symbol ids come from the single `seen`
# mapping shared by all puzzles (assigned in first-seen order), so symbols that
# first appear in later puzzles carry large ids.
puzzles_df.loc[puzzles_df['puzzle_type'] == 'globe_1/8'].head(1)

Unnamed: 0,id,puzzle_type,solution_state,initial_state,num_wildcards,parsed_initial_state,parsed_solution_state
338,338,globe_1/8,A;A;C;C;E;E;G;G;I;I;K;K;M;M;O;O;B;B;D;D;F;F;H;...,I;P;O;A;A;D;F;L;J;M;G;M;P;F;E;J;E;B;O;G;H;D;N;...,0,"[6540, 6541, 6542, 2, 2, 0, 5, 6543, 6544, 654...","[2, 2, 4, 4, 1, 1, 6546, 6546, 6540, 6540, 654..."
