In [1]:
import ast
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

# Directory holding the competition CSV files that the loading cell reads.
# Alternative location (presumably the path when running on Kaggle — confirm):
# root_dir = "/kaggle/input/santa-2023"
root_dir = "competition_data"

In [2]:
# Read the three competition tables from `root_dir` in one pass
puzzles_df, puzzle_info_df, submission_df = (
    pd.read_csv(os.path.join(root_dir, csv_name))
    for csv_name in ('puzzles.csv', 'puzzle_info.csv', 'sample_submission.csv')
)

In [3]:
# One row per puzzle: puzzle definition + its type's move table + the sample solution
super_df = (
    puzzles_df
    .merge(puzzle_info_df, on='puzzle_type')
    .merge(submission_df, on='id')
)

### Some quick notes about puzzle structure
- Cube_x/x/x puzzles have a set of moves {f0, f1, ..., f(x-1), r0, r1, ..., r(x-1), d0, d1, ..., d(x-1)}
- Wreath_x/x puzzles have a set of moves {l, r}
- Globe_x/y puzzles have a set of moves {r0, r1, ..., r(x), f0, f1, ..., f(2y-1)}

In fact, Cube_x/x/x puzzles are just Rubik's Cube type puzzles, Wreath_x/x puzzles are Hungarian Rings (or Devil's Circles), and Globe_x/y puzzles are Rainbow Masterball (or just Masterball) puzzles. Reference: https://www.permutationpuzzles.org/rubik/webnotes/rubik.pdf

This helps us understand the move orientations as well: 
- The cube moves are just rotations of an x-by-x-by-1 layer of cubes
- The wreath moves are just a counterclockwise rotation of either the left or right ring
- The globe moves are either a rotation of a latitude-wise layer (of which there are x+1) or a flipping along a line of longitude (of which there are 2y). 

Of course, negative moves are also possible, though certain move sequences would be obviously sub-optimal. For example, having `f0` followed by `-f0` in the cube puzzle would be redundant.

More information is given in the [getting started notebook](https://www.kaggle.com/code/ryanholbrook/getting-started-with-santa-2023)

### Breaking down which puzzles have the most moves

In [4]:
# Moves are "."-separated, so a sequence has (number of dots + 1) moves
super_df['total_moves'] = super_df['moves'].map(lambda seq: seq.count(".") + 1)
super_df['num_puzzles'] = 1

# Roll the per-puzzle counts up to puzzle type, largest total first
super_group_df = (
    super_df
    .groupby('puzzle_type')[['total_moves', 'num_puzzles']]
    .sum()
    .reset_index()
    .sort_values(by='total_moves', ascending=False)
)
super_group_df

Unnamed: 0,puzzle_type,total_moves,num_puzzles
4,cube_33/33/33,372200,3
14,globe_3/33,226350,8
1,cube_19/19/19,102317,4
6,cube_5/5/5,51254,35
0,cube_10/10/10,50465,5
19,globe_8/25,47513,2
3,cube_3/3/3,36661,120
5,cube_4/4/4,35320,60
10,cube_9/9/9,34550,5
15,globe_3/4,29526,15


We can see that out of about 1.2 million moves in the initial submission file, over half are taken up by just 15 puzzles. All of these are of type 33x33 cube, 19x19 cube, or 3x33 globe. Conversely, all of the wreath puzzles put together only require about 100,000 moves out of the total. Therefore, if we want to find the fastest way up the leaderboard, we should focus on finding better solutions to the largest cube and globe puzzles.

In [7]:
# Sort submission by total moves
# Longest sample solutions first; show the 50 largest puzzles
super_df_sorted = super_df.sort_values(by='total_moves', ascending=False)
super_df_sorted.loc[:, ["puzzle_type", "num_wildcards", "total_moves"]].head(50)

Unnamed: 0,puzzle_type,num_wildcards,total_moves
282,cube_33/33/33,0,139629
281,cube_33/33/33,0,123431
283,cube_33/33/33,0,109140
389,globe_3/33,0,32610
279,cube_19/19/19,0,32249
391,globe_3/33,0,30775
393,globe_3/33,0,30596
394,globe_3/33,0,28971
395,globe_3/33,0,28876
388,globe_3/33,6,26960


In [8]:
# Combined move count of the 50 longest puzzles
super_df_sorted["total_moves"].head(50).sum()

967058

Looking at the individual puzzles, we also notice that 10x10, 9x9, 8x8, and 7x7 cubes; 8x25, 3x4, and 6x4 globes; and the larger wreaths are also represented in the top 50 longest puzzles. In fact, more than 80% (960,000 out of 1.2 million) of the total moves are taken up by the largest 13% (50 out of 398) of puzzles.

In [11]:
# Totals per (puzzle type, target colouring), largest total first
super_solution_df = (
    super_df
    .groupby(['puzzle_type', 'solution_state'])[['total_moves', 'num_puzzles']]
    .sum()
    .reset_index()
    .sort_values(by='total_moves', ascending=False)
)
super_solution_df

Unnamed: 0,puzzle_type,solution_state,total_moves,num_puzzles
27,globe_3/33,A;A;A;A;A;A;C;C;C;C;C;C;E;E;E;E;E;E;G;G;G;G;G;...,166699,6
9,cube_33/33/33,A;B;A;B;A;B;A;B;A;B;A;B;A;B;A;B;A;B;A;B;A;B;A;...,139629,1
8,cube_33/33/33,A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;...,123431,1
10,cube_33/33/33,N0;N1;N2;N3;N4;N5;N6;N7;N8;N9;N10;N11;N12;N13;...,109140,1
1,cube_19/19/19,A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;...,102317,4
28,globe_3/33,N0;N1;N2;N3;N4;N5;N6;N7;N8;N9;N10;N11;N12;N13;...,59651,2
0,cube_10/10/10,A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;...,50465,5
34,globe_8/25,A;A;A;A;A;D;D;D;D;D;G;G;G;G;G;J;J;J;J;J;M;M;M;...,47513,2
14,cube_5/5/5,A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;...,35869,25
22,cube_9/9/9,A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;A;...,34550,5


In [13]:
# Solution states containing "N0" use individually labelled pieces
indiv_colours = super_solution_df.loc[
    super_solution_df["solution_state"].str.contains("N0")
]
print(indiv_colours["total_moves"].sum())

# Solution states containing "A;A" have runs of a repeated colour
common_face_colours = super_solution_df.loc[
    super_solution_df["solution_state"].str.contains("A;A")
]
print(common_face_colours["total_moves"].sum())

211972
852146


The puzzles with the individualized colours ("N0", "N1", etc.) account for about 1/6th of the total moves. The puzzles with large common blocks of colour (i.e., the cubes with one colour on each face, the globes with common sections of the same colour, and all the wreath puzzles) account for more than 70% of the total moves. Therefore, we can make a lot of progress by focusing on the large common-colour puzzles and ignoring the "checkered" cubes. (One exception to this rule: the three 33x33 cube puzzles comprise one with a single colour per face, one "checkered", and one with individualized colours.)

## Let's define some game objects to explore and manipulate

In [5]:
# Try solving a cube puzzle with MCTS
class PermutationPuzzle(object):
    """Mutable state of a single permutation puzzle together with its moves.

    Every allowed move ``k`` is stored as an index array, and its inverse is
    stored under ``"-" + k``. Moves are also numbered so they can be chosen by
    integer: move ``k`` at position ``i`` of the move table gets index ``2*i``
    and its inverse gets ``2*i + 1``.

    Attributes:
        puzzle_type: Raw type string, e.g. ``"cube_3/3/3"``.
        puzzle_shape: The part of ``puzzle_type`` before the underscore.
        dimensions: The "/"-separated integers after the underscore.
        initial_state, solution_state, state: 1-D arrays of piece labels.
        path: Names of the moves applied since construction or the last reset.
        num_wildcards: Number of mismatched positions still counted as solved.
        actions_dict: Move name -> permutation index array.
        actions_index / actions_index_reverse: Integer <-> move name lookups.
        actions_dict_length: Number of moves, inverses included.
    """

    def __init__(self, puzzle_series: pd.Series, allowed_moves=None):
        """Build the puzzle from one puzzle row.

        Args:
            puzzle_series: Row providing ``puzzle_type``, ``initial_state``,
                ``solution_state`` (";"-separated labels) and ``num_wildcards``.
            allowed_moves: Optional move table, either a dict mapping move name
                to permutation or the string literal of such a dict. When
                omitted, the table is looked up by puzzle type in the
                notebook-level ``puzzle_info_df`` (the original behaviour).
        """
        self.puzzle_type = puzzle_series["puzzle_type"]
        self.initial_state = np.array(puzzle_series["initial_state"].split(";"))
        self.solution_state = np.array(puzzle_series["solution_state"].split(";"))
        self.state = self.initial_state.copy()
        self.path = []
        self.num_wildcards = puzzle_series["num_wildcards"]

        # More fine-grained information about the puzzle type
        p_type_items = self.puzzle_type.split("_")
        self.puzzle_shape = p_type_items[0]
        self.dimensions = [int(k) for k in p_type_items[1].split("/")]

        # Resolve the move table
        if allowed_moves is None:
            type_mask = puzzle_info_df['puzzle_type'] == self.puzzle_type
            allowed_moves = puzzle_info_df.loc[type_mask, 'allowed_moves'].values[-1]
        if isinstance(allowed_moves, str):
            # literal_eval only parses Python literals; it never executes code
            allowed_moves = ast.literal_eval(allowed_moves)

        # Create the actions dictionaries
        self.actions_dict = {}
        self.actions_index = {}     # Integer index -> move name
        for i, (name, perm) in enumerate(allowed_moves.items()):
            forward = np.array(perm)
            self.actions_dict[name] = forward
            # argsort of a permutation is its inverse permutation
            self.actions_dict["-" + name] = np.argsort(forward)
            self.actions_index[2*i] = name
            self.actions_index[2*i+1] = "-" + name
        self.actions_index_reverse = {v: k for k, v in self.actions_index.items()}
        self.actions_dict_length = len(self.actions_dict)

    def render(self) -> None:
        """Print the current state in the same format as the initial and solution states."""
        print(";".join(self.state))
        return

    def get_state(self) -> np.ndarray:
        """Return the current state array (the internal array, not a copy)."""
        return self.state

    def reset_state(self) -> None:
        """Restore the initial state and clear the recorded path."""
        self.state = self.initial_state.copy()
        self.path = []
        return

    def _inverse_index_of_last_move(self) -> int:
        """Return the integer index of the move that undoes the last move taken."""
        last_index = self.actions_index_reverse[self.path[-1]]
        # A move and its inverse sit at 2*i and 2*i + 1, so flipping the lowest
        # bit switches between them.
        return last_index ^ 1

    def possible_actions(self) -> list[int]:
        """Return the selectable action indices.

        All actions are available on an empty path; otherwise the inverse of
        the last move is excluded, since taking it would just undo that move.
        """
        actions_list = list(range(self.actions_dict_length))
        if len(self.path) == 0:
            return actions_list

        # Prevent the reverse of the action we just took from being selected right away
        actions_list.remove(self._inverse_index_of_last_move())
        return actions_list

    def possible_actions_by_name(self) -> list[str]:
        """Same as ``possible_actions`` but returns move names instead of indices."""
        actions_list = list(self.actions_dict.keys())
        if len(self.path) == 0:
            return actions_list

        # Prevent the reverse of the action we just took from being selected right away
        actions_list.remove(self.actions_index[self._inverse_index_of_last_move()])
        return actions_list

    def take_action(self, action: int) -> None:
        """Apply the move with integer index ``action`` and record it in the path.

        Raises:
            KeyError: If ``action`` is not a valid action index.
        """
        move_name = self.actions_index[action]
        # Add the action to the path
        self.path.append(move_name)
        # Execute the permutation action; fancy indexing yields a new array
        self.state = self.state[self.actions_dict[move_name]]
        return

    def is_terminated(self) -> bool:
        """Return True when at most ``num_wildcards`` positions differ from the solution."""
        mismatches = np.count_nonzero(self.state != self.solution_state)
        return bool(mismatches <= self.num_wildcards)

Examine the wreath puzzle

In [6]:
# Pick the example puzzle by its hard-coded row position in puzzles_df.
# NOTE(review): row 284 is presumably a wreath puzzle — confirm if puzzles.csv changes.
test_wreath_series = puzzles_df.iloc[284]


In [7]:
# Two independent puzzle objects built from the same row, so they can be compared
test_wreath_A, test_wreath_B = (
    PermutationPuzzle(test_wreath_series) for _ in range(2)
)

In [8]:
# No move has been taken yet, so every move and its "-"-prefixed inverse is listed
test_wreath_A.possible_actions_by_name()

['l', '-l', 'r', '-r']

In [15]:
# Turn the right ring: four "r" moves on puzzle A versus two "-r" moves on puzzle B.
# The two printed states are identical, i.e. four forward turns equal two backward
# turns on this wreath.
# Start from the initial state so re-running this cell gives the same output.
test_wreath_A.reset_state()
test_wreath_B.reset_state()

# Look the action indices up by name instead of hard-coding 2 and 3
right_forward = test_wreath_A.actions_index_reverse["r"]
right_backward = test_wreath_B.actions_index_reverse["-r"]

for _ in range(4):
    test_wreath_A.take_action(right_forward)
for _ in range(2):
    test_wreath_B.take_action(right_backward)
print(test_wreath_A.get_state())
print(test_wreath_B.get_state())

['B' 'A' 'B' 'A' 'B' 'C' 'B' 'A' 'C' 'A']
['B' 'A' 'B' 'A' 'B' 'C' 'B' 'A' 'C' 'A']


In [16]:
# Put both puzzles back in their initial state and clear their recorded paths
test_wreath_A.reset_state()
test_wreath_B.reset_state()