In [None]:
import numpy as np
import random


In [None]:
# Define the Rubik's Cube class
class RubiksCube:
    """Minimal cube model: a collection of sticker faces plus a log of moves.

    Only rotations of face 0 (the "up" face) are implemented, and they turn
    that face's own stickers only.

    NOTE(review): a physical face turn also cycles the adjacent rows of the
    neighbouring faces. That is not modelled here because the layout of the
    remaining faces is not defined in this class -- confirm before relying on
    this model for real solving.
    """

    def __init__(self, initial_state):
        """Store the starting configuration and start an empty move log.

        Args:
            initial_state: Indexable collection of faces, each a 2-D grid of
                color labels. It is kept by reference, and the up-face
                rotations assign into it.
        """
        self.state = initial_state
        self.steps = []

    def get_state(self):
        """Return the current configuration (the stored object, not a copy)."""
        return self.state

    def rotate_face_clockwise(self, face):
        """Return `face` turned a quarter turn clockwise; the cube is not modified."""
        # np.rot90 turns counterclockwise for positive k, so k=-1 is clockwise.
        return np.rot90(face, -1)

    def rotate_face_counterclockwise(self, face):
        """Return `face` turned a quarter turn counterclockwise; the cube is not modified."""
        return np.rot90(face)

    def rotate_up_clockwise(self):
        """Turn face 0 clockwise in place and record the move."""
        self.state[0] = self.rotate_face_clockwise(self.state[0])
        self.steps.append("Up Clockwise")

    def rotate_up_counterclockwise(self):
        """Turn face 0 counterclockwise in place and record the move."""
        self.state[0] = self.rotate_face_counterclockwise(self.state[0])
        self.steps.append("Up Counterclockwise")

    # Define other face rotation methods similarly

    def is_solved(self):
        """Return True when every face shows exactly one color.

        A solved cube has six uniform faces that differ from one another, so
        the check is made per face rather than across the whole cube. A cube
        with no faces is reported as unsolved.
        """
        if len(self.state) == 0:
            return False
        return all(
            len({color for row in face for color in row}) == 1
            for face in self.state
        )

In [None]:
# Define the Q-learning agent class
class QLearningAgent:
    """Epsilon-greedy Q-learning agent backed by a dense Q-table.

    States and actions are used directly as row and column indices into
    `q_table`, which has shape (state_size, action_size) and starts at zero.
    """

    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.9, exploration_rate=1.0, exploration_decay=0.99):
        """Record the hyper-parameters and allocate a zero-filled Q-table.

        Args:
            state_size: Number of rows in the Q-table.
            action_size: Number of columns in the Q-table.
            learning_rate: Step size applied to each temporal-difference error.
            discount_factor: Weight given to the best next-state value.
            exploration_rate: Probability threshold for picking a random action.
            exploration_decay: Factor the exploration rate is multiplied by
                on each call to `decay_exploration_rate`.
        """
        self.q_table = np.zeros((state_size, action_size))
        self.state_size, self.action_size = state_size, action_size
        self.learning_rate, self.discount_factor = learning_rate, discount_factor
        self.exploration_rate, self.exploration_decay = exploration_rate, exploration_decay

    def choose_action(self, state):
        """Pick an action for `state`: random with probability epsilon, else greedy."""
        should_explore = random.uniform(0, 1) < self.exploration_rate
        if should_explore:
            # Uniform draw over all action indices (both ends inclusive).
            return random.randint(0, self.action_size - 1)
        # Highest-valued action in this state's row.
        return np.argmax(self.q_table[state])

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition (state, action) -> next_state."""
        greedy_next = np.argmax(self.q_table[next_state])
        td_target = reward + self.discount_factor * self.q_table[next_state, greedy_next]
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

    def decay_exploration_rate(self):
        """Shrink the exploration rate by one multiplicative decay step."""
        self.exploration_rate = self.exploration_rate * self.exploration_decay

In [None]:
# Define the main function
def main(episodes=1000, max_steps=50):
    """Train a tabular Q-learning agent on a fixed cube, then replay its greedy policy.

    Each training episode starts from a fresh copy of the initial cube and is
    limited to `max_steps` moves, so the run always terminates even when the
    cube is never solved. After training, the learned policy is replayed
    greedily (no exploration) from the initial cube and the recorded moves are
    printed.

    Args:
        episodes: Number of training episodes.
        max_steps: Maximum number of moves per training episode and during
            the final replay.
    """
    # Define the initial Rubik's Cube state (provided input)
    # NOTE(review): faces 0 and 5 are both red and face 5 carries a single
    # white sticker, which does not look like a reachable cube configuration
    # -- confirm the intended input.
    initial_state = np.array([
        [['red', 'red', 'red'], ['red', 'red', 'red'], ['red', 'red', 'red']],
        [['orange', 'orange', 'orange'], ['orange', 'orange', 'orange'], ['orange', 'orange', 'orange']],
        [['yellow', 'yellow', 'yellow'], ['yellow', 'yellow', 'yellow'], ['yellow', 'yellow', 'yellow']],
        [['green', 'green', 'green'], ['green', 'green', 'green'], ['green', 'green', 'green']],
        [['blue', 'blue', 'blue'], ['blue', 'blue', 'blue'], ['blue', 'blue', 'blue']],
        [['red', 'red', 'white'], ['red', 'red', 'red'], ['red', 'red', 'red']]
    ])

    # Action index -> RubiksCube method name. Only the moves the class
    # actually implements are listed; extend this tuple as further rotation
    # methods are added.
    move_names = ("rotate_up_clockwise", "rotate_up_counterclockwise")

    # The Q-table is indexed by integers, so each distinct configuration gets
    # its own row number the first time it is seen.
    state_ids = {}

    def state_index(cube):
        """Return the Q-table row assigned to the cube's current configuration."""
        key = cube.get_state().tobytes()
        return state_ids.setdefault(key, len(state_ids))

    def apply_move(cube, action):
        """Perform the move that `action` refers to on `cube`."""
        getattr(cube, move_names[action])()

    # Every move can reveal at most one new configuration, so this is an upper
    # bound on the rows needed (training moves + replay moves + start state).
    state_size = episodes * max_steps + max_steps + 1
    action_size = len(move_names)
    agent = QLearningAgent(state_size, action_size)

    # Train the Q-learning agent
    for _episode in range(episodes):
        cube = RubiksCube(initial_state.copy())  # Fresh cube; keeps the input intact
        state = state_index(cube)
        for _step in range(max_steps):
            if cube.is_solved():
                break
            action = agent.choose_action(state)
            apply_move(cube, action)
            next_state = state_index(cube)
            # Reward solving, and charge a small cost per move so shorter
            # solutions are preferred.
            reward = 1.0 if cube.is_solved() else -0.1
            agent.update_q_table(state, action, reward, next_state)
            state = next_state
        # Decay once per episode; decaying on every move would remove almost
        # all exploration within the first few episodes.
        agent.decay_exploration_rate()

    # Test the trained agent with a purely greedy policy
    agent.exploration_rate = 0.0
    rubiks_cube = RubiksCube(initial_state.copy())
    state = state_index(rubiks_cube)
    solved = rubiks_cube.is_solved()
    for _step in range(max_steps):
        if solved:
            break
        apply_move(rubiks_cube, agent.choose_action(state))
        state = state_index(rubiks_cube)
        solved = rubiks_cube.is_solved()

    # Display the steps taken to solve the Rubik's Cube
    if solved:
        print("Steps to solve the Rubik's Cube:")
    else:
        print(f"Cube not solved within {max_steps} moves; moves tried:")
    for step in rubiks_cube.steps:
        print(step)


In [None]:
# Entry point: call main() only when this code is executed directly
# (its module name is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()