# 10-701 Final Project

We will train an inverse reinforcement learning algorithm to create satisfying patterns in Conway's Game of Life.

## 1. Basic Setup for Conway's Game of Life

In [1]:
import numpy as np

def initialize_pattern(grid_size=(10, 10), density=0.3):
    """Create a random 0/1 starting grid.

    Args:
        grid_size: Dimensions of the grid, unpacked into ``np.random.rand``.
        density: Threshold compared against uniform samples in [0, 1); a
            cell is set to 1 when its sample falls below this value.

    Returns:
        Integer array of shape ``grid_size`` containing only 0s and 1s.
    """
    # One uniform draw per cell from NumPy's global random state.
    samples = np.random.rand(*grid_size)
    alive_mask = samples < density
    return alive_mask.astype(int)

def step_game_of_life(grid):
    """Advance a Game of Life grid by one generation.

    Neighbours are counted by shifting the grid with ``np.roll``, so the
    board wraps around: cells on one edge are adjacent to cells on the
    opposite edge.

    Args:
        grid: 2-D array-like of cell states, where 1 marks a live cell.
            Nested lists are accepted and converted to an array.

    Returns:
        A new integer array of the same shape, with 1 for cells that are
        alive in the next generation and 0 otherwise. The input is not
        modified.
    """
    # Convert up front: on a plain nested list ``grid == 1`` evaluates to a
    # single ``False``, which would silently disable the survival rule.
    grid = np.asarray(grid)

    neighbor_offsets = [(-1, -1), (-1, 0), (-1, 1),
                        (0, -1),           (0, 1),
                        (1, -1),  (1, 0),  (1, 1)]
    neighbors = sum(np.roll(np.roll(grid, row_shift, 0), col_shift, 1)
                    for row_shift, col_shift in neighbor_offsets)

    # Exactly three neighbours: a dead cell is born and a live cell survives.
    has_three = neighbors == 3
    # Exactly two neighbours: only an already-live cell survives.
    alive_with_two = (grid == 1) & (neighbors == 2)

    # Parenthesise the whole expression so the cast applies to the combined
    # mask rather than to the right-hand operand alone.
    return (has_three | alive_with_two).astype(int)

def simulate_game(grid, steps=5):
    """Evolve a grid through several generations.

    Args:
        grid: Starting grid, passed to ``step_game_of_life``.
        steps: Number of generations to advance.

    Returns:
        The grid after ``steps`` applications of ``step_game_of_life``;
        when the loop body never runs, the input is returned unchanged.
    """
    state = grid
    for _generation in range(steps):
        state = step_game_of_life(state)
    return state


## 2. Collect User Feedback

In [2]:
def get_user_feedback(pattern):
    """Show a pattern and return a rating for it.

    The rating is currently a placeholder: it is drawn at random rather
    than read from the user, and is meant to be replaced by real input
    later.

    Args:
        pattern: The grid to display; it is printed as-is.

    Returns:
        A random integer rating from 1 to 10 inclusive.
    """
    print(pattern)
    # Placeholder rating; the upper bound of randint is exclusive.
    simulated_rating = np.random.randint(low=1, high=11)
    return simulated_rating


## 3. Inverse Reinforcement Learning

In [3]:
class PreferenceModel:
    """Linear per-cell model of how much a user likes a pattern.

    The model keeps one weight per grid cell; a pattern's predicted score
    is the sum of the weights multiplied element-wise by the pattern.
    """

    def __init__(self, grid_size=(10, 10)):
        """Create the model with uniformly random weights in [0, 1).

        Args:
            grid_size: Dimensions of the weight array, unpacked into
                ``np.random.rand``.
        """
        # One weight per cell, drawn from NumPy's global random state.
        self.weights = np.random.rand(*grid_size)

    def predict_score(self, pattern):
        """Return the predicted satisfaction score for ``pattern``.

        Args:
            pattern: Grid multiplied element-wise with the weights.

        Returns:
            The sum of ``weights * pattern`` over all cells.
        """
        weighted_cells = self.weights * pattern
        return weighted_cells.sum()

    def update_weights(self, pattern, user_feedback, learning_rate=0.1):
        """Shift the weights toward the rating the user actually gave.

        Args:
            pattern: The grid that was rated.
            user_feedback: The rating given for ``pattern``.
            learning_rate: Scale factor applied to the correction.
        """
        residual = user_feedback - self.predict_score(pattern)
        step_scale = learning_rate * residual
        # In-place update: cells that are 0 in the pattern keep their weight.
        self.weights += step_scale * pattern

In [4]:
def main():
    """Run the feedback loop that trains a ``PreferenceModel``.

    For 20 rounds on a 10x10 grid: generate a random pattern, evolve it
    for five generations, obtain a rating for the evolved pattern, update
    the model with that rating, and print the resulting weights.
    """
    num_iterations = 20
    grid_size = (10, 10)
    model = PreferenceModel(grid_size)

    for iteration in range(num_iterations):
        # Fresh random pattern each round, rated only after it has evolved.
        evolved = simulate_game(initialize_pattern(grid_size), steps=5)

        print(f"Iteration {iteration + 1}")
        rating = get_user_feedback(evolved)

        # Learn from this round's rating.
        model.update_weights(evolved, rating)

        print(f"Updated weights:\n{model.weights}\n")

# Entry point: run the feedback loop only when __name__ is "__main__".
if __name__ == "__main__":
    main()


Iteration 1
[[0 0 0 0 0 1 1 0 0 0]
 [0 0 0 0 1 0 1 1 0 0]
 [0 0 0 1 0 0 1 0 0 0]
 [0 0 0 0 1 1 0 0 0 0]
 [0 0 0 0 1 1 0 0 0 0]
 [0 0 0 0 1 1 1 0 0 1]
 [1 0 0 0 1 1 1 1 1 0]
 [0 1 0 0 0 1 0 1 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 0]]
Updated weights:
[[ 0.14136847  0.3531767   0.86663175  0.03992859  0.18662682 -0.26395991
  -0.15188763  0.01050187  0.14134669  0.99549604]
 [ 0.36785572  0.63834941  0.85366408  0.52905637 -0.56695785  0.10038656
   0.23949914  0.26770826  0.54233402  0.33409627]
 [ 0.58477942  0.82827034  0.75748201  0.28814554  0.26167522  0.61071821
   0.16471408  0.93275637  0.46612493  0.21192327]
 [ 0.83772495  0.71174398  0.57814723  0.39738166 -0.37318264 -0.38119681
   0.78831207  0.84797651  0.16731582  0.18178579]
 [ 0.86058066  0.67971581  0.19857891  0.93312518 -0.04761862  0.32159522
   0.10947619  0.27520362  0.7335191   0.22573578]
 [ 0.6229681   0.54694486  0.03682655  0.4299013   0.05649726 -0.06367281
  -0.05664939  0.01828909  0.34782842 -0.