In [1]:
import numpy as np

from src.grid_world import GridWorld

pygame 2.5.2 (SDL 2.28.2, Python 3.10.14)
Hello from the pygame community. https://www.pygame.org/contribute.html


TODO

- Sample (state, action)-pairs. Split them to context and target sets!
- Figure out how to split them correctly, what differences are there brtween
- Train a neural process using the HIIT library

### Simulator

Global Fixed Parameters:
- Grid_size int: side lengths of the grid
- Agent_view_size int: side length of the agents view area

User specific parameters

Fixed

- Agent_pos (int, int): Agent's initial position
- Goal_pos (int, int): Goal's initial position

Change each trial

- Mode_densities: 

Keep fixed:
Grid size
Agent_view_size
Mode_densities

Sample:
Goal_pos,
Agent_pos

User parameters:
Number of belief modes
Distribution of the belief modes (Dirichlet)

In [2]:
def sample_mode_densities(max_modes = 3, total_density = 0.9):
  '''
  Samples a list of mode densities that guide the agent's
  behavior.
  '''
  num_modes = np.random.randint(1, max_modes + 1)
  densities = np.random.dirichlet(np.ones(num_modes)) * total_density
  return list(densities)

test_sample = sample_mode_densities(max_modes = 5, total_density = 0.5)
print(test_sample)
print(sum(test_sample))

[0.4669898002984246, 0.03301019970157546]
0.5


In [3]:
def generate_user_parameters(grid_size, mode_params = None):
  '''
  Generates a set of parameters that define the user's behavior.
  '''
  
  mode_densities = None
  if mode_params is None:
    mode_densities = sample_mode_densities()
  else:
    mode_densities = sample_mode_densities(mode_params)
  
  mode_positions = np.random.randint(0, grid_size, (len(mode_densities), 2))
  
  return {
    'mode_densities': mode_densities,
    'mode_positions': mode_positions,
  }
  
test_param = generate_user_parameters(10)
print(test_param)

{'mode_densities': [0.15382465956825817, 0.09698756740550615, 0.6491877730262358], 'mode_positions': array([[0, 3],
       [4, 3],
       [1, 5]])}


In [13]:
def generate_user_trajectories(num_trajectories, grid_size, agent_view_size, user_params):
  mode_densities = user_params['mode_densities']
  mode_positions = user_params['mode_positions']
  
  env = GridWorld(render_mode = "rgb_array", size = grid_size, agent_view_size = agent_view_size, mode_densities = mode_densities, mode_positions=mode_positions)
  
  trajectories = []
  
  for _ in range(num_trajectories):
    trajectory = []
    
    obs = env.reset()
    state = obs[0]['agent_pos']
    
    done = False
    while not done:
      action = env.max_neighboring_reward()
      trajectories.append((state, action))
      next_obs, _, done, truncated, _ = env.step(action)
      state = next_obs['agent_pos']
      
      done = done or truncated
    
    
  return trajectories

num_traj = 3
grid_size = 10
agent_view_size = 4
user_params = generate_user_parameters(grid_size)
trajectories = generate_user_trajectories(num_traj, grid_size, agent_view_size, user_params)
print(trajectories)

[((8, 9), 2), ((8, 8), 2), ((8, 7), 2), ((8, 6), 1), ((9, 6), 0), ((8, 6), 0), ((7, 6), 0), ((6, 6), 0), ((3, 10), 2), ((3, 9), 1), ((4, 9), 1), ((5, 9), 2), ((5, 8), 1), ((6, 8), 1), ((7, 8), 2), ((7, 7), 2), ((7, 6), 2), ((7, 5), 1), ((8, 5), 1), ((9, 5), 1), ((10, 5), 2), ((3, 10), 1), ((4, 10), 1), ((5, 10), 1), ((6, 10), 1), ((7, 10), 1), ((8, 10), 2), ((8, 9), 2)]


In [14]:
def split_trajectory_half(trajectory):
  '''
  Splits a trajectory into two halves.
  '''
  half = len(trajectory) // 2
  return trajectory[:half], trajectory[half:]

([((8, 9), 2), ((8, 8), 2), ((8, 7), 2), ((8, 6), 1), ((9, 6), 0), ((8, 6), 0), ((7, 6), 0), ((6, 6), 0), ((3, 10), 2), ((3, 9), 1), ((4, 9), 1), ((5, 9), 2), ((5, 8), 1), ((6, 8), 1)], [((7, 8), 2), ((7, 7), 2), ((7, 6), 2), ((7, 5), 1), ((8, 5), 1), ((9, 5), 1), ((10, 5), 2), ((3, 10), 1), ((4, 10), 1), ((5, 10), 1), ((6, 10), 1), ((7, 10), 1), ((8, 10), 2), ((8, 9), 2)])


In [18]:
def generate_all_user_data(num_users = 10000, grid_size = 10, agent_view_size = 4):
  all_user_data = []
  
  for i in range(num_users):
    user_params = generate_user_parameters(grid_size)
    
    num_trajectories = np.random.randint(1, 11)
    
    trajectories = generate_user_trajectories(num_trajectories, grid_size, agent_view_size, user_params)
    
    all_user_data.append({
      'mode_densities': user_params['mode_densities'],
      'mode_positions' : user_params['mode_positions'],
      'trajectories' : trajectories
    })
    
    if i % 100 == 0:
      print(f"Generated data for {i} users.")
  
  return all_user_data

In [20]:
GRID_SIZE = 10
AGENT_VIEW_SIZE = 4
N_USERS = 100

all_user_data = generate_all_user_data(num_users = N_USERS, grid_size = GRID_SIZE, agent_view_size = AGENT_VIEW_SIZE)

Generated data for 0 users.
Generated data for 100 users.
Generated data for 200 users.


KeyboardInterrupt: 