# imCPP: Dataset Creation

We create a dataset consisting of 600 randomly generated 10x10 maps. For each map, we generate 50 random mCPP problems involving four agents and solve them with DARP.

## Dataset Encoding

To store our dataset compactly, we use a custom encoding scheme. 

In [1]:
from dataclasses import dataclass

@dataclass
class DatasetParameters:
    """Sizes that describe one generated dataset.

    The same values drive both dataset generation and the fixed-width
    binary encoding, so a dataset must be decoded with the parameters it
    was encoded with.
    """

    # Side length of the square grid; used for both rows and columns.
    map_width: int = 10
    # Number of maps in the dataset.
    map_count: int = 600
    # Number of solved problems stored per map.
    problem_count: int = 10
    # Number of obstacle cells sampled for each map.
    obstacle_count: int = 10
    # Number of agent start cells sampled for each problem.
    agent_count: int = 4

In [2]:
from typing import List, Tuple
import gzip
import numpy as np


def bits_to_bytes(data: str) -> bytes:
    """Pack a string of '0'/'1' characters into bytes, first character = MSB.

    Args:
        data: Bit string; each character must be '0' or '1'.

    Returns:
        One byte per group of eight characters. A trailing group shorter
        than eight characters is right-padded with '0' instead of being
        dropped, so no input bit is ever lost.

    Raises:
        ValueError: If ``data`` contains characters other than '0'/'1'.
    """
    remainder = len(data) % 8
    if remainder:
        # Complete the last byte rather than silently truncating it.
        data += '0' * (8 - remainder)
    return bytes(int(data[start : start + 8], 2) for start in range(0, len(data), 8))


def bytes_to_bits(data: bytes) -> str:
    """Expand bytes into a '0'/'1' string, eight characters per byte, MSB first."""
    groups = [format(value, '08b') for value in data]
    return ''.join(groups)


class DatasetEncoder:
    """Fixed-width bit-level codec for maps, problems and solutions.

    Layout of the encoded bit string, in order:

    1. every map, as ``obstacle_count`` coordinates;
    2. every problem, as ``agent_count`` coordinates;
    3. every solution, one '0'/'1' character per array cell;
    4. '0' characters up to the next multiple of eight.

    No counts are stored in the stream: decoding slices it purely from the
    parameters, so it must use the same parameters (including
    ``map_count``) that describe the encoded data.
    """

    def __init__(
        self,
        parameters: "DatasetParameters",
    ):
        """Copy the dataset sizes and derive the bit length of each record."""
        self.map_width = parameters.map_width
        self.map_count = parameters.map_count
        self.problem_count = parameters.problem_count
        self.obstacle_count = parameters.obstacle_count
        self.agent_count = parameters.agent_count

        # Bits per axis = ceil(log2(map_width)), computed with integer math.
        # At least one bit is kept so a 1x1 map does not yield zero-width
        # fields (which would make decoding divide by zero).
        axis_bits = max(1, int(self.map_width - 1).bit_length())
        self.coordinate_length = 2 * axis_bits
        self.map_length = int(self.obstacle_count * self.coordinate_length)
        self.problem_length = int(self.agent_count * self.coordinate_length)
        # One bit per cell per agent, matching the shape used by
        # decode_solution (previously hard-coded to four agents).
        self.solution_length = int(self.agent_count * self.map_width ** 2)

    def encode_coordinate(self, coordinate: Tuple) -> str:
        """Encode a two-element coordinate as two zero-padded binary fields.

        Raises:
            ValueError: If a component does not fit in its field; writing it
                anyway would shift every later record in the stream.
        """
        axis_bits = self.coordinate_length // 2
        first, second = coordinate[0], coordinate[1]
        for value in (first, second):
            if not 0 <= value < (1 << axis_bits):
                raise ValueError(
                    f'coordinate component {value} does not fit in {axis_bits} bits'
                )
        return f'{first:0{axis_bits}b}{second:0{axis_bits}b}'

    def decode_coordinate(self, data: str) -> Tuple[int, int]:
        """Decode one ``coordinate_length``-bit string into a pair of ints."""
        axis_bits = self.coordinate_length // 2
        return int(data[:axis_bits], 2), int(data[axis_bits:], 2)

    def encode_coordinates(self, map: List) -> str:
        """Concatenate the encodings of every coordinate in ``map``."""
        return ''.join(self.encode_coordinate(coordinate) for coordinate in map)

    def decode_coordinates(self, data: str) -> List:
        """Split ``data`` into ``coordinate_length``-bit chunks and decode each."""
        step = self.coordinate_length
        return [
            self.decode_coordinate(data[start : start + step])
            for start in range(0, (len(data) // step) * step, step)
        ]

    def encode_solution(self, solution: np.array) -> str:
        """Flatten ``solution`` and write one character per cell.

        Cells are cast to int first so boolean arrays are written as
        '0'/'1' rather than 'False'/'True'.
        """
        cells = np.asarray(solution).astype(int).ravel()
        return ''.join(str(cell) for cell in cells)

    def decode_solution(self, data) -> np.array:
        """Rebuild an int array of shape (agent_count, map_width, map_width)."""
        return np.array(list(data)).astype(int).reshape(self.agent_count, self.map_width, self.map_width)

    def encode_dataset(self, maps: List, problems: List, solutions: List) -> str:
        """Encode all maps, then all problems, then all solutions.

        Returns:
            The bit string, right-padded with '0' to a multiple of eight so
            it can be packed into whole bytes without losing bits.
        """
        parts = [self.encode_coordinates(map_coordinates) for map_coordinates in maps]
        parts.extend(self.encode_coordinates(problem) for problem in problems)
        parts.extend(self.encode_solution(solution) for solution in solutions)
        data = ''.join(parts)

        # -len % 8 is the number of bits missing to the next byte boundary
        # (0 when already aligned); len % 8 would pad by the wrong amount.
        return data + '0' * (-len(data) % 8)

    def decode_dataset(self, data: str) -> Tuple:
        """Decode a bit string produced by ``encode_dataset``.

        Returns:
            ``(maps, problems, solutions)`` with ``map_count`` maps and
            ``map_count * problem_count`` problems and solutions. Trailing
            padding bits are ignored.

        Raises:
            ValueError: If ``data`` is shorter than the configured sizes
                require.
        """
        record_count = self.map_count * self.problem_count
        maps_end = self.map_count * self.map_length
        problems_end = maps_end + record_count * self.problem_length
        solutions_end = problems_end + record_count * self.solution_length

        if len(data) < solutions_end:
            raise ValueError(
                f'expected at least {solutions_end} bits, got {len(data)}'
            )

        maps = [
            self.decode_coordinates(data[start : start + self.map_length])
            for start in range(0, maps_end, self.map_length)
        ]
        problems = [
            self.decode_coordinates(data[start : start + self.problem_length])
            for start in range(maps_end, problems_end, self.problem_length)
        ]
        solutions = [
            self.decode_solution(data[start : start + self.solution_length])
            for start in range(problems_end, solutions_end, self.solution_length)
        ]

        return maps, problems, solutions

    def encode(self, maps: List, problems: List, solutions: List) -> bytes:
        """Encode the dataset, pack the bits into bytes and gzip the result."""
        return gzip.compress(bits_to_bytes(self.encode_dataset(maps, problems, solutions)))

    def decode(self, data: bytes) -> Tuple:
        """Inverse of ``encode``: gunzip, unpack to bits and decode."""
        return self.decode_dataset(bytes_to_bits(gzip.decompress(data)))

## Dataset Creation

Now we turn to the task of generating our dataset.

In [3]:
# Cache of grid coordinates, keyed by map width.
cached_map_coordinates = {}


def get_map_coordinates(parameters: "DatasetParameters") -> List:
    """Return every (y, x) cell of a ``map_width`` x ``map_width`` grid.

    The list is built once per width and then served from
    ``cached_map_coordinates``. The cached list itself is returned, so
    callers must treat it as read-only.
    """
    width = parameters.map_width
    map_coordinates = cached_map_coordinates.get(width)

    if map_coordinates is None:
        map_coordinates = [(y, x) for y in range(width) for x in range(width)]
        cached_map_coordinates[width] = map_coordinates

    return map_coordinates

In [4]:
import random


def generate_map(parameters: DatasetParameters) -> List:
    """Pick the obstacle cells of a new random map.

    Returns ``parameters.obstacle_count`` distinct (y, x) cells, sampled
    without replacement from the full grid.
    """
    return random.sample(get_map_coordinates(parameters), k=parameters.obstacle_count)

In [5]:
def generate_problem(parameters: DatasetParameters, map: List) -> List:
    """Pick random agent start cells on the obstacle-free part of ``map``.

    Returns ``parameters.agent_count`` distinct (y, x) cells, none of
    which is listed in ``map`` (the obstacle coordinates).
    """
    free_cells = set(get_map_coordinates(parameters)) - set(map)
    return random.sample(list(free_cells), k=parameters.agent_count)

In [6]:
import darpy


def generate_solution(parameters: DatasetParameters, map: List, problem: List) -> Tuple[bool, np.ndarray]:
    """Run DARP on one problem and return its outcome.

    Args:
        parameters: Supplies ``map_width``, used for both rows and columns.
        map: Obstacle cells as (y, x) pairs.
        problem: Agent start cells as (y, x) pairs.

    Returns:
        ``(solved, solution)`` exactly as returned by ``DARPProblem.solve``,
        with ``solution`` cast to int.
        NOTE(review): the solution is presumably shaped
        (agent_count, map_width, map_width), since that is what
        ``DatasetEncoder.decode_solution`` reshapes to; confirm against darpy.
    """
    # Stored coordinates are (y, x); DARPCoordinate is given them as (x, y).
    darp_problem = darpy.DARPProblem(
        map=darpy.DARPMap(rows=parameters.map_width, columns=parameters.map_width),
        agents=[darpy.DARPCoordinate(x, y) for y, x in problem],
        obstacles=[darpy.DARPCoordinate(x, y) for y, x in map],
    )

    solved, solution = darp_problem.solve(iterations=100)
    solution = solution.astype(int)

    return solved, solution

In [7]:
# Parameters for the generation run: 600 maps on a 10x10 grid, each with
# 10 obstacles and 50 solved four-agent problems.
dataset_parameters = DatasetParameters(
    map_width=10,
    map_count=600, # since some will not be solved
    problem_count=50,
    obstacle_count=10,
    agent_count=4,
)

In [8]:

import time
from etaprogress.progress import ProgressBar


total_problems = dataset_parameters.map_count * dataset_parameters.problem_count
progress_bar = ProgressBar(total_problems, max_width=40)

maps = []
problems = []
solutions = []
problems_generated = 0
maps_generated = 0

enc = DatasetEncoder(dataset_parameters)

while len(solutions) < total_problems:

    # Generate a fresh map. It is not called `map`, so the builtin stays
    # usable in the rest of the notebook.
    current_map = generate_map(dataset_parameters)

    # Results for this map are buffered and only committed once the map has
    # produced all of its problems. The decoder pairs maps with problems
    # purely by position, so partial results from a rejected map must never
    # reach the global lists.
    solved_problems = []
    solved_solutions = []
    map_valid = True

    if (maps_generated % 10) == 0:
        print(f'[*] Finding {dataset_parameters.problem_count} solved problems for map {maps_generated}/{dataset_parameters.map_count}...')

    # Find `problem_count` solved problems for this map.
    while len(solved_solutions) < dataset_parameters.problem_count:

        # Generate a random problem...
        try:
            problem = generate_problem(dataset_parameters, current_map)
            solved, solution = generate_solution(dataset_parameters, current_map, problem)
        except Exception:
            # Any solver/generation error rejects the whole map.
            # KeyboardInterrupt is deliberately not caught, so the run can
            # still be interrupted.
            print('Map invalid')
            map_valid = False
            break

        problems_generated += 1

        if solved:
            solved_problems.append(problem)
            solved_solutions.append(solution)

    if not map_valid:
        continue

    maps_generated += 1
    maps.append(current_map)
    problems.extend(solved_problems)
    solutions.extend(solved_solutions)

    if (maps_generated % 10) == 0:
        print('[*] Saving checkpoint...')

        # NOTE: the encoding stores no counts, so a checkpoint can only be
        # decoded by an encoder whose map_count equals the number of maps
        # saved so far.
        dataset = enc.encode(maps, problems, solutions)
        with open(f'./checkpoint-{int(time.time())}.gz', 'wb') as file:
            print(file.write(dataset))
            

[*] Finding 50 solved problems for map 0/600...
Map invalid
[*] Saving checkpoint...
14341
[*] Finding 50 solved problems for map 10/600...
[*] Saving checkpoint...
28138
[*] Finding 50 solved problems for map 20/600...
[*] Saving checkpoint...
41608
[*] Finding 50 solved problems for map 30/600...
Map invalid
Map invalid
[*] Saving checkpoint...
55093
[*] Finding 50 solved problems for map 40/600...
[*] Saving checkpoint...
68390
[*] Finding 50 solved problems for map 50/600...
[*] Saving checkpoint...
81644
[*] Finding 50 solved problems for map 60/600...
Map invalid
[*] Saving checkpoint...
94830
[*] Finding 50 solved problems for map 70/600...
[*] Saving checkpoint...
107853
[*] Finding 50 solved problems for map 80/600...
[*] Saving checkpoint...
121844
[*] Finding 50 solved problems for map 90/600...
Map invalid
[*] Saving checkpoint...
135685
[*] Finding 50 solved problems for map 100/600...
[*] Saving checkpoint...
149194
[*] Finding 50 solved problems for map 110/600...
Map in

In [9]:


# Marker cell: prints a fixed message, presumably to signal that the
# generation cell above has completed.
print('FINISHED')

FINISHED


In [11]:
# Decode a saved checkpoint into (maps, problems, solutions); the context
# manager closes the file handle once it has been read.
with open('./checkpoint-1674695157.gz', 'rb') as file:
    d1 = enc.decode(file.read())

In [12]:
# d1[0] is the list of decoded maps, so this is the number of maps.
len(d1[0])

600