# Dataset Generator for FullDA-FM

In [None]:
# Notebook metadata: author, version and contact address.
__author__ = "Ludovico Scarton"
# NOTE(review): the version is stored as a float rather than the conventional
# string — confirm nothing downstream expects text (e.g. "0.10" vs 0.1).
__version__= 0.1
__email__ = "ludovico.scarton@gmail.com"

This dataset generator was created to support Michael Loomis's work on training artificial neural networks (ANNs).

It can be used to perform:
* encoding-decoding $f(𝕯) = \hat{𝕯}$
* regression $f(𝕯) = \vec{y}$

In [1]:
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt

In [2]:
# Index arrays (one per axis of the stored array) of every non-zero entry in the
# valid-building mask; locations[0] and locations[1] are used later as the
# row / column coordinates of the buildable cells.
locations = np.load('valid_building_location.npy').nonzero()

In [3]:
# How many of the generated designs are kept after ranking by fitness.
number_of_survive_samples: int = 50_000
# How many random designs are drawn before the selection step.
number_of_generated_samples: int = 5_000_000

In [4]:
# Each index array holds one entry per valid building location, so its length
# is the number of buildable cells; the mask is expected to contain exactly 152.
number_of_locations = locations[0].shape[0]
assert number_of_locations == 152

In [5]:
def create_dataset(number_of_survive_samples:int, number_of_generated_samples: int, seed=0) -> "tuple[np.ndarray, np.ndarray]":
    """
    Create valid designs and keep the ones with the lowest fitness.

    Each valid building location (taken from the module-level ``locations``
    index arrays) is filled with a value between 0 and 3, where 0 is no voxel
    and 3 is 3 voxels vertically stacked. Values are drawn with probabilities
    0.8, 0.1, 0.07 and 0.03 respectively. The fitness of a design is the sum,
    over the 30 columns of the map, of the tallest stack found in each column.

    parameters:
        number_of_survive_samples (int): number of designs returned, i.e. the
            ones with the smallest fitness among the generated designs.
        number_of_generated_samples (int): number of random designs drawn
            before the selection; must be strictly larger than
            number_of_survive_samples.
        seed: pseudo number generator seed.

    return:
        𝕯 np.array: int32 tensor of shape [number_of_survive_samples, 30, 30] with valid designs.
        y⃗ np.array: vector of shape [number_of_survive_samples] with the fitness values, in ascending order.

    notes:
        * NumPy's global random generator is reseeded with ``seed`` on every call.
        * Only the surviving designs are expanded to 30x30 maps. Expanding every
          generated design would need number_of_generated_samples * 900 * 4
          bytes (about 18 GB for 5,000,000 samples).
    """
    assert number_of_generated_samples > number_of_survive_samples, 'number of generated samples needs to be larger that the one selected'
    rows, cols = locations[0], locations[1]

    np.random.seed(seed)
    # One draw per (design, valid location). Values are in 0..3, so int8 is
    # enough and keeps the working set small.
    heights = np.random.choice(
        4, p=[0.8, 0.1, 0.07, 0.03], size=(number_of_generated_samples, len(rows))
    ).astype(np.int8)

    # Fitness = sum over map columns of the tallest stack in that column.
    # Cells that are not valid locations are always 0 and heights are never
    # negative, so the per-column maximum only depends on the valid locations
    # lying in that column; columns without any valid location contribute 0.
    # np.int_ matches the dtype NumPy's integer sum produces for an int32 map.
    fitness = np.zeros(number_of_generated_samples, dtype=np.int_)
    for column in np.unique(cols):
        fitness += heights[:, cols == column].max(axis=1)

    survivor_ids = np.argsort(fitness)[:number_of_survive_samples]

    # Expand only the selected designs into full 30x30 maps.
    designs = np.zeros((len(survivor_ids), 30, 30), dtype=np.int32)
    designs[:, rows, cols] = heights[survivor_ids]
    return designs, fitness[survivor_ids]
    

In [6]:
# Reproducibility: two runs with the default seed must yield the same designs.
assert np.array_equal(create_dataset(1, 2)[0], create_dataset(1, 2)[0])
# Sensitivity: changing the seed must change the designs.
assert not np.array_equal(create_dataset(1, 2)[0], create_dataset(1, 2, seed=1)[0])

In [None]:
# Build the full dataset: A[0] holds the designs, A[1] the matching fitness values.
A = create_dataset(number_of_survive_samples, number_of_generated_samples)

# Show the first returned design as a grayscale image, titled with its fitness.
fig, ax = plt.subplots()
ax.imshow(A[0][0], cmap= 'gray')
ax.set_title(f'example of a valid design with a fitness of {A[1][0]}')
plt.show()


Good luck with your project!! =)
Ludovico