In [1]:
#This demonstrates the workflow with the API from API.py

import time

import API
import torch
import numpy as np

from IPython.display import Markdown#, display

In [2]:
#Model definition, adapted from the example in API.py with a few changes.
#One entry per line so that individual settings are easy to find and to change.
definition_model1 = {
    #Which processor to use, its constructor arguments, its data source and its cleaning options
    "processor": "Processor_cond",
    "processor_args": {},
    "processor_data": {"folder": "all_sims"},
    "processor_clean": {"N_min": 500},
    #Flow hyperparameters; the architecture summary printed by the model reports
    #14 layers, data dim 10, condition dim 4, coupling layer NSF_CL2, 10 spline bins, spline range 3, base network MLP
    "flow_hyper": {
        "n_layers": 14,
        "dim_notcond": 10,
        "dim_cond": 4,
        "CL": "NSF_CL2",
        "K": 10,
        "B": 3,
        "network": "MLP",
        #Reported by the summary as 128 neurons per layer, 4 layers, leaky ReLU slope 0.2
        #(stored in one float tensor, which is why the slope is shown as 0.20000000298023224)
        "network_args": torch.tensor([128, 4, 0.2]),
    },
    #Choice of the subset: condition function, galaxy constructor and leave-out settings (none left out here)
    "subset_params": {
        "cond_fn": "cond_M_stars_2age_avZ",
        "use_fn_constructor": "construct_all_galaxies_leavout",
        "leavout_key": "id",
        "leavout_vals": [],
    },
    #Data preparation: np.log10 is applied to "M_stars"; "10**x" is the matching inverse
    "data_prep_args": {
        "transformation_functions": ("np.log10",),
        "transformation_components": (["M_stars"],),
        "inverse_transformations": ("10**x",),
    },
}

In [3]:
#Instantiate the model from the definition dict, which carries all the desired parameters
model1 = API.GalacticFlow(definition_model1)

In [None]:
#Prepare the model (i.e. load and process the data)
#The prepared galaxies are read from model1.Galaxies in the cells below.
model1.prepare()

In [None]:
#.Galaxies holds the cleaned data of the chosen subset (including galaxies that are not used for training).
#Inspect the first galaxy: show every entry together with the type of its content.
example_galaxy = model1.Galaxies[0]

for entry_name, entry_content in example_galaxy.items():
    content_type = type(entry_content).__name__
    print(f'key "{entry_name}" contains a {content_type}:')
    display(entry_content)


In [None]:
#Because of the chosen subset, the model now knows which components and conditions are present.

#"stars" is named explicitly because other particle types, e.g. gas, could technically be handled too and might have different components.
#Gas would share the same galactic parameters (see above), but could e.g. be learned conditional on the position (unusual, but support for it is built in).
for label, getter in (("Components", model1.get_components), ("Conditions", model1.get_conds)):
    print(f'{label}: {getter("stars")}')

In [None]:
#Train the model.
#Training can take a long time, so it is often run from a dedicated .py script started with e.g. nohup instead of from the notebook.
#NOTE(review): the arguments are positional; they look like (epochs, learning rate, batch size, decay factor, device) - confirm against the train signature in API.py.
#NOTE(review): "cuda:9" hard-codes one specific GPU index; adjust it to the machine the notebook runs on.
model1.train(1,0.0004, 1024, 0.998, "cuda:9")

In [None]:
#Pick galaxy nb. 5 from the data and read off its parameters and its number of stars
data_galaxy = model1.Galaxies[5]
galaxy_params = data_galaxy["parameters"]
n_stars = len(data_galaxy["stars"])

print("Galaxy Parameters:")
display(galaxy_params)

#Sample the model at these parameters, drawing as many stars as the data galaxy has
#NOTE(review): GPUs=[6,7,8,9] presumably lists the GPU indices to sample on - adjust to the available hardware
model_galaxy = model1.sample_galaxy(n_stars, galaxy_params, GPUs=[6,7,8,9])

print("Sampled Galaxy:")
display(model_galaxy)

### Many ways to sample a galaxy...

In [None]:
#Multiple galaxies can be sampled in a single call, e.g. 10**4 and 10**5 stars for galaxy parameters like nb. 5 and nb. 80, respectively.
#The call is timed so it can be compared with issuing two separate calls.
#`time` is imported in the import cell at the top of the notebook, not here, so that
#every timing cell works regardless of which other cells were run before it.
start = time.perf_counter()
model_galaxy_alternative = model1.sample_galaxy([10**4,10**5], [galaxy_params, model1.Galaxies[80]["parameters"]], GPUs=[6,7,8,9])
end = time.perf_counter()

print(f"Sampling took {end-start} seconds")

#One result per requested galaxy: show the shape and the first five rows of each
print("Sampled Galaxies:")
for index, ordinal in enumerate(("first", "second")):
    sampled = model_galaxy_alternative[index]
    print(f"Shape of the {ordinal} galaxy: {sampled.shape}")
    display(sampled[:5])

In [None]:
#Timing comparison: issuing two separate sample_galaxy calls is considerably slower than one batched call.
#For small sample sizes it takes almost twice as long.
separate_requests = (
    (10**4, galaxy_params),
    (10**5, model1.Galaxies[80]["parameters"]),
)

t_begin = time.perf_counter()
for n_requested, requested_params in separate_requests:
    model1.sample_galaxy(n_requested, requested_params, GPUs=[6,7,8,9])
t_finish = time.perf_counter()

print(f"Sampling took {t_finish-t_begin} seconds")

In [None]:
#Two different parameter sets, one shared number of stars
#Sampling on the CPU is faster for small sample sizes (hence GPUs=None here)
parameter_sets = [galaxy_params, model1.Galaxies[80]["parameters"]]
model_galaxy_alternative = model1.sample_galaxy(10**4, parameter_sets, GPUs=None)

#Show the shape and the first two rows of each sampled galaxy
print("Sampled Galaxies:")
for index, ordinal in enumerate(("first", "second")):
    sampled = model_galaxy_alternative[index]
    print(f"Shape of the {ordinal} galaxy: {sampled.shape}")
    display(sampled[:2])

In [None]:
#One parameter set, two different numbers of stars
star_counts = [10**4,10]
model_galaxy_alternative = model1.sample_galaxy(star_counts, galaxy_params, GPUs=None)

#Show the shape and the first two rows of each sampled galaxy
print("Sampled Galaxies:")
for index, ordinal in enumerate(("first", "second")):
    sampled = model_galaxy_alternative[index]
    print(f"Shape of the {ordinal} galaxy: {sampled.shape}")
    display(sampled[:2])

In [None]:
#How the sample is returned is controlled by the reinsert_conditions argument.

#The default (used in all calls so far) is True: the conditions are reinserted into the sample.

#Two further modes return the same kind of object but with fewer columns:
#  "none"  - the conditions are not reinserted into the sample
#  "local" - only conditions like "x" are reinserted, but not galaxy parameters like "M_stars";
#            here we have no conditions like "x", so the result is the same as for "none"
for mode in ("none", "local"):
    model_galaxy_alternative = model1.sample_galaxy(10**4, galaxy_params, GPUs=None, reinsert_conditions=mode)

    print(f'Mode "{mode}":')
    display(model_galaxy_alternative[:2])

#"galaxy" changes the returned data type to the standard galaxy type:
#a list of dicts, one for each galaxy, just like model1.Galaxies
model_galaxy_alternative = model1.sample_galaxy([10**4]*2, galaxy_params, GPUs=None, reinsert_conditions="galaxy")

print('Mode "galaxy":')
for sampled_galaxy in model_galaxy_alternative:
    for entry_name, entry_content in sampled_galaxy.items():
        print(f'key "{entry_name}" contains a {type(entry_content).__name__}:')
        display(entry_content)

## Evaluating the results

In [None]:
#E.g. visualize it now
import res_flow_vis as visual

In [None]:
#The plotting functions do not yet support the new galaxy format (dicts + DataFrames),
#so the star tables are converted back to the old format (np.arrays) via .values
visual.get_result_plots(data_galaxy["stars"].values, model_galaxy.values, label = "API_TEST")

In [4]:
#We can also check some important internal parameters of the model
print("Internal Parameters:")
#Display of the processor statistics is disabled here.
#NOTE(review): presumably mu/std are only available once prepare() has run - confirm before re-enabling.
#display(model1.processor.mu)
#display(model1.processor.std)
print("")
print("Flow architecture:")
#flow_architecture is rendered as Markdown so that its <br> and &nbsp; markup displays as formatted text
display(Markdown(model1.flow_architecture))

Internal Parameters:

Flow architecture:


Data&nbsp;dim:&nbsp;10,&nbsp;Condition&nbsp;dim:&nbsp;4<br><br>Flow&nbsp;architecture:<br>Type&nbsp;of&nbsp;coupling&nbsp;layer:&nbsp;NSF_CL2<br>Number&nbsp;of&nbsp;layers:&nbsp;14<br>Number&nbsp;of&nbsp;spline&nbsp;bins:&nbsp;10<br>Spline&nbsp;range:&nbsp;3<br>Base&nbsp;network:&nbsp;MLP<br><br>Base&nbsp;network&nbsp;architecture:<br>Number&nbsp;of&nbsp;layers:&nbsp;4<br>Number&nbsp;of&nbsp;neurons&nbsp;per&nbsp;layer:&nbsp;128<br>Leaky&nbsp;ReLU&nbsp;slope:&nbsp;0.20000000298023224

## Saving the model and loading from saved file

In [None]:
#Save the model to a file (it is loaded again from this path further below)
model1.save("GF_model1.pth")

In [None]:
#Somewhere else, or on another day / in another session:
#nothing more is needed than the imports and the path of the saved file
model2 = API.GalacticFlow("GF_model1.pth")

In [None]:
#All parameters are still there after loading, e.g. the processor's mu and std
model2.processor.mu, model2.processor.std

In [None]:
#The components and conditions are restored from the saved file as well
for label, getter in (("Components", model2.get_components), ("Conditions", model2.get_conds)):
    print(f'{label}: {getter("stars")}')

In [None]:
#Preparation yields the same data
#NOTE(review): prepare() is left disabled here, so .Galaxies is presumably restored from the saved file - confirm.
#model2.prepare()
data_galaxy = model2.Galaxies[5]
data_params = data_galaxy["parameters"]

#Sample as many stars as the data galaxy has; "galaxy" mode returns the sample in the standard galaxy format
star_count = len(data_galaxy["stars"])
model_galaxy = model2.sample_galaxy(star_count, data_params, GPUs=None, reinsert_conditions="galaxy")

In [None]:
#The result of the loaded model can again be visualized and remains the same
#model_galaxy was sampled in "galaxy" mode here, so its star table is taken from the "stars" entry
visual.get_result_plots(data_galaxy["stars"].values, model_galaxy["stars"].values, label = "API_TEST2")