This Notebook shows directly how to reproduce the results in the paper.

All necessary explanations are also found in the other Notebooks, but this one is dedicated to reproducing the results.

In [None]:
import torch
import numpy as np
import API
import res_flow_vis as visual
import copy

### Model training

The training was done with `train_models.py`, but here is a short explanation:

In [None]:
# Hyperparameter definition used for the models in this notebook.
# API.py and API_Workflow.ipynb describe the individual entries in detail.
definition_model = dict(
    processor="Processor_cond",
    processor_args={},
    processor_data={"folder": "all_sims"},
    processor_clean={"N_min": 500},
    flow_hyper={
        "n_layers": 14,
        "dim_notcond": 10,
        "dim_cond": 4,
        "CL": "NSF_CL2",
        "K": 10,
        "B": 3,
        "network": "MLP",
        "network_args": torch.tensor([128, 4, 0.2]),
    },
    subset_params={
        "cond_fn": "cond_M_stars_2age_avZ",
        "use_fn_constructor": "construct_all_galaxies_leavout",
        "leavout_key": "id",
        # ids of the galaxies that are left out of the training set
        "leavout_vals": [66, 20, 88, 48, 5],
    },
    data_prep_args={
        "transformation_functions": ("np.log10",),
        "transformation_components": (["M_stars"],),
        "inverse_transformations": ("10**x",),
        "transformation_logdets": ("logdet_log10",),
    },
)

In [None]:
# Every galaxy has a unique id in [0, 94], so the id can be used as the key for leaving galaxies out.
# Some ids are missing, however, because the corresponding galaxy was removed in cleaning;
# which ids are valid therefore depends on the cleaning settings.
# Quick way to get the ids that exist: build the model, run prepare() and read the ids of model.Galaxies.
model = API.GalacticFlow(definition_model)
model.prepare()
existing_ids = model.processor.get_array(model.Galaxies, "galaxy", "id")
print(existing_ids)

In [None]:
# Choose the ids to leave out for a given model, e.g. [25, 89, 46, 38, 14].
# Galaxies with these ids are left out of the training set.
# The list is stored as "leavout_vals" inside the "subset_params" of the definition;
# working on a deep copy keeps definition_model itself unchanged.
definition_model2 = copy.deepcopy(definition_model)
definition_model2["subset_params"].update(leavout_vals=[25, 89, 46, 38, 14])

In [None]:
# Train a model on the definition with the newly chosen left-out ids (definition_model2).
model2 = API.GalacticFlow(definition_model2)
model2.prepare()
model2.train(epochs=10, init_lr=0.00009, batch_size=1024, gamma=0.998, device="cpu")
# Save the model that was just trained (model2), not the `model` object used for the id lookup.
model2.save("model2.pth")

In [None]:
# Alternative: API.train_GF, which is especially intended for training multiple
# models in parallel and for restarting after a crash (see train_models.py).
# NOTE(review): the four lists presumably hold one entry per model to train - confirm in API.py.
models = [definition_model2]
train_kwargs = [
    dict(
        epochs=9,
        batch_size=1024,
        init_lr=0.00009,
        gamma=0.998,
        update_textfile=f"model{[25, 89, 46, 38, 14]}",
    )
]
save_to = ["model2.pth"]
devices = [None]
API.train_GF(models, devices, train_kwargs, save_to, max_restart=3)

### Loading a pretrained model

In [None]:
# A pretrained model is loaded by passing its file name to API.GalacticFlow,
# e.g. model[25, 89, 46, 38, 14].pth.
# Adjust the path below to wherever the saved model is stored.
model3 = API.GalacticFlow("saves/cross_val/model[25, 89, 46, 38, 14].pth")

In [None]:
# Inference is already possible at this point; prepare() is run so that the data
# is available for comparing the model output against it.
model3.prepare()

#### Inference

In [None]:
# Pick one galaxy from the data by its id (here one from the validation set).
# model3.Galaxies includes both the training and the validation set.
id_use = 89
galaxy_use = [entry for entry in model3.Galaxies if entry["galaxy"]["id"] == id_use][0]
galactic_parameters = galaxy_use["parameters"]

In [None]:
# Draw a sample from the model at the parameters of the chosen galaxy,
# using as many stars as the data galaxy has.
model_galaxy = model3.sample_galaxy(
    N_stars=galaxy_use["stars"].shape[0],
    parameters=galaxy_use["parameters"],
    reinsert_conditions="galaxy",
    GPUs=None,
)

In [None]:
# Visualize the sampled galaxy and compare it to the data galaxy in a corner plot.
manual_cuts = {"feh":(-2.3,np.inf),"ofe":(-0.1,0.5)}
# Raw strings keep the LaTeX backslashes literal; sequences such as "\m" are invalid
# escapes in normal string literals and trigger a warning on current Python versions.
labels = [
    r"$x/\mathrm{kpc}$",
    r"$y/\mathrm{kpc}$",
    r"$z/\mathrm{kpc}$",
    r"$v_x/\mathrm{km/s}$",
    r"$v_y/\mathrm{km/s}$",
    r"$v_z/\mathrm{km/s}$",
    r"$Z$",
    r"$[\mathrm{Fe}/\mathrm{H}]$",
    r"$[\mathrm{O}/\mathrm{Fe}]$",
    r"$\tau/\mathrm{Gyr}$",
]
visual.cornerplot_hist(galaxy_use["stars"], model_galaxy["stars"], grid_pass=True, names_to_print=labels, save_fig=False, manual_cut_dict=manual_cuts)

### MMD

In [None]:
# See metrics.py for the implementation; this is a short explanation.
import glob
import metrics

# (Use the right path)
# glob returns matches in arbitrary order, so sort them to get a reproducible model order.
trained_models = sorted(glob.glob("saves/cross_val/model*.pth"))
if not trained_models:
    # Fail with a clear message instead of an IndexError on trained_models[0] below.
    raise FileNotFoundError("No trained models found matching 'saves/cross_val/model*.pth'")

# To speed things up, compute all cleaned galaxies once (via the first model) and reuse them.
ag_model = API.GalacticFlow(trained_models[0])
ag_model.prepare()
all_galaxies = ag_model.Galaxies

In [None]:
leavout_MMDs = []
leavout_MMDs_data = []
# Separate names for the file path and the loaded model: the loop no longer rebinds its own
# loop variable and no longer overwrites the notebook-level `model` defined earlier.
for model_path in trained_models:
    cv_model = API.GalacticFlow(model_path)
    # Pass all_galaxies to avoid needing to run .prepare() on every model.
    # Get the unseen galaxies from the validation set, sample them back,
    # and for each of them compute the MMD to all data galaxies.
    # The galactic parameters are saved with each MMD computed.
    leavout_MMDs.extend(metrics.get_leavout_MMDs(cv_model, GPUs=None, all_galaxies=all_galaxies))
    leavout_MMDs_data.extend(metrics.get_leavout_MMDs(cv_model, GPUs=None, all_galaxies=all_galaxies, both_data=True))


In [None]:
# Relate each MMD to the difference in parameters and combine the results in one relation.
# transform_dict maps a parameter name to (transformation, new name):
# M_stars becomes log_M_stars and Z_av becomes Z_av_solar.
transform_dict = {
    "M_stars": (np.log10, "log_M_stars"),
    "Z_av": (lambda z: z/0.0134, "Z_av_solar"),
}
MMDs_vs_params = metrics.MMD_vs_params(
    leavout_MMDs,
    transform_dict=transform_dict,
)
MMDs_vs_params_data = metrics.MMD_vs_params(
    leavout_MMDs_data,
    transform_dict=transform_dict,
)

In [None]:
# Plot the results: model-based MMDs as points, data-based MMDs as contours.
import matplotlib.pyplot as plt
plt.plot(MMDs_vs_params["log_M_stars"], MMDs_vs_params["MMD"], marker=".", linestyle="none", markersize=4, alpha=0.2, color="orange", label="GalacticFlow")
cs, fmt = visual.make_contours(MMDs_vs_params_data["log_M_stars"].values, MMDs_vs_params_data["MMD"].values, cumulative_levels=[0.9,0.7,0.5,0.3,0.1,0.03,0.01], cmap="RdGy_r")

# Empty line plot: only there to create a "data" entry in the legend.
plt.plot([], [], c="gray", label="data")
plt.clabel(cs, inline=1, fontsize=10, fmt=fmt)
# Raw string so the LaTeX backslashes (\Delta, \log, \rm) are not treated as escape sequences.
plt.xlabel(r"$\Delta \log M_{\rm{stars}}$")

plt.ylabel("MMD")
plt.legend(loc=9)
# Uncomment to save the figure:
#plt.savefig("MMD_vs_params.pdf", bbox_inches="tight")
plt.show()