# API usage guide

In [1]:
#This Demonstrates the Workflow with the API from API.py
import time

import numpy as np
import torch
from IPython.display import Markdown, display

import API

### Defining a model

The steps in this section can be replaced by loading a pre-trained model, as described below.

In [3]:
#Model definition, adapted from the example in API.py.
definition_model1 = {
    #Processor to use (str as registered in func_handle)
    "processor": "Processor_cond",

    #Arguments for the processor's init
    "processor_args": {},

    #Arguments for the processor's get_data; here the name of the folder holding the data
    "processor_data": {"folder": "all_sims"},

    #Arguments for the processor's cleaning_function
    "processor_clean": {"N_min": 500},

    #Hyperparameters of the flow (cf. the flow_architecture printout further down)
    "flow_hyper": {
        "n_layers": 14,        #number of coupling layers
        "dim_notcond": 10,     #dimension of the data (the components)
        "dim_cond": 4,         #dimension of the condition
        "CL": "NSF_CL2",       #type of coupling layer
        "K": 10,               #number of spline bins
        "B": 3,                #spline range
        "network": "MLP",      #base network
        #base network args: neurons per layer, number of layers, leaky ReLU slope
        "network_args": torch.tensor([128, 4, 0.2]),
    },

    #Parameters for choosing the subset of the data to use.
    #  cond_fn: the function that computes/determines the condition for each galaxy.
    #  use_fn_constructor: the function that constructs the subset of the data to use.
    #  (See Processor_cond.choose_subset() for details on both.)
    #use_fn_constructor is called with leavout_key and leavout_vals as kwargs, i.e. galaxies that have
    #galaxy["galaxy"][leavout_key] in leavout_vals are left out; the remaining galaxies are used for training.
    #It is also called once with leavout_vals=[] to construct the full dataset for comparison
    #(i.e. including the validation set).
    "subset_params": {
        "cond_fn": "cond_M_stars_2age_avZ",
        "use_fn_constructor": "construct_all_galaxies_leavout",
        "leavout_key": "id",
        "leavout_vals": [66, 20, 88, 48, 5],
    },

    #Parameters for processor.Data_to_flow.
    #transformation_components[i] is transformed with transformation_functions[i]; the matching inverse
    #is inverse_transformations[i]. transformation_logdets[i] is the logdet of transformation_functions[i],
    #which is needed for pdf evaluation.
    "data_prep_args": {
        "transformation_functions": ("np.log10",),
        "transformation_components": (["M_stars"],),
        "inverse_transformations": ("10**x",),
        "transformation_logdets": ("logdet_log10",),
    },
}

In [4]:
#Instantiate the model from the definition dict, which carries all the desired parameters
model1 = API.GalacticFlow(definition_model1)

### Model and Data

These steps (except for the training itself) also apply to pre-trained models.

In [11]:
#Prepare the model (i.e. load and process the data).
#A short summary of how many galaxies/stars were cut out and chosen is printed (see the output below).
model1.prepare()

Cut out 5 of 95 galaxies, 2072015 of 34878379 stars (~6%).
Chose 90 of 90 galaxies.
Chose 90 of 90 galaxies.


In [12]:
#.Galaxies holds the cleaned data of the chosen subset (including galaxies that are not used for training).
#Each galaxy is a dict; walk through the entries of the first one and show what they contain.
example_galaxy = model1.Galaxies[0]

for name, entry in example_galaxy.items():
    entry_type = type(entry).__name__
    print(f'key "{name}" contains a {entry_type}:')
    display(entry)

key "stars" contains a DataFrame:


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age
5,0.412800,-0.707363,-0.299138,-48.034634,27.062552,58.295642,5.914080e-07,-5.221701,0.708509,13.516774
6,-1.625531,-0.766206,0.289526,-5.114304,70.819484,24.436441,2.317112e-07,-5.603453,0.682937,13.516774
7,9.671159,1.127014,-0.269296,-79.656855,23.959195,2.403746,1.007928e-05,-4.036935,0.755956,13.515931
8,-5.684086,1.792479,-0.622473,-35.460856,-33.859797,8.264200,7.682763e-06,-4.107091,0.707528,13.515088
9,-2.544000,2.609641,-0.003698,11.379407,17.999412,24.949269,2.576272e-05,-3.551783,0.677268,13.514245
...,...,...,...,...,...,...,...,...,...,...
70473,6.894527,4.661355,-1.684753,-48.536045,23.866754,19.308997,2.247055e-03,-1.014137,0.063585,0.000855
70474,7.050741,4.527749,-1.628008,-21.152525,10.153983,14.985289,2.307435e-03,-1.002988,0.064091,0.000012
70475,7.177917,4.491626,-1.605336,-17.681683,9.604566,12.951498,2.271282e-03,-1.010247,0.064449,0.000012
70476,7.033750,4.504376,-1.702954,-25.656926,7.408399,13.437631,2.277983e-03,-1.005108,0.060350,0.000012


key "galaxy" contains a dict:


{'M_dm': 109636618947.82254,
 'M_stars': 547990391.3124597,
 'N_stars': 66883,
 'id': 0,
 'NIHAO_id': 'g1.05e11'}

key "parameters" contains a DataFrame:


Unnamed: 0,M_stars,tau50,tau10,Z_av
0,547990400.0,4.942476,1.217085,0.001316


In [13]:
#Thanks to our choice of subset, the model now knows which components and conditions we have.

#"stars" is given explicitly because we could technically also have e.g. gas, which could have different components.
#Gas would share the same galactic parameters (see also above), but could e.g. be learned conditional on the
#position (unusual, but support for it is built in).
for label, getter in (("Components", model1.get_components), ("Conditions", model1.get_conds)):
    print(f'{label}: {getter("stars")}')

Components: ['x', 'y', 'z', 'vx', 'vy', 'vz', 'Z', 'feh', 'ofe', 'age']
Conditions: ['M_stars', 'tau50', 'tau10', 'Z_av']


In [None]:
#Train the model.
#Often we want to train in a dedicated .py script that is started with e.g. nohup, because training can take a long time.
#NOTE(review): the meaning of the positional arguments is not visible in this notebook (presumably epochs,
#learning rate, batch size, learning-rate decay and device) - confirm against the train() signature in API.py.
#"cuda:9" looks like a torch device string selecting the GPU with index 9; adapt it to the machine at hand.
model1.train(1,0.0004, 1024, 0.998, "cuda:9")

In [17]:
#Take out the data of galaxy nb. 5
data_galaxy = model1.Galaxies[5]

#Grab its parameters (the conditions at which the model is sampled below)
galaxy_params = data_galaxy["parameters"]
print("Galaxy Parameters:")
display(galaxy_params)

#Sample the model at these parameters, with the same number of stars as the original galaxy
#NOTE(review): GPUs=[1,2,3,4,5] is presumably the list of GPU indices to sample on - adapt it to the
#machine at hand (GPUs=None is used for sampling on the CPU further down).
n_stars = len(data_galaxy["stars"])
model_galaxy = model1.sample_galaxy(n_stars, galaxy_params, GPUs=[1,2,3,4,5])

#The sample comes back with the conditions reinserted as additional columns (see the output below)
print("Sampled Galaxy:")
display(model_galaxy)

Galaxy Parameters:


Unnamed: 0,M_stars,tau50,tau10,Z_av
0,75383840000.0,7.424664,2.113876,0.034075


Sampled Galaxy:


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
0,-0.483687,1.191799,0.865912,288.251496,538.033608,-81.641892,0.041698,0.380309,0.004004,0.022114,7.538384e+10,7.424664,2.113876,0.034075
1,1.038793,0.192191,0.230207,-186.742462,415.470256,-171.930630,0.037501,0.211359,0.130612,6.610977,7.538384e+10,7.424664,2.113876,0.034075
2,-0.398880,-0.192279,0.075548,578.046970,-22.063512,-95.489212,0.047138,0.399903,0.060388,4.370513,7.538384e+10,7.424664,2.113876,0.034075
3,-0.957755,-0.745118,0.268047,300.838364,-18.260918,-100.838529,0.045219,0.352044,0.086813,4.682090,7.538384e+10,7.424664,2.113876,0.034075
4,-0.017033,0.123668,0.115645,256.649732,338.267846,-8.947619,0.042387,0.308335,0.096010,5.340992,7.538384e+10,7.424664,2.113876,0.034075
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1161195,-0.375837,0.370900,-0.137237,-110.346812,-244.115763,14.787124,0.057152,0.557672,0.004078,2.844539,7.538384e+10,7.424664,2.113876,0.034075
1161196,-0.454199,0.206421,0.277209,-65.532632,-142.642766,-94.457230,0.055069,0.515013,0.026651,0.326781,7.538384e+10,7.424664,2.113876,0.034075
1161197,-0.039169,-0.563065,0.419998,112.137624,-297.988390,-82.918500,0.030379,-0.004914,0.248175,8.757133,7.538384e+10,7.424664,2.113876,0.034075
1161198,-1.909369,-0.320818,-0.147439,151.897630,-58.182497,191.979775,0.046144,0.383342,0.064724,4.465702,7.538384e+10,7.424664,2.113876,0.034075


#### Many ways to sample a galaxy...

In [18]:
#Multiple galaxies can be sampled in one call, e.g. 10**4 and 10**5 stars for galaxy parameters like nb.5 and nb.80, respectively.
#The call is timed so it can be compared with two separate calls in the next cell.
#(`time` is imported together with the other imports at the top of the notebook, so that every timing cell
#works on its own after a kernel restart.)
start = time.perf_counter()
model_galaxy_alternative = model1.sample_galaxy([10**4,10**5], [galaxy_params, model1.Galaxies[80]["parameters"]], GPUs=[1,2,3,4,5])
end = time.perf_counter()

print(f"Sampling took {end-start} seconds")

print("Sampled Galaxies:")
#One result per requested galaxy comes back; show the shape and the first rows of each.
for position, ordinal in enumerate(("first", "second")):
    sampled = model_galaxy_alternative[position]
    print(f"Shape of the {ordinal} galaxy: {sampled.shape}")
    display(sampled[:5])

Sampling took 9.395179729908705 seconds
Sampled Galaxies:
Shape of the first galaxy: (10000, 14)


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
0,-0.84059,-1.823525,4.798178,155.68576,49.588147,19.933697,0.029167,-0.018978,0.241455,8.687783,75383840000.0,7.424664,2.113876,0.034075
1,2.324327,-2.012591,-0.413427,119.947698,-91.712378,-141.270816,0.007351,-0.714466,0.294116,10.731671,75383840000.0,7.424664,2.113876,0.034075
2,1.061424,-0.119758,-1.094606,26.830976,2.22849,-70.06215,0.002529,-1.201274,0.310792,12.251304,75383840000.0,7.424664,2.113876,0.034075
3,1.382586,-2.587378,-0.59043,117.672565,-96.941118,-221.348848,0.02114,-0.211043,0.28008,9.160256,75383840000.0,7.424664,2.113876,0.034075
4,1.493349,-0.624674,0.346615,237.406333,282.116067,-150.92296,0.035265,0.127449,0.188283,7.611277,75383840000.0,7.424664,2.113876,0.034075


Shape of the second galaxy: (100000, 14)


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
10000,0.073019,4.288715,6.652856,48.499003,-106.727831,61.831423,0.002623,-1.047329,0.175916,7.290077,16249190000.0,2.922168,0.210724,0.0065
10001,6.328512,1.204473,0.1113,-15.296846,-21.652143,125.759809,0.004092,-0.866387,0.18641,9.683645,16249190000.0,2.922168,0.210724,0.0065
10002,4.102487,-1.31312,-0.621394,100.595095,-39.739338,25.540288,0.008052,-0.521287,0.141924,2.919462,16249190000.0,2.922168,0.210724,0.0065
10003,0.529137,-1.735043,0.068758,68.882643,141.823073,-152.752742,0.006223,-0.598307,0.104348,4.719141,16249190000.0,2.922168,0.210724,0.0065
10004,2.905968,7.300434,-0.664748,-74.989378,20.023519,-34.295951,0.008539,-0.50465,0.153167,0.458435,16249190000.0,2.922168,0.210724,0.0065


In [20]:
#Calling the function twice is much slower than one combined call: for small sample sizes it takes
#almost double the time (compare the timing below with the one of the previous cell).
#Requires `time` to have been imported in an earlier cell.
start = time.perf_counter()
model1.sample_galaxy(10**4, galaxy_params, GPUs=[1,2,3,4,5])
model1.sample_galaxy(10**5, model1.Galaxies[80]["parameters"], GPUs=[1,2,3,4,5])
end = time.perf_counter()

print(f"Sampling took {end-start} seconds")

Sampling took 18.839533735066652 seconds


In [21]:
#Two different parameter sets, the same number of stars for both
#The CPU (GPUs=None) is faster for small sample sizes
model_galaxy_alternative = model1.sample_galaxy(10**4, [galaxy_params, model1.Galaxies[80]["parameters"]], GPUs=None)

print("Sampled Galaxies:")
#Show the shape and the first two rows of each of the two results
for position, ordinal in enumerate(("first", "second")):
    sampled = model_galaxy_alternative[position]
    print(f"Shape of the {ordinal} galaxy: {sampled.shape}")
    display(sampled[:2])

Sampled Galaxies:
Shape of the first galaxy: (10000, 14)


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
0,-0.619627,-0.104275,0.810962,-100.972365,43.255329,-90.862498,0.040476,0.220586,0.163333,7.574108,75383840000.0,7.424664,2.113876,0.034075
1,1.130164,1.281295,0.207629,-200.69762,-6.143884,-98.369261,0.041996,0.337955,0.060329,1.931229,75383840000.0,7.424664,2.113876,0.034075


Shape of the second galaxy: (10000, 14)


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
10000,-4.337189,-2.022582,4.048942,155.939288,-76.40988,-27.883218,0.006221,-0.617623,0.118621,0.580459,16249190000.0,2.922168,0.210724,0.0065
10001,-8.066043,6.668988,4.895435,-69.734219,-10.294311,-50.136919,0.006733,-0.618976,0.165137,3.217228,16249190000.0,2.922168,0.210724,0.0065


In [22]:
#One parameter set, two different numbers of stars
model_galaxy_alternative = model1.sample_galaxy([10**4,10], galaxy_params, GPUs=None)

print("Sampled Galaxies:")
#Show the shape and the first two rows of each of the two results
for position, ordinal in enumerate(("first", "second")):
    sampled = model_galaxy_alternative[position]
    print(f"Shape of the {ordinal} galaxy: {sampled.shape}")
    display(sampled[:2])

Sampled Galaxies:
Shape of the first galaxy: (10000, 14)


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
0,0.124068,-0.339075,-0.17017,7.430693,132.681957,-45.74146,0.05651,0.517783,0.038611,1.695925,75383840000.0,7.424664,2.113876,0.034075
1,0.070731,2.014259,1.159265,-54.529091,64.100823,95.57784,0.005502,-0.821757,0.275627,11.012002,75383840000.0,7.424664,2.113876,0.034075


Shape of the second galaxy: (10, 14)


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age,M_stars,tau50,tau10,Z_av
10000,0.91856,-0.250366,-0.378983,238.300646,85.499372,38.375743,0.030645,0.021469,0.224087,8.586376,75383840000.0,7.424664,2.113876,0.034075
10001,-1.873913,-0.063221,-0.121218,-13.87753,-433.5588,22.233247,0.040862,0.276406,0.110797,5.973269,75383840000.0,7.424664,2.113876,0.034075


In [23]:
#How the sample is returned is controlled by the reinsert_conditions argument.
#
#"all" (the default, used in the cells before): the conditions are reinserted into the sample
#and the stars are returned as a pandas DataFrame.
#
#"none": the conditions are not reinserted into the sample.
#
#"local": only conditions like "x" are reinserted into the sample, but not galaxy parameters like "M_stars".
#Here we have no conditions like "x", so the result is the same as for "none".
for mode in ("none", "local"):
    model_galaxy_alternative = model1.sample_galaxy(10**4, galaxy_params, GPUs=None, reinsert_conditions=mode)

    print(f'Mode "{mode}":')
    display(model_galaxy_alternative[:2])

#"galaxy": the returned data type changes to the standard galaxy type, a list of dicts (one per galaxy),
#just like model1.Galaxies. Local conditions are reinserted into the DataFrame, while the parameters are
#added to the dict under the "parameters" key.
model_galaxy_alternative = model1.sample_galaxy([10**4]*2, galaxy_params, GPUs=None, reinsert_conditions="galaxy")

print('Mode "galaxy":')
for galaxy in model_galaxy_alternative:
    for name, entry in galaxy.items():
        entry_type = type(entry).__name__
        print(f'key "{name}" contains a {entry_type}:')
        display(entry)

Mode "none":


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age
0,2.34291,-1.576104,0.043716,583.908737,15.091689,-302.749715,0.000879,-1.504617,0.368202,11.909729
1,0.445078,0.090867,-0.057189,-37.492728,396.663437,-295.889271,0.042465,0.308551,0.093657,4.814946


Mode "local":


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age
0,1.615972,0.826648,0.36099,-22.803271,-140.571486,83.684307,0.028366,-0.033414,0.242822,9.079183
1,-0.331518,0.683266,0.401244,-531.602764,143.449942,60.35967,0.04127,0.312924,0.076831,3.850073


Mode "galaxy":
key "stars" contains a DataFrame:


Unnamed: 0,x,y,z,vx,vy,vz,Z,feh,ofe,age
0,0.134514,0.207580,-0.158305,-369.889428,-501.718467,110.485493,0.028959,-0.010248,0.232318,8.734981
1,0.599711,-0.343911,0.277071,-31.236459,166.673666,-15.013554,0.054871,0.496502,0.044518,0.936866
2,-1.910993,0.497915,-0.660689,-363.439319,-699.228088,-72.711414,0.036890,0.264277,0.059885,1.526954
3,18.107351,7.451961,5.897420,-106.275777,-117.019976,-86.419400,0.015735,-1.633231,0.367836,7.812597
4,-0.261826,-0.286539,0.121487,432.132296,-81.061266,-96.199043,0.048704,0.418088,0.059661,4.698034
...,...,...,...,...,...,...,...,...,...,...
9995,0.854146,-0.525359,0.000730,147.864196,202.703873,-191.556561,0.032002,0.079076,0.184335,7.989995
9996,-1.716466,1.127810,0.005812,-145.940617,-124.136691,142.056035,0.036549,0.241560,0.086318,4.930831
9997,-13.297447,3.032041,-0.673327,-147.199928,-40.658582,-9.417333,0.031509,0.009580,0.244093,9.199782
9998,-0.237980,-0.452682,0.371032,176.938563,35.464305,-244.263461,0.038862,0.210993,0.152727,6.998247


key "parameters" contains a DataFrame:


Unnamed: 0,M_stars,tau50,tau10,Z_av
0,75383840000.0,7.424664,2.113876,0.034075


## Evaluating the results

In [None]:
#E.g. visualize it now
import res_flow_vis as visual

In [None]:
#The plot functions do not yet support the new galaxy data (dicts+DataFrames), so we have to convert it back to the old format (np.arrays)
#NOTE(review): model_galaxy was sampled with the conditions reinserted (14 columns in the output above), while
#data_galaxy["stars"] only has the 10 component columns - confirm that get_result_plots copes with this.
visual.get_result_plots(data_galaxy["stars"].values, model_galaxy.values, label = "API_TEST")

In [28]:
#Some important internal parameters of the model can be inspected as well:
#processor.mu and processor.std hold one value per component/condition (see the output below).
print("Internal Parameters:")
for statistic in (model1.processor.mu, model1.processor.std):
    display(statistic)
print()
print("Flow architecture:")
#The architecture description is rendered as Markdown
architecture_text = model1.flow_architecture
display(Markdown(architecture_text))

Internal Parameters:


x          -0.045790
y          -0.020172
z          -0.043150
vx         -0.679297
vy          1.630646
vz         -0.674013
Z           0.018766
feh        -0.403538
ofe         0.170605
age         6.282848
M_stars    10.253566
tau50       6.385232
tau10       2.018688
Z_av        0.018766
dtype: float64

x            3.596655
y            3.168734
z            1.898478
vx         157.117332
vy         148.366113
vz         104.888096
Z            0.015649
feh          0.614122
ofe          0.099688
age          3.430209
M_stars      0.780787
tau50        1.971908
tau10        1.109925
Z_av         0.011113
dtype: float64


Flow architecture:


Data&nbsp;dim:&nbsp;10,&nbsp;Condition&nbsp;dim:&nbsp;4<br><br>Flow&nbsp;architecture:<br>Type&nbsp;of&nbsp;coupling&nbsp;layer:&nbsp;NSF_CL2<br>Number&nbsp;of&nbsp;layers:&nbsp;14<br>Number&nbsp;of&nbsp;spline&nbsp;bins:&nbsp;10<br>Spline&nbsp;range:&nbsp;3<br>Base&nbsp;network:&nbsp;MLP<br><br>Base&nbsp;network&nbsp;architecture:<br>Number&nbsp;of&nbsp;layers:&nbsp;4<br>Number&nbsp;of&nbsp;neurons&nbsp;per&nbsp;layer:&nbsp;128<br>Leaky&nbsp;ReLU&nbsp;slope:&nbsp;0.20000000298023224

## Saving the model and loading from saved file

In [25]:
#Save the model to a file (it is loaded again from this path in the next cell)
model1.save("GF_model_0.pth")

In [27]:
#Somewhere else, or on another day/in another session,
#we need no more than the imports and the saved file:
#GalacticFlow is given the path of the saved model here instead of a definition dict.
model2 = API.GalacticFlow("GF_model_0.pth")

In [29]:
#The loaded model still carries all the internal parameters
print("Internal Parameters:")
for statistic in (model2.processor.mu, model2.processor.std):
    display(statistic)
print()
print("Flow architecture:")
#The architecture description is rendered as Markdown
architecture_text = model2.flow_architecture
display(Markdown(architecture_text))

Internal Parameters:


x          -0.045790
y          -0.020172
z          -0.043150
vx         -0.679297
vy          1.630646
vz         -0.674013
Z           0.018766
feh        -0.403538
ofe         0.170605
age         6.282848
M_stars    10.253566
tau50       6.385232
tau10       2.018688
Z_av        0.018766
dtype: float64

x            3.596655
y            3.168734
z            1.898478
vx         157.117332
vy         148.366113
vz         104.888096
Z            0.015649
feh          0.614122
ofe          0.099688
age          3.430209
M_stars      0.780787
tau50        1.971908
tau10        1.109925
Z_av         0.011113
dtype: float64


Flow architecture:


Data&nbsp;dim:&nbsp;10,&nbsp;Condition&nbsp;dim:&nbsp;4<br><br>Flow&nbsp;architecture:<br>Type&nbsp;of&nbsp;coupling&nbsp;layer:&nbsp;NSF_CL2<br>Number&nbsp;of&nbsp;layers:&nbsp;14<br>Number&nbsp;of&nbsp;spline&nbsp;bins:&nbsp;10<br>Spline&nbsp;range:&nbsp;3<br>Base&nbsp;network:&nbsp;MLP<br><br>Base&nbsp;network&nbsp;architecture:<br>Number&nbsp;of&nbsp;layers:&nbsp;4<br>Number&nbsp;of&nbsp;neurons&nbsp;per&nbsp;layer:&nbsp;128<br>Leaky&nbsp;ReLU&nbsp;slope:&nbsp;0.20000000298023224

In [30]:
#The components and conditions are available on the loaded model as well
for label, getter in (("Components", model2.get_components), ("Conditions", model2.get_conds)):
    print(f'{label}: {getter("stars")}')

Components: ['x', 'y', 'z', 'vx', 'vy', 'vz', 'Z', 'feh', 'ofe', 'age']
Conditions: ['M_stars', 'tau50', 'tau10', 'Z_av']


In [None]:
#Preparation yields the same data
#NOTE(review): the prepare() call below is commented out; model2.Galaxies is presumably only filled by
#prepare() - confirm, and re-enable the call if this cell fails in a fresh session.
#model2.prepare()
data_galaxy = model2.Galaxies[5]

data_params = data_galaxy["parameters"]

#reinsert_conditions="galaxy" returns the standard galaxy type instead of a plain DataFrame.
#NOTE(review): for a single galaxy this is presumably one dict with "stars" and "parameters" (the next cell
#indexes it with ["stars"]), while several galaxies give a list of dicts - confirm.
model_galaxy = model2.sample_galaxy(len(data_galaxy["stars"]), data_params, GPUs=None, reinsert_conditions="galaxy")

In [None]:
#The result can again be visualized and remains the same.
#Needs `visual` (res_flow_vis) imported in the evaluation section; the plot function takes np.arrays, hence .values.
visual.get_result_plots(data_galaxy["stars"].values, model_galaxy["stars"].values, label = "API_TEST2")