# Demo for the data loader

In [1]:
# External packages
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import pandas as pd

# Internal code import
import physo
import physo.learn.monitoring as monitoring
from physo.benchmark.utils import symbolic_utils as su
import sympy
from sympy import lambdify
import copy

Enforcing spawn multiprocessing start method.
Parallel mode is not available because physo is being ran from a notebook using 'spawn' multiprocessing start method (multiprocessing.get_start_method() = 'spawn'). Run physo from a python script to use parallel mode.
Parallel mode is not available because physo is being ran from a notebook using 'spawn' multiprocessing start method (multiprocessing.get_start_method() = 'spawn'). Run physo from a python script to use parallel mode.

Multiprocessing start method : spawn
Running from notebook : True
Is CUDA available : False
Total nb. of CPUs available :  96
Recommended config {'parallel_mode': False, 'n_cpus': 96}
Parallel mode is not available because physo is being ran from a notebook using 'spawn' multiprocessing start method (multiprocessing.get_start_method() = 'spawn'). Run physo from a python script to use parallel mode.
Parallel mode is not available because physo is being ran from a notebook using 'spawn' multiprocessing start metho



In [2]:
DATA_PATH = '../Nbody/'

In [3]:
def list_folders(path):
    # List all entries in the directory given by path
    entries = os.listdir(path)
    # Filter out only directories
    folders = [entry for entry in entries if os.path.isdir(os.path.join(path, entry))]
    return [s for s in folders if '_' in s]

In [4]:
def load_data(model_name, myvars, simrange='mass_0.0-0.5', simtype='mass'):

    data = {}
    for name, filetag in myvars.items():
        data[name] = pd.read_csv(DATA_PATH + model_name + "/{}/{}_{}.dat".format(simtype,simrange,filetag), \
                                 delimiter=" ", header=None).to_numpy()
    data['age'] = pd.read_csv(DATA_PATH + model_name + "/age.dat", sep=r"\s+", header=None).to_numpy().flatten()
    
    return data

In [5]:
def dropna(data):
    selected_idx = ~np.isnan(data['mass'])
    for name in set(data.keys()) - set(['age']):
        data[name] = data[name][selected_idx]
    return data

In [18]:
def reduce(data, ref_var='mass', factor=100):
    selected_idx = range(0, len(data[ref_var]), factor)
    print(selected_idx)
    print(data['age'][idx], data[ref_var][idx])
    data['age'] = np.concatenate([np.full_like(data[ref_var][idx], data['age'][idx]) for idx in selected_idx])
    for name in set(data.keys()) - set(['age']):
        data[name] = data[name][selected_idx]
    return data

In [7]:
def target_vars(variables, ref_vars=['mass', 'age']):
    return list(set(variables)-set(ref_vars))

In [8]:
models = list_folders(DATA_PATH)
print(models)
selected_model = models[1]

['250k_A_R2_10', '250k_A_R4_25_imf50', '250k_A_R4_25_retr', '250k_A_R4_25_lk', '250k_A_R2_25_vlk', '500k_C_R4_10', '250k_C_R2_10', '250k_C_R4_25', '250k_A_R4_25', '500k_A_R4_LC_part2', '250k_B_R4_25', '250k_C_R4_25_lk', '1.5M_A_R4_10', '250k_B_R4_25_lk', '250k_A_R2_25', '250k_C_R4_10', '250k_A_R2_5', '250k_A_R4_10', '250k_W6_R4_25_retr', '250k_W6_R4_25', '500k_A_R2_10', '250k_A_R4_10_retr', '250k_A_R4_25_vlk', '500k_A_R4_10', '500k_A_R4_LC_part1']


In [9]:
variables = {'mass': 'mass_bin', 'velocity': 'vphi', 'dispersion': 'disp_phi', 'radius': 'radial_bin'}
data = load_data(selected_model, variables)
data = dropna(data)
print(data.keys())
print(data)

dict_keys(['mass', 'velocity', 'dispersion', 'radius', 'age'])
{'mass': array([0.081, 0.084, 0.086, ..., 0.873, 0.952, 1.304], shape=(128244,)), 'velocity': array([4.401, 4.478, 4.561, ..., 1.335, 1.361, 1.287], shape=(128244,)), 'dispersion': array([4.223, 4.264, 4.297, ..., 1.847, 1.744, 1.72 ], shape=(128244,)), 'radius': array([2.537, 2.495, 2.523, ..., 7.103, 6.979, 6.319], shape=(128244,)), 'age': array([0.00000000e+00, 6.51021680e+00, 1.30204336e+01, ...,
       1.39839457e+04, 1.39904559e+04, 1.39969661e+04], shape=(2150,))}


In [16]:
reduced_data = reduce(data)

myvars = target_vars(data.keys())

fig, ax = plt.subplots(len(myvars), 1, figsize=(6,6))
fig.suptitle("model: " + selected_model)
for n,name in enumerate(myvars):
    sc = ax[n].scatter(reduced_data['mass'], reduced_data[name], c=reduced_data['age'], cmap='viridis', s=5)
    cbar = plt.colorbar(sc)
    cbar.set_label("age [Myrs]")
    ax.set_xlabel("mass [M_\odot]")
plt.show()

range(0, 13, 100)


ValueError: zero-dimensional arrays cannot be concatenated