# Demo for the data loader

In [7]:
# External packages
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import pandas as pd

# Internal code import
import physo
import physo.learn.monitoring as monitoring
from physo.benchmark.utils import symbolic_utils as su
import sympy
from sympy import lambdify
import copy

In [11]:
# Root directory of the data files, given relative to the current working
# directory. Its sub-folders are listed as models and read by load_data.
DATA_PATH = '../Nbody/'

In [3]:
def list_folders(path):
    """Return the names of the sub-directories of ``path``, sorted by name.

    Plain files are ignored, and so is any directory literally named
    ``'README'``.

    Parameters
    ----------
    path : str
        Directory whose immediate children are inspected.

    Returns
    -------
    list of str
        Sub-directory names (not full paths) in ascending order.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``path`` cannot be listed.
    """
    # os.listdir yields entries in arbitrary order; sorting makes index-based
    # selection such as ``list_folders(...)[0]`` reproducible across runs.
    return sorted(
        entry
        for entry in os.listdir(path)
        if entry != 'README' and os.path.isdir(os.path.join(path, entry))
    )

In [17]:
def load_data(model_name, myvars, simrange='mass_0.0-0.5', simtype='mass', data_path=None):
    """Load the requested variables of one model, plus its ``age.dat`` file.

    Each variable is read from
    ``<data_path>/<model_name>/<simtype>/<simrange>_<filetag>.dat`` and the
    ages from ``<data_path>/<model_name>/age.dat``.

    Parameters
    ----------
    model_name : str
        Name of the model sub-folder inside the data directory.
    myvars : dict
        Maps the key to use in the returned dict to the file tag that
        appears in the corresponding file name.
    simrange : str, optional
        Prefix of the variable file names.
    simtype : str, optional
        Sub-folder of the model directory that holds the variable files.
    data_path : str, optional
        Root data directory. Defaults to the module-level ``DATA_PATH``.

    Returns
    -------
    dict
        One entry per key of ``myvars`` holding the file content as returned
        by ``DataFrame.to_numpy()``, plus an ``'age'`` entry holding the
        flattened content of ``age.dat``. A key named ``'age'`` in ``myvars``
        is overwritten by the latter.
    """
    root = DATA_PATH if data_path is None else data_path
    # os.path.join keeps the paths valid whether or not the root directory
    # ends with a path separator.
    model_dir = os.path.join(root, model_name)

    data = {}
    for name, filetag in myvars.items():
        filename = "{}_{}.dat".format(simrange, filetag)
        # NOTE(review): variable files are split on a single space whereas
        # age.dat is split on any run of whitespace - confirm this difference
        # is intentional.
        data[name] = pd.read_csv(os.path.join(model_dir, simtype, filename),
                                 delimiter=" ", header=None).to_numpy()
    data['age'] = pd.read_csv(os.path.join(model_dir, "age.dat"),
                              sep=r"\s+", header=None).to_numpy().flatten()

    return data

In [35]:
def dropna(data):
    """Keep only the entries whose ``'mass'`` value is not NaN.

    The boolean mask is computed once from ``data['mass']`` and applied to
    every array in ``data`` except ``'age'``, which is left untouched (in the
    demo output its length differs from that of the other arrays).

    The dict is modified in place and also returned.
    """
    keep = np.logical_not(np.isnan(data['mass']))
    # Iterate over a snapshot of the keys, since entries are reassigned below.
    for key in list(data):
        if key == 'age':
            continue
        data[key] = data[key][keep]
    return data

In [36]:
# Every sub-folder of DATA_PATH is treated as one model.
models = list_folders(DATA_PATH)

# Maps the key used in the data dict to the file tag handed to load_data.
variables = {
    'mass': 'mass_bin',
    'velocity': 'vphi',
    'dispersion': 'disp_phi',
    'radius': 'radial_bin',
}

# Load the first listed model, then drop the entries whose mass is NaN.
data = dropna(load_data(models[0], variables))
print(data)

{'mass': array([0.081, 0.084, 0.086, ..., 0.856, 0.942, 1.214], shape=(183954,)), 'velocity': array([5.913, 6.509, 6.396, ..., 0.662, 0.715, 0.677], shape=(183954,)), 'dispersion': array([6.328, 6.247, 6.07 , ..., 2.033, 1.991, 1.859], shape=(183954,)), 'radius': array([1.246, 1.268, 1.24 , ..., 5.677, 5.573, 5.144], shape=(183954,)), 'age': array([0.00000000e+00, 4.55924638e+00, 9.11849275e+00, ...,
       1.39968864e+04, 1.39980262e+04, 1.39991660e+04], shape=(3598,))}
