In [5]:
import glob
import subprocess
import h5py
import json
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from qgridnext import show_grid
import os
import re

In [6]:
import sxs

In [7]:
ls /groups/sxs/vtommasi/SimulationAnnex

[0m[01;34mCatalogAnalysis[0m/            [01;34mIncoming[0m/      [01;34mPublic[0m/
changelog_md_content.json   Makefile       [01;34mPublicLinks[0m/
changelog_md_format.json    [01;34mPrivate[0m/       README.md
changelog_postprocess.json  [01;34mPrivateLinks[0m/  [01;34mtools[0m/


In [8]:
# Load the "simulations" object through sxs, using the annex directory below
# as dir_annex, then report how many entries it contains.
annex_dir = "/groups/sxs/vtommasi/SimulationAnnex"
simulations = sxs.load("simulations", dir_annex=annex_dir)
n_simulations = len(simulations)
print(n_simulations)

2778


In [9]:
# Show which characteristics (dataframe columns) are available
available_columns = simulations.dataframe.columns
print(available_columns)

Index(['deprecated', 'reference_time', 'reference_mass_ratio',
       'reference_dimensionless_spin1', 'reference_dimensionless_spin1_mag',
       'reference_dimensionless_spin2', 'reference_dimensionless_spin2_mag',
       'reference_chi_eff', 'reference_chi1_perp', 'reference_chi2_perp',
       'reference_eccentricity', 'reference_eccentricity_bound',
       'reference_mean_anomaly', 'reference_orbital_frequency',
       'reference_orbital_frequency_mag', 'reference_separation',
       'reference_position1', 'reference_position2', 'reference_mass1',
       'reference_mass2', 'reference_chi1_mag', 'reference_chi2_mag',
       'relaxation_time', 'common_horizon_time', 'remnant_mass',
       'remnant_dimensionless_spin', 'remnant_dimensionless_spin_mag',
       'remnant_velocity', 'remnant_velocity_mag', 'EOS', 'disk_mass',
       'ejecta_mass', 'object_types', 'initial_data_type',
       'initial_separation', 'initial_orbital_frequency', 'initial_adot',
       'initial_ADM_energy', 'in

In [10]:
# The index of simulations.dataframe holds the simulation names; reset_index()
# moves it into an ordinary first column so it can be accessed like any other.
df = simulations.dataframe.reset_index()
first_col = df.columns[0]

# Filter to only keep BBH_SKS simulations.
# The explicit .copy() makes the filtered frame unambiguously independent of
# the full frame, so assigning to its columns afterwards does not trigger
# pandas' SettingWithCopyWarning.
is_bbh_sks = df["initial_data_type"].astype(str) == "BBH_SKS"
df = df[is_bbh_sks].copy()

# Count the number of BBH_SKS simulations
bbh_sks_count = len(df)
print(f"Number of SKS BBH simulations is {bbh_sks_count}")

Number of SKS BBH simulations is 2401


In [12]:
# Save simulations as HDF5
#
# Some columns hold NumPy arrays with multiple elements. Each array is
# converted to the string form of a list so that all of its values are kept.
# Values that are not NumPy arrays are left unchanged, which also makes this
# cell safe to re-run on an already converted frame.


def _array_to_str(value):
    """Return ``str(value.tolist())`` for a NumPy array; pass anything else through."""
    return str(value.tolist()) if isinstance(value, np.ndarray) else value


def _join_list(value):
    """Return a list joined with ', '; pass anything else through."""
    return ', '.join(value) if isinstance(value, list) else value


# Columns whose entries may be NumPy arrays. Each column is listed once.
array_columns = [
    "reference_dimensionless_spin1",
    "reference_dimensionless_spin2",
    "reference_orbital_frequency",
    "reference_position1",
    "reference_position2",
    "remnant_dimensionless_spin",
    "remnant_velocity",
    "initial_ADM_linear_momentum",
    "initial_ADM_angular_momentum",
    "initial_dimensionless_spin1",
    "initial_dimensionless_spin2",
    "initial_position1",
    "initial_position2",
]
for column in array_columns:
    df[column] = df[column].apply(_array_to_str)

# "superseded_by" is only converted when the column is present; every
# non-string value in it is turned into its str() form.
if "superseded_by" in df.columns:
    df["superseded_by"] = df["superseded_by"].apply(lambda x: str(x) if not isinstance(x, str) else x)

# Columns whose entries may be Python lists: flatten each list to a single
# comma-separated string.
list_columns = ["DOI_versions", "keywords"]
for column in list_columns:
    df[column] = df[column].apply(_join_list)

# Write the converted frame to disk. A failure is reported by printing the
# error message rather than by raising, so later cells still run.
try:
    df.to_hdf("filtered_simulations_2.h5", key="filtered_data", mode="w", format="table")
    print("Filtered DataFrame successfully saved as HDF5!")
except Exception as e:
    print(f"Error saving filtered DataFrame as HDF5: {e}")

Filtered DataFrame successfully saved as HDF5!
