In [1]:
# -*- coding: utf-8 -*-
"""
Created On April 30, 2024
Last modified on April 30, 2024

Description: 
To Process and interpolate the stellar yields from NuPyCEE, https://github.com/NuGrid/NuPyCEE/tree/master
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
import re
import h5py
sys.path.insert(0, '/Users/liuguanfu/Workspace/SAS-21/targets/MRK1216/spex/jupyter/IMF/chemevoimf')
import utils
from scipy import interpolate
import shutil
import pyatomdb
%matplotlib widget

In [2]:
# Process the yields from AGB stars and massive stars
# The AGB / massive-star tables sit in two folders of the local NuPyCEE copy.
# Paths are assembled by plain string concatenation so that they stay
# "/"-separated exactly as written here.
agb_massive_dir = "./Original/NuPyCEE/Yields/"
agb_massive_other_dir = agb_massive_dir + "other/"

agb_massive_tables = [
    "agb_and_massive_stars_C15_LC18_R_mix.txt",
    "agb_and_massive_stars_C15_N13_0_0_HNe.txt",
    "agb_and_massive_stars_C15_N13_0_5_HNe.txt",
    "agb_and_massive_stars_C15_N13_1_0_HNe.txt",
    "agb_and_massive_stars_K10_K06_0.0HNe.txt",
    "agb_and_massive_stars_K10_K06_0.5HNe.txt",
    "agb_and_massive_stars_K10_K06_1.0HNe.txt",
    "agb_and_massive_stars_K10_LC18_R000.txt",
    "agb_and_massive_stars_K10_LC18_R150.txt",
    "agb_and_massive_stars_K10_LC18_R300.txt",
    "agb_and_massive_stars_K10_LC18_Ravg.txt",
    "agb_and_massive_stars_nugrid_FRUITY.txt",
    "agb_and_massive_stars_nugrid_K06.txt",
    "agb_and_massive_stars_nugrid_K10.txt",
    "agb_and_massive_stars_nugrid_MESAonly_fryer12delay_wind_preexp.txt",
    "agb_and_massive_stars_nugrid_MESAonly_fryer12mix.txt",
    "agb_and_massive_stars_nugrid_MESAonly_ye.txt",
    "agb_and_massive_stars_nugrid_MESAonly_fryer12rapid.txt",
    "agb_and_massive_stars_nugrid_MESAonly_fryer12delay.txt",
    "agb_and_massive_stars_nugrid_N13.txt",
]

agb_massive_other_tables = [
    "isotope_yield_table_portinari98_marigo01_gce_totalyields.txt",
    "isotope_yield_table_MESA_only.txt",
    "isotope_yield_table_MESA_only_fryer12_delay_neutrons.txt",
    "isotope_yield_table_MESA_only_fryer12_exclnalpha.txt",
    "isotope_yield_table_MESA_only_fryer12_rapid_neutrons.txt",
    "isotope_yield_table_MESA_only_ye_neutrons.txt",
    # "isotope_yield_table_portinari98_marigo01_withg.txt",
    # Excluded: its last line is "G", which is not an element.
    "isotope_yield_table_wiersma09.txt",
]

# Final list consumed by the processing loop: main folder first, then "other/"
file_paths = [agb_massive_dir + name for name in agb_massive_tables]
file_paths += [agb_massive_other_dir + name for name in agb_massive_other_tables]
# Selected yields from NuPyCEE, https://github.com/NuGrid/NuPyCEE/tree/master/yield_tables


# Mass grid on which the yields are extrapolated/interpolated: 300 log-spaced
# points between 0.08 and 150, formatted as strings so they can be used as
# column labels. The grid is the same for every file and metallicity, so it is
# built once instead of once per metallicity.
columns = ["%0.6e" % a for a in np.logspace(np.log10(0.08), np.log10(150), 300)]

for file_path in file_paths:
    # One output directory per yield file, named after the file without ".txt"
    out_dir = './NuPyCEE/' + file_path.split('/')[-1].replace(".txt", "")
    os.makedirs(out_dir, exist_ok=True)
    # Keep an untouched copy of the source table next to the processed products
    shutil.copy(file_path, out_dir)
    with open(file_path, 'r') as f:
        lines = f.readlines()

    # Provenance metadata written to both HDF5 files. Everything after the
    # fourth "/" of the local path is appended to the upstream base URL.
    url = ('https://github.com/NuGrid/NuPyCEE/blob/master/yield_tables/'
           + "/".join(file_path.split('/')[4:]))
    original_file = file_path.split('/')[-1]

    # Line numbers of the "H Table:" headers; each header opens one table
    table_sep = [i for i, line in enumerate(lines) if line.startswith("H Table:")]

    # Raw text of the tables, grouped by initial metallicity ("Z=..." key)
    tables = {}
    for i, line_num in enumerate(table_sep):
        Zini = 'Z=' + re.search(r"Z=(\d+(\.\d+)?)", lines[line_num]).group(1)
        # A table runs up to the next header, or to the end of the file
        stop = table_sep[i+1] if i < len(table_sep) - 1 else len(lines)
        tables.setdefault(Zini, []).append(lines[line_num:stop])

    dfs = {}  # Zini -> list of one-column DataFrames (one per initial mass)
    mass_lifetime = {}  # Zini -> list of [initial mass, lifetime]
    for Zini in tables.keys():
        dfs[Zini] = []
        mass_lifetime[Zini] = []
        for table in tables[Zini]:
            # Find the "&Isotopes ..." column-header row; the data rows follow it
            for idx, item in enumerate(table):
                if item.startswith("&Isotopes"):
                    break
            table_data = [line.replace('\n', '').replace('&', ' ').split() for line in table[idx:]]
            df = pd.DataFrame(table_data[1:], columns=table_data[0])
            # The table header is read positionally: line 0 carries M,
            # line 1 the lifetime and line 2 Mfinal.
            Mrem = re.search(r"Mfinal: ([+-]?\d*\.?\d+[eE][+-]?\d+)", table[2]).group(1)
            lifetime = re.search(r"Lifetime: ([+-]?\d*\.?\d+[eE][+-]?\d+)", table[1]).group(1)
            Mini = re.search(r"M=([+-]?\d*\.?\d+[eE]?[+-]?\d*)", table[0]).group(1)
            mass_lifetime[Zini].append([float(Mini), float(lifetime)])
            if 'X0' in df.columns:
                df = df.astype({'Isotopes':str, 'Yields':float, 'X0':float, 'Z':int, 'A':int})
            else:
                # Tables without an X0 column get a zero-filled one so that all
                # tables share the same layout
                df = df.astype({'Isotopes':str, 'Yields':float, 'Z':int, 'A':int})
                df['X0'] = 0
                df = df[['Isotopes', 'Yields', 'Z', 'X0', 'A']]
            # Strip the "-<mass number>" suffix and add up the isotopes of each element
            df['Isotopes'] = df['Isotopes'].str.replace(r'-\d+', '', regex=True)
            df = df.groupby('Isotopes').sum().reset_index()
            # After groupby, the order of columns is changed
            df = df[['Isotopes', 'Yields', 'Z', 'X0', 'A']]
            # The groupby also summed Z, so recompute it from the element symbol
            df['Z'] = df['Isotopes'].apply(pyatomdb.atomic.elsymb_to_Z)
            # reset_index is a must: the two rows appended below use len(df) as label
            df = df.reset_index(drop=True)
            # Z=0 for the Mrem is used to keep it in the first row after sorting
            df.loc[len(df)] = ['Mrem', float(Mrem), 0, 0, 0]
            # "Other" is the initial mass minus everything tabulated so far.
            # The sum is taken before the Z>30 rows are dropped below, so it
            # covers every element in the table plus the Mrem row just added.
            # A non-positive leftover is stored as 0.
            leftover = float(Mini) - df['Yields'].sum()
            df.loc[len(df)] = ["Other", leftover if leftover > 0 else 0, 31, 0, 0]
            # Z=0 is the Mrem
            # Z=1, 2, 3, ..., 30 are the first 30 elements
            # Z=31 is the rest of the elements ("Other"); Ga (also Z=31) is dropped
            # Drop the isotopes with Z>30
            # Chained (non-inplace) calls avoid mutating a slice of the frame.
            df = (
                df.loc[(df['Z'] <= 31) & (df['Isotopes'] != 'Ga')]
                .sort_values(by='Z')
                .rename(columns={'Isotopes': 'M', 'Yields': Mini})
                .drop(columns=['Z', 'X0', 'A'])
                .set_index('M')
            )
            dfs[Zini].append(df)

    for key in dfs.keys():
        # df1 gathers the tables with the same initial metallicity:
        # one row per element (plus Mrem/Other), one column per initial mass
        df1 = pd.concat(dfs[key], axis=1)
        df1.to_csv(os.path.join(out_dir, '%s.csv' % key))
        # Empty dataframe with the same index as df1, one column per grid mass
        df2 = pd.DataFrame(np.zeros((len(df1.index), len(columns))), index=df1.index, columns=columns)
        for _, row in df1.iterrows():
            x1 = row.index.to_numpy().astype(float)  # x1 is the initial mass from original yields
            y1 = row.to_numpy().astype(float)  # y1 holds this row's tabulated values at those masses
            df2.loc[row.name, :] = [utils.extra_interpolate_yields(x1, y1, row.name, float(col)) for col in columns]
        df2.to_csv(os.path.join(out_dir, '%s_interpolated.csv' % key))

    # yields1.h5: written directly with h5py, one group per initial metallicity
    with h5py.File(os.path.join(out_dir, 'yields1.h5'), 'w') as f:
        f.attrs['OriginalURL'] = url
        f.attrs['OriginalFile'] = original_file
        for key in dfs.keys():
            # Create a group for each initial metallicity
            group = f.create_group(key)
            group.attrs['Z'] = "%s" % key[2:]
            group.attrs['MassUnit'] = 'Msun'

            df1 = pd.read_csv(os.path.join(out_dir, '%s.csv' % key))
            # hdf5 does not support string with object type
            # "|S" will find the maximum length of the string in the selected column
            df1['M'] = df1['M'].astype('|S')
            data = df1.to_records(index=False)
            group.create_dataset('Original', data=data, dtype=data.dtype)

            df2 = pd.read_csv(os.path.join(out_dir, '%s_interpolated.csv' % key))
            df2['M'] = df2['M'].astype('|S')
            # Write the index-free records, exactly as for 'Original'. The
            # previous code passed records that still carried the index field
            # together with a dtype that did not contain it.
            data = df2.to_records(index=False)
            group.create_dataset('Interpolated', data=data, dtype=data.dtype)
            group.create_dataset('MassLifetime', data=np.array(mass_lifetime[key]))
    # Read yields1.h5
    # f = h5py.File(os.path.join(out_dir, 'yields1.h5'), 'r')

    # yields2.h5 is the other kind of hdf5 file by using to_hdf
    # but the content is the same as yields1.h5
    with h5py.File(os.path.join(out_dir, 'yields2.h5'), 'w') as f:
        f.attrs['OriginalURL'] = url
        f.attrs['OriginalFile'] = original_file
    for key in dfs.keys():
        # "=" and "." are replaced by "_" in the group names of this file
        group_name = key.replace("=", "_").replace(".", "_")
        df1 = pd.read_csv(os.path.join(out_dir, '%s.csv' % key), index_col=0)
        df1.to_hdf(os.path.join(out_dir, 'yields2.h5'), key='/%s/Original' % group_name, mode='a')
        df2 = pd.read_csv(os.path.join(out_dir, '%s_interpolated.csv' % key), index_col=0)
        df2.to_hdf(os.path.join(out_dir, 'yields2.h5'), key='/%s/Interpolated' % group_name, mode='a')
    # Add the mass-lifetime table and the attributes to the groups pandas created
    with h5py.File(os.path.join(out_dir, 'yields2.h5'), 'a') as f:
        for key in dfs.keys():
            group = f[key.replace("=", "_").replace(".", "_")]
            group.create_dataset('MassLifetime', data=np.array(mass_lifetime[key]))
            group.attrs['Z'] = "%s" % key[2:]
            group.attrs['MassUnit'] = 'Msun'

    # Read yields2.h5
    # pd.read_hdf(os.path.join(out_dir, 'yields2.h5'), key="Z_0_0004/Original")

In [3]:
# Process the yields from SNIa
# All SN Ia tables share one folder; paths are built by string concatenation
# so they stay "/"-separated exactly as written here.
sn1a_dir = "./Original/NuPyCEE/Yields/"

sn1a_tables = [
    "sn1a_i99_CDD1.txt",
    "sn1a_i99_CDD2.txt",
    "sn1a_i99_W7.txt",
    "sn1a_ivo12_mix_z.txt",
    "sn1a_ivo12_stable_z.txt",
    "sn1a_ivo12_unstable_z.txt",
    "sn1a_ivo13_mix_z.txt",
    "sn1a_ivo13_stable_z.txt",
    "sn1a_ivo13_unstable_z.txt",
    # "sn1a_t03.txt",  # Excluded: its last line is "G", which is not an element.
    "sn1a_t86.txt",
]

file_paths = [sn1a_dir + name for name in sn1a_tables]

for file_path in file_paths:
    # One output directory per yield file, named after the file without its
    # extension. splitext only removes the final ".txt", so a file name that
    # contains other dots is no longer truncated at the first one.
    out_dir = './NuPyCEE/' + os.path.splitext(file_path.split('/')[-1])[0]
    os.makedirs(out_dir, exist_ok=True)
    # Keep an untouched copy of the source table next to the processed products
    shutil.copy(file_path, out_dir)
    with open(file_path, 'r') as f:
        lines = f.readlines()

    # Provenance metadata written to both HDF5 files
    url = ('https://github.com/NuGrid/NuPyCEE/blob/master/yield_tables/'
           + "/".join(file_path.split('/')[4:]))
    original_file = file_path.split('/')[-1]

    # Find the "&Isotopes ..." column-header row; the data rows follow it
    for idx, line in enumerate(lines):
        if line.startswith("&Isotopes"):
            break
    table_data = [line.replace('\n', '').replace('&', ' ').split() for line in lines[idx:]]
    df = pd.DataFrame(table_data[1:], columns=table_data[0])
    # Every column after the first holds yields. Convert with astype on the
    # frame itself: assigning the converted values back through .iloc writes
    # them into the existing object-dtype columns and does not change the dtype.
    df = df.astype({col: float for col in df.columns[1:]})
    # Strip the "-<mass number>" suffix and add up the isotopes of each element
    df['Isotopes'] = df['Isotopes'].str.replace(r'-\d+', '', regex=True)
    df = df.groupby('Isotopes').sum().reset_index()
    df = df.rename(columns={'Isotopes': 'M'})
    # Temporary atomic-number column, used only for ordering and filtering
    df['Z'] = df['M'].apply(pyatomdb.atomic.elsymb_to_Z)
    df = df.sort_values(by='Z')
    # Drop the isotopes with Z>30
    df = df.loc[df['Z'] <= 30].drop(columns=['Z']).set_index('M')
    df.to_csv(os.path.join(out_dir, 'yields.csv'))

    # One single-column table (and CSV file) per yield column of the source table
    dfs = {}
    for col in df.columns:
        dfs[col] = df[[col]].copy()
        dfs[col].to_csv(os.path.join(out_dir, '%s.csv' % col))

    # yields1.h5: written directly with h5py, one group per yield column
    with h5py.File(os.path.join(out_dir, 'yields1.h5'), 'w') as f:
        f.attrs['OriginalURL'] = url
        f.attrs['OriginalFile'] = original_file
        for key in dfs.keys():
            # Create a group for each column (the code treats the column name
            # as "Z=<metallicity>": everything after the first two characters
            # is stored as the Z attribute)
            group = f.create_group(key)
            df1 = pd.read_csv(os.path.join(out_dir, '%s.csv' % key))
            # hdf5 does not support string with object type
            # "|S" will find the maximum length of the string in the selected column
            df1['M'] = df1['M'].astype('|S')
            data = df1.to_records(index=False)
            group.attrs['Z'] = "%s" % key[2:]
            group.attrs['MassUnit'] = 'Msun'
            group.create_dataset('Original', data=data, dtype=data.dtype)
    # Read yields1.h5
    # f = h5py.File(os.path.join(out_dir, 'yields1.h5'), 'r')

    # yields2.h5 is the other kind of hdf5 file by using to_hdf
    # but the content is the same as yields1.h5
    with h5py.File(os.path.join(out_dir, 'yields2.h5'), 'w') as f:
        f.attrs['OriginalURL'] = url
        f.attrs['OriginalFile'] = original_file
    for key in dfs.keys():
        # "=" and "." are replaced by "_" in the group names of this file
        group_name = key.replace("=", "_").replace(".", "_")
        df1 = pd.read_csv(os.path.join(out_dir, '%s.csv' % key), index_col=0)
        df1.to_hdf(os.path.join(out_dir, 'yields2.h5'), key='/%s/Original' % group_name, mode='a')
    # Add the attributes to the groups pandas created
    with h5py.File(os.path.join(out_dir, 'yields2.h5'), 'a') as f:
        for key in dfs.keys():
            group = f[key.replace("=", "_").replace(".", "_")]
            group.attrs['Z'] = "%s" % key[2:]
            group.attrs['MassUnit'] = 'Msun'

# Mirror the freshly generated ./NuPyCEE tree into the inputs directory.
# Any previous copy is removed first so the destination matches the source.
inputs_copy = '../inputs/NuPyCEE'
if os.path.exists(inputs_copy):
    shutil.rmtree(inputs_copy)
# Left as the last expression so that its return value is shown as the cell output
shutil.copytree('./NuPyCEE', inputs_copy)

'../inputs/NuPyCEE'

# In yields1.h5
- The groups are named `Z=X.XXXX`, where Z is the initial metallicity.
- There are 3 datasets in each group, named `Original`, `Interpolated`, and `MassLifetime`, which hold the original yields, the interpolated yields, and the original mass–lifetime pairs, respectively.

# In yields2.h5
- The groups are named `Z_X_XXXX`, where Z is the initial metallicity. This file is written with pandas `to_hdf` (PyTables), and following the PyTables natural-naming convention the `=` and `.` of `Z=X.XXXX` are replaced by `_`, giving `Z_X_XXXX`.
- There are 3 datasets in each group, named `Original`, `Interpolated`, and `MassLifetime`, which hold the original yields, the interpolated yields, and the original mass–lifetime pairs, respectively.

`yields1.h5` and `yields2.h5` contain the same data, but the group names are different.