In [1]:
import os
from pathlib import Path
import json
import glob
import h5py
import numpy as np
from tqdm.notebook import tqdm

In [2]:
outdir = '../../data_prods/'
data_dir = '/home/ryan/Data'

subdir_list = [os.path.join(data_dir, o) for o in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir,o))]

print(subdir_list)

['/home/ryan/Data/run_2cDM_L3N256_DM_powerm2m2_sigma1_dir_2', '/home/ryan/Data/run_2cDM_L3N256_DM_power00_sigma1_dir_7_Vkick119.90', '/home/ryan/Data/run_2cDM_L3N256_DM_powerm2m2_sigma0.1', '/home/ryan/Data/run_2cDM_L3N256_DM_powerm2m2_sigma5.0_dir_9', '/home/ryan/Data/run_2cDM_L3N512_HY_power00_sigma1', '/home/ryan/Data/run_2cDM_L3N256_HY_power00_sigma0.1', '/home/ryan/Data/run_2cDM_L25N256_DM_power00_sigma1_dir_4_Vkick55.65', '/home/ryan/Data/run_CDM_L5N256_DM', '/home/ryan/Data/run_2cDM_L3N256_HY_power00_sigma1_dir_9', '/home/ryan/Data/run_SIDM_L3N256_DM_power0_sigma1', '/home/ryan/Data/run_2cDM_L3N256_HY_powerm2m2_sigma1_dir_5', '/home/ryan/Data/run_2cDM_L25N256_DM_power00_sigma1_dir_7_Vkick119.90', '/home/ryan/Data/run_2cDM_L3N256_DM_powerm2m2_sigma1_dir_1', '/home/ryan/Data/lost+found', '/home/ryan/Data/run_2cDM_L3N256_HY_power00_sigma1_dir_5', '/home/ryan/Data/run_2cDM_L3N256_DM_power00_sigma1_dir_3', '/home/ryan/Data/run_2cDM_L3N256_HY_powerm2m2_sigma10', '/home/ryan/Data/run_2

In [None]:
def make_CDF(data, nbins=1000):
    bins = np.geomspace(np.amin(data), np.amax(data), num=nbins)
    counts = np.array([ (data > b).sum() for b in bins ])
    
    return bins, counts

def get_profiles(f, nbins=1000):

    Subhalo=f.get('Subhalo')
    SubhaloMass=np.array(Subhalo['SubhaloMass'])
    SubhaloMassType=np.array(Subhalo['SubhaloMassType'])
    SubhaloVMax=np.array(Subhalo['SubhaloVmax']) #km/s
    MaxVel=np.absolute(SubhaloVMax)
    SubhaloMass=10**10*SubhaloMass #converting to solar masses
    SubhaloMassType=10**10*SubhaloMassType #converting to solar masses

    SubhaloN = Subhalo['SubhaloLen'][()]
    enough = SubhaloN > 20

    smass = SubhaloMass[enough]
    smasstype = SubhaloMassType[enough]
    svmax = MaxVel[enough]

    if len(smass) > 0:
        Mass_Bins, N_M = make_CDF(smass, nbins=nbins)
    else:
        Mass_Bins = N_M = np.zeros(1, dtype=np.uint8)
    if len(svmax > 0):
        MaxVel_Bins, N_V = make_CDF(svmax, nbins=nbins)
    else:
        MaxVel_Bins = N_V = np.zeros(1, dtype=np.uint8)
    
    mbins_type = []
    n_m_type = []

    for i in range(smasstype.shape[1]):
        masstype = smasstype[:,i]
        nonzero = masstype[ masstype != 0 ]

        if len(nonzero) > 0:
            mbins, N_M_type = make_CDF(nonzero, nbins=nbins)
        else:
            mbins = N_M_type = np.zeros(1, dtype=np.uint8)
        
        mbins_type.append(mbins)
        n_m_type.append(N_M_type)
    
    by_mass = smasstype[:,0] + smasstype[:,4]
    nonzero = by_mass[ by_mass != 0 ]
    if len(nonzero) > 0:
        mbins_by, N_M_by = make_CDF(nonzero, nbins=nbins)
    else:
        mbins_by = N_M_by = np.zeros(1, dtype=np.uint8)

    return (Mass_Bins, N_M), (MaxVel_Bins, N_V), (mbins_type, n_m_type), (mbins_by, N_M_by)

In [None]:
for subdir in tqdm(subdir_list):

    #run name is just folder name
    run_name = subdir.split('/')[-1]

    #reject folders that aren't runs
    if 'run' not in run_name:
        continue

    fpath = os.path.join(outdir, run_name)
    try:
        os.mkdir(fpath)
    except:
        print(f'{fpath} already exists!')
    print("Working on ", run_name)

    # read info from snapshots/fof
    snaps = sorted(glob.glob(subdir + '/snap*'))
    fofs = sorted(glob.glob(subdir + '/fof*'))

    if len(snaps) == 0:
        print(f'No snapshots for run {run_name}...')
        continue

    for fof in fofs:
        i = int(Path(fof).stem.split('_')[-1])
        with h5py.File(fof, 'r') as f:
            try:
                (mbins, mcount), (vbins, vcount), (mbins_type, mcount_type), (mbins_by, mcount_by) = get_profiles(f)
                tname = os.path.join(fpath, "mass_profile_{}.txt".format(i))
                np.savetxt(tname, (mbins, mcount))

                for j in range(len(mbins_type)):
                    tname = os.path.join(fpath, f"part_type_{j}_mass_profile_{i}.txt")
                    np.savetxt(tname, (mbins_type[j], mcount_type[j]))
                
                tname = os.path.join(fpath, "by_mass_profile_{}.txt".format(i))
                np.savetxt(tname, (mbins_by, mcount_by))

                tname = os.path.join(fpath, "vel_profile_{}.txt".format(i))
                np.savetxt(tname, (vbins, vcount))
            except KeyError:
                continue

  0%|          | 0/199 [00:00<?, ?it/s]

../../data_prods/run_2cDM_L3N256_DM_powerm2m2_sigma1_dir_2 already exists!
Working on  run_2cDM_L3N256_DM_powerm2m2_sigma1_dir_2
../../data_prods/run_2cDM_L3N256_DM_power00_sigma1_dir_7_Vkick119.90 already exists!
Working on  run_2cDM_L3N256_DM_power00_sigma1_dir_7_Vkick119.90
../../data_prods/run_2cDM_L3N256_DM_powerm2m2_sigma0.1 already exists!
Working on  run_2cDM_L3N256_DM_powerm2m2_sigma0.1
../../data_prods/run_2cDM_L3N256_DM_powerm2m2_sigma5.0_dir_9 already exists!
Working on  run_2cDM_L3N256_DM_powerm2m2_sigma5.0_dir_9
../../data_prods/run_2cDM_L3N512_HY_power00_sigma1 already exists!
Working on  run_2cDM_L3N512_HY_power00_sigma1
../../data_prods/run_2cDM_L3N256_HY_power00_sigma0.1 already exists!
Working on  run_2cDM_L3N256_HY_power00_sigma0.1
../../data_prods/run_2cDM_L25N256_DM_power00_sigma1_dir_4_Vkick55.65 already exists!
Working on  run_2cDM_L25N256_DM_power00_sigma1_dir_4_Vkick55.65
../../data_prods/run_CDM_L5N256_DM already exists!
Working on  run_CDM_L5N256_DM
../../da