In [21]:
import h5py as h5py
import numpy as np
import pandas as pd
import os

# Root directory of the simulation output read by the GROUP and SUBGROUP table cells below.
datapath   = '../data/V1_MR_fix'


In [22]:
def snapshot_redshifts(datapath):
    """List the snapshots found under ``datapath`` and decode their redshifts.

    A directory entry counts as a snapshot when its name contains
    ``particledata_`` followed by a label of the form ``<id>_z<int>p<frac>``
    (for example ``particledata_127_z000p000``). Entries that do not follow
    that pattern are skipped instead of raising.

    Parameters
    ----------
    datapath : str
        Directory scanned with ``os.listdir``.

    Returns
    -------
    list
        ``[snap_file_list, snap_id_list, redshift_list]``, all sorted by
        snapshot id:

        * ``snap_file_list`` - labels such as ``'127_z000p000'`` (list of str)
        * ``snap_id_list``   - snapshot numbers (list of int)
        * ``redshift_list``  - redshifts decoded from the label (numpy array)
    """
    import re   # local import: the notebook's import cell does not provide it

    marker = 'particledata_'
    # <id>_[anything_]z<integer part>p<fractional part>; the redshift tag is the last '_' field.
    label_pattern = re.compile(r'(\d+)_(?:.*_)?z(\d+)p(\d+)')

    found = []
    for entry in os.listdir(datapath):
        if marker not in entry:
            continue
        label = entry.split(marker)[1]
        match = label_pattern.fullmatch(label)
        if match is None:
            continue
        snap_id, whole, frac = match.groups()
        # Scale the fractional digits by their own length rather than a fixed 1000,
        # so labels with a different number of digits still decode correctly.
        redshift = int(whole) + int(frac) / 10 ** len(frac)
        found.append((int(snap_id), label, redshift))

    found.sort(key=lambda item: (item[0], item[1]))

    snap_id_list   = [item[0] for item in found]
    snap_file_list = [item[1] for item in found]
    redshift_list  = np.asarray([item[2] for item in found])

    return [snap_file_list, snap_id_list, redshift_list]

In [23]:
# Discover the snapshots once and unpack the three parallel lists
# (labels, snapshot numbers, redshifts) returned by snapshot_redshifts.
snap_labels = snapshot_redshifts(datapath)
snap_label_list, snap_num_list, snap_red_list = snap_labels

In [24]:
# Show the snapshot labels that were found in datapath.
print(snap_label_list)

['127_z000p000']


# Create values for GROUP TABLE

In [25]:
def read_group_file(gpath, gfile):
    """Read one snapshot's SUBFIND catalogue and return one row per clean central.

    The chunk files ``gpath + gfile + '.<n>.hdf5'`` are read for n = 0, 1, ...
    until the first missing index. A subhalo is kept when

    * its ``SubGroupNumber`` is 0 (the central of its FOF group), and
    * its FOF group has ``FOF/ContaminationCount == 0``.

    No cut on M200 is applied here.

    FOF groups are addressed by position: the group with ``GroupNumber == g``
    is expected at index ``g - 1`` of the concatenated FOF arrays.

    Parameters
    ----------
    gpath : str
        Catalogue directory including the trailing separator (it is joined to
        ``gfile`` by plain string concatenation).
    gfile : str
        File-name stem without the ``.<n>.hdf5`` suffix.

    Returns
    -------
    dict
        Row-aligned numpy arrays:

        * ``fofs`` - ``Subhalo/GroupNumber`` of each kept central
        * ``subs`` - ``Subhalo/SubGroupNumber`` (always 0 here)
        * ``cops`` - ``Subhalo/CentreOfPotential`` times ``a / h``, shape (N, 3)
        * ``M200`` - ``FOF/Group_M_Crit200`` times ``1e10 / h``
        * ``R200`` - ``FOF/Group_R_Crit200`` times ``a / h``

        (presumably physical Mpc and Msun - TODO confirm against the
        simulation's unit conventions)
    """
    stem = gpath + gfile

    with h5py.File(stem + '.0.hdf5', 'r') as f:
        a = f['Header'].attrs['Time']                       #scale factor
        h = f['Header'].attrs['HubbleParam']

    # One entry per chunk file; joined once after the loop.
    fofs, subs, cops = [], [], []                 # per-subhalo quantities
    R200, M200, Contamination_count = [], [], []  # per-FOF-group quantities

    n = 0
    while os.path.exists(stem + '.' + str(n) + '.hdf5'):
        with h5py.File(stem + '.' + str(n) + '.hdf5', 'r') as f:
            # Each HDF5 group is read in full before anything is stored, so a chunk
            # with a missing dataset can never leave the arrays with unequal lengths.
            # The two groups are independent: a chunk without subhalos still
            # contributes its FOF rows, which keeps "index + 1 == GroupNumber" valid.
            try:
                sub_chunk = (f['Subhalo/GroupNumber'][()],
                             f['Subhalo/SubGroupNumber'][()],
                             f['Subhalo/CentreOfPotential'][()] * a / h)
            except KeyError:
                sub_chunk = None       # chunk holds no subhalo datasets

            try:
                fof_chunk = (f['FOF/Group_R_Crit200'][()] * a / h,
                             f['FOF/Group_M_Crit200'][()] * 1e10 / h,
                             f['FOF/ContaminationCount'][()])
            except KeyError:
                fof_chunk = None       # chunk holds no FOF datasets

        if sub_chunk is not None and len(sub_chunk[0]) > 0:
            fofs.append(sub_chunk[0])
            subs.append(sub_chunk[1])
            cops.append(sub_chunk[2])

        if fof_chunk is not None and len(fof_chunk[2]) > 0:
            R200.append(fof_chunk[0])
            M200.append(fof_chunk[1])
            Contamination_count.append(fof_chunk[2])

        n += 1

    print('Sucessfully read {} files.'.format(n))

    def _join(chunks, shape=(0,), dtype=float):
        # Concatenate the per-file chunks; fall back to a typed empty array.
        return np.concatenate(chunks) if chunks else np.empty(shape, dtype=dtype)

    # these arrays have length = to the number of subhalos
    fofs = _join(fofs, dtype=int)
    subs = _join(subs, dtype=int)
    cops = _join(cops, shape=(0, 3))

    # these arrays have length = to the number of fofs
    R200 = _join(R200)
    M200 = _join(M200)
    Contamination_count = _join(Contamination_count, dtype=int)

    # Group numbers start at 1, so array index i corresponds to GroupNumber i + 1.
    clean_groups = np.flatnonzero(Contamination_count == 0) + 1

    # Centrals (subs == 0) that belong to an uncontaminated FOF group.
    keep = (subs == 0) & np.isin(fofs, clean_groups)

    fofs = fofs[keep]
    subs = subs[keep]
    cops = cops[keep]

    # Look the FOF properties up by group number so every row is matched to its
    # own group, independent of the order in which centrals appear.
    fof_rows = fofs - 1
    M200 = M200[fof_rows]
    R200 = R200[fof_rows]

    dict_out = {}
    dict_out['fofs'] = fofs
    dict_out['subs'] = subs
    dict_out['cops'] = cops
    dict_out['M200'] = M200
    dict_out['R200'] = R200

    return dict_out

In [26]:
# Build the GROUP table: one 7-column block per snapshot
# (fofs, snapshot number, M200, R200, cops x/y/z), joined once at the end.
group_blocks = [np.empty((0, 7))]

for snap_label, snap_num in zip(snap_label_list, snap_num_list):
    gpath = datapath + '/groups_' + snap_label + '/'
    gfile = 'eagle_subfind_tab_' + snap_label

    dict_out = read_group_file(gpath, gfile)

    # Constant column carrying the snapshot number for every row of this block.
    snap_arr = np.full(len(dict_out['fofs']), snap_num, dtype=float)

    output_arr_temp = np.column_stack((dict_out['fofs'],
                                       snap_arr,
                                       dict_out['M200'],
                                       dict_out['R200'],
                                       dict_out['cops'].reshape(-1, 3)))
    print(output_arr_temp.shape)
    group_blocks.append(output_arr_temp)

output_arr = np.concatenate(group_blocks, axis=0)

Sucessfully read 192 files.
(10599, 7)


In [7]:
# Sanity check: column 1 of the group table holds the snapshot number of each row.
print(output_arr[:,1])

[127. 127. 127. ... 127. 127. 127.]


In [8]:
# Name the columns, store the two ID columns as integers, and export the GROUP table.
group_columns = ['fofs', 'snpnr', 'M200', 'R200', 'cops_x', 'cops_y', 'cops_z']
df = pd.DataFrame(output_arr, columns=group_columns)
df = df.astype({'fofs': 'int', 'snpnr': 'int'})
df.to_csv('GROUP_TABLE_MR.csv', sep=',', index=False)

# Create values for SUBGROUP TABLE

In [9]:
def read_subgroup_file(gpath, gfile):
    """Read one snapshot's SUBFIND catalogue and return every subhalo of clean groups.

    The chunk files ``gpath + gfile + '.<n>.hdf5'`` are read for n = 0, 1, ...
    until the first missing index. A subhalo is kept when its FOF group has
    ``FOF/ContaminationCount == 0``. Centrals and satellites are both kept.

    FOF groups are addressed by position: the group with ``GroupNumber == g``
    is expected at index ``g - 1`` of the concatenated FOF arrays.

    Parameters
    ----------
    gpath : str
        Catalogue directory including the trailing separator (it is joined to
        ``gfile`` by plain string concatenation).
    gfile : str
        File-name stem without the ``.<n>.hdf5`` suffix.

    Returns
    -------
    dict
        Row-aligned numpy arrays:

        * ``fofs``    - ``Subhalo/GroupNumber``
        * ``subs``    - ``Subhalo/SubGroupNumber``
        * ``cops``    - ``Subhalo/CentreOfPotential`` times ``a / h``, shape (N, 3)
        * ``Mdm``     - ``Subhalo/MassType[:, 1]`` times ``1e10 / h``
        * ``Mstar``   - ``Subhalo/MassType[:, 4]`` times ``1e10 / h``
        * ``Mgas``    - ``Subhalo/MassType[:, 0]`` times ``1e10 / h``
        * ``subf_id`` - position of the subhalo in the concatenated catalogue
          (before any filtering)
    """
    stem = gpath + gfile

    with h5py.File(stem + '.0.hdf5', 'r') as f:
        a = f['Header'].attrs['Time']                       #scale factor
        h = f['Header'].attrs['HubbleParam']

    # One entry per chunk file; joined once after the loop.
    fofs, subs, cops = [], [], []
    Mgas, Mdm, Ms = [], [], []
    Contamination_count = []

    n = 0
    while os.path.exists(stem + '.' + str(n) + '.hdf5'):
        with h5py.File(stem + '.' + str(n) + '.hdf5', 'r') as f:
            # Each HDF5 group is read in full before anything is stored, so a chunk
            # with a missing dataset can never leave the arrays with unequal lengths.
            # The two groups are independent: a chunk without subhalos still
            # contributes its FOF rows, which keeps "index + 1 == GroupNumber" valid.
            try:
                mass_type = f['Subhalo/MassType'][()] * 1e10 / h
                sub_chunk = (f['Subhalo/GroupNumber'][()],
                             f['Subhalo/SubGroupNumber'][()],
                             f['Subhalo/CentreOfPotential'][()] * a / h,
                             mass_type[:, 0],     # gas
                             mass_type[:, 1],     # dark matter
                             mass_type[:, 4])     # stars
            except KeyError:
                sub_chunk = None       # chunk holds no subhalo datasets

            try:
                fof_chunk = f['FOF/ContaminationCount'][()]
            except KeyError:
                fof_chunk = None       # chunk holds no FOF datasets

        if sub_chunk is not None and len(sub_chunk[0]) > 0:
            fofs.append(sub_chunk[0])
            subs.append(sub_chunk[1])
            cops.append(sub_chunk[2])
            Mgas.append(sub_chunk[3])
            Mdm.append(sub_chunk[4])
            Ms.append(sub_chunk[5])

        if fof_chunk is not None and len(fof_chunk) > 0:
            Contamination_count.append(fof_chunk)

        n += 1

    print('Sucessfully read {} files.'.format(n))

    def _join(chunks, shape=(0,), dtype=float):
        # Concatenate the per-file chunks; fall back to a typed empty array.
        return np.concatenate(chunks) if chunks else np.empty(shape, dtype=dtype)

    # these arrays have length = to the number of subhalos
    fofs = _join(fofs, dtype=int)
    subs = _join(subs, dtype=int)
    cops = _join(cops, shape=(0, 3))
    Mgas = _join(Mgas)
    Mdm  = _join(Mdm)
    Ms   = _join(Ms)

    # Position of every subhalo in the full catalogue, taken before filtering.
    subs_indxs = np.arange(0, len(subs))

    # this array has length = to the number of fofs
    Contamination_count = _join(Contamination_count, dtype=int)

    # Group numbers start at 1, so array index i corresponds to GroupNumber i + 1.
    clean_groups = np.flatnonzero(Contamination_count == 0) + 1

    # Keep every subhalo whose FOF group is uncontaminated.
    keep = np.isin(fofs, clean_groups)

    dict_out = {}
    dict_out['fofs']    = fofs[keep]
    dict_out['subs']    = subs[keep]
    dict_out['cops']    = cops[keep]
    dict_out['Mdm']     = Mdm[keep]
    dict_out['Mstar']   = Ms[keep]
    dict_out['Mgas']    = Mgas[keep]
    dict_out['subf_id'] = subs_indxs[keep]

    return dict_out

In [10]:
# Build the SUBGROUP table: one block per snapshot, joined once at the end.
# Each block has 10 columns: fofs, subs, snapshot number, subf_id, Mdm, Mgas,
# Mstar and the three centre-of-potential coordinates. The accumulator must
# therefore also be 10 columns wide (a 9-column start makes the concatenate fail).
subgroup_blocks = [np.empty((0, 10))]

for snap_label, snap_num in zip(snap_label_list, snap_num_list):
    gpath = datapath + '/groups_' + snap_label + '/'
    gfile = 'eagle_subfind_tab_' + snap_label

    dict_out = read_subgroup_file(gpath, gfile)

    # Constant column carrying the snapshot number for every row of this block.
    snap_arr = np.full(len(dict_out['fofs']), snap_num, dtype=float)

    output_arr_temp = np.column_stack((dict_out['fofs'],
                                       dict_out['subs'],
                                       snap_arr,
                                       dict_out['subf_id'],
                                       dict_out['Mdm'],
                                       dict_out['Mgas'],
                                       dict_out['Mstar'],
                                       dict_out['cops'].reshape(-1, 3)))
    print(output_arr_temp.shape)
    subgroup_blocks.append(output_arr_temp)

output_arr = np.concatenate(subgroup_blocks, axis=0)



Sucessfully read 192 files.
[ 9.104561 17.614407 84.18322 ]
(6467, 9)


In [11]:
# Sanity check: column 2 of the subgroup table holds the snapshot number of each row.
print(output_arr[:,2])

[127. 127. 127. ... 127. 127. 127.]


In [50]:
# Name the columns, store the four ID columns as integers, and export the SUBGROUP table.
subgroup_columns = ['fofs', 'subs', 'snpnr', 'subf_id',
                    'Mdm', 'Mgas', 'Mstar',
                    'cops_x', 'cops_y', 'cops_z']
df = pd.DataFrame(output_arr, columns=subgroup_columns)
df = df.astype({'fofs': 'int', 'subs': 'int', 'snpnr': 'int', 'subf_id': 'int'})

df.to_csv('SUBGROUP_TABLE_MR.csv', sep=',', index=False)

# Create values for TABLE PARTICLEDM

In [56]:
def load_particles(group_file, particle_file, ptype_flag):
    '''
    Load group number, subgroup number and particle ID from the particle-data files.

    The chunk files ``particle_file + '.<n>.hdf5'`` are read for n = 0, 1, ...
    until the first missing index. A chunk that lacks the ``PartType<ptype_flag>``
    datasets is skipped as a whole.

    Parameters
    ----------
    group_file : str
        Unused; kept so existing calls keep working.
    particle_file : str
        File-name stem (with directory) of the particle-data chunks.
    ptype_flag : int
        Particle type index used to build the ``PartType<N>`` dataset paths.

    Returns
    -------
    (gnr, snr, pid) : tuple of numpy arrays
        ``GroupNumber``, ``SubGroupNumber`` and ``ParticleIDs``, row-aligned and
        in the dtype stored in the files (empty integer arrays when nothing
        was read).
    '''
    prefix = 'PartType{}'.format(ptype_flag)

    # Chunks are collected per file and joined once. Appending to a pre-typed int64
    # array would promote to float64 if the stored IDs are unsigned 64-bit
    # (int64 + uint64 -> float64 in numpy), corrupting large IDs.
    pid_chunks, gnr_chunks, snr_chunks = [], [], []

    n = 0
    while os.path.exists(particle_file + '.' + str(n) + '.hdf5'):
        with h5py.File(particle_file + '.' + str(n) + '.hdf5', 'r') as f:
            # Read all three datasets before storing any, so the arrays stay aligned.
            try:
                chunk = (f[prefix + '/ParticleIDs'][()],
                         f[prefix + '/GroupNumber'][()],
                         f[prefix + '/SubGroupNumber'][()])
            except KeyError:
                chunk = None           # this chunk has no particles of that type

        if chunk is not None and len(chunk[0]) > 0:
            pid_chunks.append(chunk[0])
            gnr_chunks.append(chunk[1])
            snr_chunks.append(chunk[2])

        n += 1

    print('Sucessfully read {} Particle Data files.'.format(n))

    def _join(chunks):
        return np.concatenate(chunks) if chunks else np.empty(0, dtype=int)

    return _join(gnr_chunks), _join(snr_chunks), _join(pid_chunks)

In [66]:
def load_snapshot(group_file, snapshot_file, ptype_flag):
    '''
    Load particle IDs, coordinates and (for gas / stars) extra fields from the snapshot files.

    The chunk files ``snapshot_file + '.<n>.hdf5'`` are read for n = 0, 1, ...
    until the first missing index. For ``ptype_flag == 4`` a chunk without the
    star datasets is skipped (not all snapshots have stars); for every other
    particle type a missing dataset raises ``KeyError``.

    Parameters
    ----------
    group_file : str
        Stem of the group catalogue; ``group_file + '.0.hdf5'`` supplies the
        ``Time`` (scale factor) and ``HubbleParam`` header attributes.
    snapshot_file : str
        File-name stem (with directory) of the snapshot chunks.
    ptype_flag : int
        Particle type index used to build the ``PartType<N>`` dataset paths.

    Returns
    -------
    tuple of numpy arrays
        * ``ptype_flag == 0``: ``(pid, xyz, mass, hsml)``
        * ``ptype_flag == 4``: ``(pid, xyz, mass)``
        * otherwise:           ``(pid, xyz)``

        ``xyz`` is ``Coordinates * a / h`` with shape (N, 3); ``mass`` is
        ``Masses * 1e10 / h``; ``hsml`` is ``SmoothingLength`` as stored.
    '''
    with h5py.File(group_file+'.0.hdf5', 'r') as f:
        a = f['Header'].attrs['Time']                       #scale factor
        h = f['Header'].attrs['HubbleParam']

    prefix    = 'PartType{}'.format(ptype_flag)
    want_mass = ptype_flag == 0 or ptype_flag == 4
    want_hsml = ptype_flag == 0        # gas only

    # Chunks are collected per file and joined once. Appending to a pre-typed int64
    # array would promote to float64 if the stored IDs are unsigned 64-bit
    # (int64 + uint64 -> float64 in numpy), corrupting large IDs.
    pid_chunks, xyz_chunks, mass_chunks, hsml_chunks = [], [], [], []

    n = 0
    while os.path.exists(snapshot_file + '.' + str(n) + '.hdf5'):
        with h5py.File(snapshot_file + '.' + str(n) + '.hdf5', 'r') as f:
            # Read every requested dataset before storing any, so the arrays stay aligned.
            try:
                ids = f[prefix + '/ParticleIDs'][()]
                pos = f[prefix + '/Coordinates'][()] * a / h          #Mpc
                mss = f[prefix + '/Masses'][()] * 1.e10 / h if want_mass else None
                # NOTE(review): unlike the coordinates, the smoothing length is not
                # scaled by a/h - confirm which units the consumers expect.
                sml = f['PartType0/SmoothingLength'][()] if want_hsml else None
            except KeyError:
                if ptype_flag != 4:
                    raise
                ids = None             # Not all snapshots will have stars.

        if ids is not None and len(ids) > 0:
            pid_chunks.append(ids)
            xyz_chunks.append(pos)
            if want_mass:
                mass_chunks.append(mss)
            if want_hsml:
                hsml_chunks.append(sml)

        n += 1

    print('Sucessfully read {} Particle Data files.'.format(n))

    def _join(chunks, shape=(0,), dtype=float):
        return np.concatenate(chunks) if chunks else np.empty(shape, dtype=dtype)

    pid  = _join(pid_chunks, dtype=int)
    xyz  = _join(xyz_chunks, shape=(0, 3))
    mass = _join(mass_chunks)
    hsml = _join(hsml_chunks)

    print(len(pid))
    if ptype_flag == 0:
        return pid, xyz, mass, hsml
    elif ptype_flag == 4:
        return pid, xyz, mass
    else:
        return pid, xyz

In [72]:
datapath   = '../data/V1_LR_fix'
ptype_flag = 1

# Tag used in the output folder and file names; any other particle type is tagged 'WRONG'.
ptype_tag = {0: 'GAS', 1: 'DM', 4: 'STAR'}
val = ptype_tag.get(ptype_flag, 'WRONG')

# datapath now points at a different run, so rebuild the snapshot list from it
# instead of reusing the list that was derived from the previous datapath.
lr_snap_label_list, lr_snap_num_list, lr_snap_red_list = snapshot_redshifts(datapath)

# DataFrame.to_csv does not create missing directories.
os.makedirs('./SNAPSHOTS_LR_{}'.format(val), exist_ok=True)

for snap_label, snap_num in zip(lr_snap_label_list, lr_snap_num_list):

    #group data
    gpath = datapath + '/groups_' + snap_label + '/'
    gfile = 'eagle_subfind_tab_' + snap_label

    #particle data
    ppath = datapath + '/particledata_' + snap_label + '/'
    pfile = 'eagle_subfind_particles_' + snap_label

    #snapshot data
    spath = datapath + '/snapshot_' + snap_label + '/'
    sfile = 'snap_' + snap_label

    # Load in PARTICLE DATA (group membership of the particles listed by SUBFIND)
    gnr, snr, pid = load_particles(gpath + gfile, ppath + pfile, ptype_flag)

    # Load in SNAPSHOT DATA (all particles of this type)
    snapshot_out = load_snapshot(gpath + gfile, spath + sfile, ptype_flag)

    snap_pid = snapshot_out[0]
    snap_xyz = snapshot_out[1].reshape(-1, 3)

    if ptype_flag == 0:
        snap_mass = snapshot_out[2]
        snap_hsml = snapshot_out[3]
    elif ptype_flag == 4:
        snap_mass = snapshot_out[2]

    # Attach FOFID and SUBID to every snapshot particle; -999 marks "no valid group".
    snap_gnr = np.full(len(snap_pid), -999, dtype=int)
    snap_snr = np.full(len(snap_pid), -999, dtype=int)

    if len(pid) > 0 and len(snap_pid) > 0:
        # Match by particle ID. The two file sets are not guaranteed to list the
        # particles in the same order, so a mask assignment would pair IDs with
        # the wrong group numbers. Look every snapshot ID up in the sorted
        # particle-data IDs instead.
        order      = np.argsort(pid, kind='stable')
        pid_sorted = pid[order]
        slot       = np.searchsorted(pid_sorted, snap_pid)
        slot[slot == len(pid_sorted)] = 0     # past-the-end probes cannot match; keep the index valid
        found      = pid_sorted[slot] == snap_pid
        source     = order[slot[found]]

        snap_gnr[found] = gnr[source]
        snap_snr[found] = snr[source]

    snap_gnr[snap_gnr < 0] = -999
    snap_snr[snap_snr < 0] = -999

    # Build the table column by column. Packing the IDs into one float array with
    # the coordinates would round integers above 2**53.
    table = {'fofs':    snap_gnr,
             'subs':    snap_snr,
             'pids':    snap_pid,
             'coord_x': snap_xyz[:, 0],
             'coord_y': snap_xyz[:, 1],
             'coord_z': snap_xyz[:, 2]}

    if ptype_flag == 0 or ptype_flag == 4:
        table['mass'] = snap_mass
    if ptype_flag == 0:
        table['hsml'] = snap_hsml

    df = pd.DataFrame(table)

    # IDs that already have an integer dtype are written as they are.
    if not np.issubdtype(df['pids'].dtype, np.integer):
        df['pids'] = df['pids'].astype('int')

    df = df.sort_values(by=['fofs', 'subs'])

    df.to_csv(path_or_buf='./SNAPSHOTS_LR_{}/SNAPSHOT_{}_V1_LR_{}.csv'.format(val, val, snap_num), sep=',', index=False)



Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfully read 16 Particle Data files.
Sucessfully read 16 Particle Data files.
1759284
Sucessfull

# Create values for MERGERTREE TABLE

In [2]:
def read_mergertree_file(mpath, mfile, tree_prefix='tree_127'):
    """Read the merger-tree chunk files and return the node tables as arrays.

    The files ``mpath + mfile + '/' + tree_prefix + '.<n>.hdf5'`` are read for
    n = 0, 1, ... until the first missing index.

    Parameters
    ----------
    mpath : str
        Parent directory including the trailing separator.
    mfile : str
        Name of the merger-tree directory inside ``mpath``.
    tree_prefix : str, optional
        File-name prefix of the tree files. Defaults to ``'tree_127'``, the
        name that was previously hard-coded.

    Returns
    -------
    dict
        One 1-D numpy array per key, read from the ``haloTrees`` group:

        * ``desc_id``      - ``descendantIndex``
        * ``node_id``      - ``nodeIndex``
        * ``subf_id``      - ``positionInCatalogue``
        * ``snpnr_all``    - ``snapshotNumber``
        * ``id_main_prog`` - ``mainProgenitorIndex``

        Arrays are empty (integer dtype) when no file was found.
    """
    stem = mpath + mfile + '/' + tree_prefix + '.'

    # Output key -> dataset path, in the order the keys appear in the result.
    datasets = {
        'desc_id':      'haloTrees/descendantIndex',
        'node_id':      'haloTrees/nodeIndex',
        'subf_id':      'haloTrees/positionInCatalogue',
        'snpnr_all':    'haloTrees/snapshotNumber',
        'id_main_prog': 'haloTrees/mainProgenitorIndex',
    }
    chunks = {key: [] for key in datasets}

    print(stem + '0' + '.hdf5')

    n = 0
    while os.path.exists(stem + str(n) + '.hdf5'):
        with h5py.File(stem + str(n) + '.hdf5', 'r') as f:
            # Keep whole arrays per file; they are joined once after the loop.
            for key, path in datasets.items():
                chunks[key].append(f[path][()])
        n += 1

    print('Sucessfully read {} files.'.format(n))

    dict_out = {}
    for key, parts in chunks.items():
        dict_out[key] = np.concatenate(parts) if parts else np.empty(0, dtype=int)

    return dict_out

In [3]:
# Locate the merger-tree directory inside the LR run and read all of its tree files.
datapath = '../data/V1_LR_fix'
mfile    = 'merger_tree'
mpath    = '{}/'.format(datapath)

dict_out = read_mergertree_file(mpath, mfile)

../data/V1_LR_fix/merger_tree/tree_127.0.hdf5
Sucessfully read 1 files.


In [5]:
# Stack the merger-tree arrays side by side as integer columns, in CSV column order.
mergertree_keys = ('desc_id', 'node_id', 'id_main_prog', 'subf_id', 'snpnr_all')
output_arr = np.concatenate([dict_out[key].reshape(-1, 1) for key in mergertree_keys],
                            axis=1, dtype=int)

In [6]:
# Label the merger-tree columns and export the table.
mergertree_columns = ['desc_id', 'node_id', 'id_main_prog', 'subf_id', 'snapnr']
df = pd.DataFrame(output_arr, columns=mergertree_columns)
df.to_csv('MergerTree_V1_LR.csv', sep=',', index=False)