In [1]:
import sys
import h5py
import numpy as np
from utils.paths import SetupPaths
from utils.get_summary_data import compile_summary

paths = SetupPaths()
  

***Aim: get all of the separations for major pairs -- Illustris dwarf, massive, dark, hydro!***

In [8]:
data_summary = {}
for sim in ["Illustris","TNG"]:
    data_summary[sim] = {}
    
    for phys in ["dark","hydro"]:
        inst = compile_summary(sim=sim, phys=phys, size="dwarf", subset="major")
        data_summary[sim][phys] = inst.get_data()
        
        print(f"done with {sim} {phys}")
    

0 does not exist -- skipping
1 does not exist -- skipping
2 does not exist -- skipping
3 does not exist -- skipping
4 does not exist -- skipping
5 does not exist -- skipping
6 does not exist -- skipping
7 does not exist -- skipping
8 does not exist -- skipping
9 does not exist -- skipping
10 does not exist -- skipping
11 does not exist -- skipping
12 does not exist -- skipping
13 does not exist -- skipping
14 does not exist -- skipping
15 dark dwarf no pairs in subset major
16 dark dwarf no pairs in subset major
17 dark dwarf no pairs in subset major
18 dark dwarf no pairs in subset major
19 dark dwarf no pairs in subset major
20 dark dwarf no pairs in subset major
21 dark dwarf no pairs in subset major
22 dark dwarf no pairs in subset major
23 dark dwarf no pairs in subset major
24 dark dwarf no pairs in subset major
25 dark dwarf no pairs in subset major
26 dark dwarf no pairs in subset major
27 dark dwarf no pairs in subset major
28 dark dwarf no pairs in subset major
29 dark dwarf 

In [30]:
sim = "Illustris" # for now
subset = "major"


savepath = f"summarydata/{sim}_{subset}.hdf5"
f = h5py.File(f"{paths.path_pairs}{savepath}", 'w')
f.close()

# for sim in ["Illustris","TNG"]:
numsnaps = {"Illustris":135,"TNG":99}



units_dict = {
     "Redshift":"Redshift z",
     "Snapshot":"Snapshot number from simulation",
     "Number pairs":"Number of total pairs with a MW/LMC analog+companion",
     "Number primaries":"Number of total MW/LMCs",
     "Ratio pairs":"Ratio of the number of pairs to number of primaries",
     "Median Separation": "The median separation of all pairs [kpc]",
     "Median Separation Quartiles": "The 16 and 84th percentile of the separation of all pairs [kpc]",
     "Mean Separation": "The mean separation of all pairs [kpc]",
     "Mean Separation Std": "The standard deviation of the separation of all pairs [kpc]",
     "Median RelVel": "The median relative velocity of all pairs [km/s]",
     "Median RelVel Quartiles": "The 16 and 84th percentile of the relative velocity of all pairs [km/s]",
     "Mean RelVel": "The mean relative velocity of all pairs [km/s]",
     "Mean RelVel Std":  "The standard deviation of the relative velocity of all pairs [km/s]"}

# to create data dictionaries for each physics and size scale
summarydata = {}
for phys in ['dark','hydro']:
    summarydata[phys] = {}

    size = "dwarf"

    summarydata[phys][size]={"Redshift":[],
                             "Snapshot":[],
                             "Number pairs":[],
                             "Number primaries":[],
                             "Ratio pairs":[],
                             "Median Separation": [],
                             "Median Separation Quartiles": [],
                             "Mean Separation": [],
                             "Mean Separation Std": [],
                             "Median RelVel": [],
                             "Median RelVel Quartiles": [],
                             "Mean RelVel": [],
                             "Mean RelVel Std": []}
phys="hydro"
size="dwarf"


for snapshot in np.arange(0,numsnaps[sim]+1):
    pair_path = f"{sim}_{snapshot}_10.hdf5"
    pair_data = h5py.File(f"{paths.path_pairs}{pair_path}", "r")
    
    for phys in ['dark','hydro']:

        size = "dwarf"

        try:
            data = pair_data[phys][size]

            med_mask = np.array(data["Realization"]) == -1

            primary_stell = np.array(data["Sub1 Stellar Mass"])

            # primary stellar mass will be different for dwarf and massive pairs!~
            if size == "dwarf":
                primary_analog = (primary_stell > 0.01) & (primary_stell < 0.5)
            elif size == "massive":
                primary_analog = (primary_stell > 0.5) & (primary_stell < 2.5)


            # get major pairs only  
            majors = np.array(data["Stellar Mass Ratio"]) > 1/4

            major_mask = majors # & primary_analog
            majormed_mask = med_mask & majors # & primary_analog

            sublist = {} # dictionary  of subhalos in group

            if subset == "majormed":
                use_mask = majormed_mask
            if subset == "major":
                use_mask = major_mask


            if np.sum(use_mask) == 0:
                print(f"{snapshot} {phys} {size} no pairs in subset {subset}")
                continue

            for key, val in data.items():
                sublist[key] = np.array(val)[use_mask]


            # redshift and count info
            redshift = pair_data['Header'].attrs['Redshift']
            num_pairs = np.sum(use_mask)
            num_primaries = np.sum(primary_analog)
            ratio_pairs = num_pairs/num_primaries
            print(f"{snapshot} {np.sum(use_mask)}")

            # separation info
            med_sep = np.median(sublist['Separation'])
            qs_sep = np.percentile(sublist['Separation'],[16,84])
            mean_sep = np.mean(sublist['Separation'])
            std_sep = np.std(sublist['Separation'])

            # velocity info
            med_vel = np.median(sublist['RelVel'])
            qs_vel = np.percentile(sublist['RelVel'],[16,84])
            mean_vel = np.mean(sublist['RelVel'])
            std_vel = np.std(sublist['RelVel'])

            summarylist = np.array([redshift, snapshot, num_pairs, num_primaries, ratio_pairs, 
                                 med_sep, qs_sep, mean_sep, std_sep,
                                 med_vel, qs_vel, mean_vel, std_vel])

            for ind, key in enumerate(summarydata[phys][size].keys()):
                summarydata[phys][size][key].append(summarylist[ind])

        except KeyError:
            print(f"{snapshot} does not exist")
            continue

for phys in ['dark','hydro']:

size = "dwarf"

    f = h5py.File(f"{paths.path_pairs}{savepath}", 'r+')

    for key, val in summarydata[phys][size].items():
        val = np.array(val)
        dset = f.create_dataset(f'/{phys}/{size}/{key}', 
                                shape=val.shape,
                                dtype=val.dtype)
        dset.attrs[key] = units_dict[key]
        dset[:] = val
    f.close()
    print(f"successfully wrote {size} {phys} to {savepath}")

0 does not exist
0 does not exist
0 does not exist
0 does not exist
1 does not exist
1 does not exist
1 does not exist
1 does not exist
2 does not exist
2 does not exist
2 does not exist
2 does not exist
3 does not exist
3 does not exist
3 does not exist
3 does not exist
4 does not exist
4 does not exist
4 does not exist
4 does not exist
5 does not exist
5 does not exist
5 does not exist
5 does not exist
6 does not exist
6 does not exist
6 does not exist
6 does not exist
7 does not exist
7 does not exist
7 does not exist
7 does not exist
8 does not exist
8 does not exist
8 does not exist
8 does not exist
9 does not exist
9 does not exist
9 does not exist
9 does not exist
10 does not exist
10 does not exist
10 does not exist
10 does not exist
11 does not exist
11 does not exist
11 does not exist
11 does not exist
12 does not exist
12 does not exist
12 does not exist
12 does not exist
13 does not exist
13 does not exist
13 does not exist
13 does not exist
14 does not exist
14 does not ex

  majors = np.array(data["Stellar Mass Ratio"]) > 1/4


55 13751
55 288
55 hydro dwarf no pairs in subset major
55 hydro massive no pairs in subset major
56 15194
56 372
56 12603
56 338
57 17642
57 493
57 14691
57 404
58 19620
58 578
58 16189
58 448
59 21141
59 738
59 17538
59 651
60 22423
60 845
60 18545
60 779
61 23499
61 866
61 18913
61 880
62 24598
62 1105
62 20072
62 997
63 25483
63 1291
63 21667
63 1106
64 26494
64 1460
64 22376
64 1306
65 26858
65 1755
65 22834
65 1443
66 27672
66 1862
66 23326
66 1532
67 27672
67 1991
67 23301
67 1641
68 27431
68 2027
68 23004
68 1750
69 27475
69 2171
69 23424
69 1884
70 27492
70 2330
70 23508
70 1946
71 27700
71 2443
71 23756
71 2057
72 27383
72 2489
72 23378
72 2081
73 27388
73 2439
73 23123
73 2121
74 26766
74 2641
74 22452
74 2235
75 26445
75 2725
75 22074
75 2206
76 25756
76 2681
76 21945
76 2382
77 25398
77 2666
77 21755
77 2192
78 24543
78 2863
78 20851
78 2375
79 24192
79 2884
79 20552
79 2365
80 24072
80 2993
80 20241
80 2366
81 23739
81 2938
81 19775
81 2389
82 23116
82 2991
82 19745
82 25

In [10]:
np.array([True,True]) & np.array([True,False]) & np.array([False,False])

array([False, False])

In [30]:
majorlist

{'Group ID': array([ 1785,  1785,  2696, ..., 20192, 20192, 20215], dtype=int32),
 'Group Mass': array([37.14071621, 37.14071621, 27.07863938, ...,  8.14878602,
         8.14878602,  8.13750516]),
 'Group Nsubs': array([26, 26, 26, ...,  3,  3,  3]),
 'Group Radius': array([187.78538704, 187.78538704, 169.01328347, ..., 113.25562   ,
        113.25562   , 113.20454424]),
 'Realization': array([7, 8, 2, ..., 2, 8, 4]),
 'RelVel': array([205.9874027 , 205.9874027 , 141.97381774, ...,  40.14790167,
         40.14790167, 105.77087268]),
 'Separation': array([685.70840573, 685.70840573, 234.47516283, ...,  45.95757008,
         45.95757008,  16.2511526 ]),
 'Stellar Mass Ratio': array([0.69236606, 0.98321058, 0.92894464, ..., 0.63078583, 0.27565915,
        0.48151182]),
 'Sub1 ID': array([594229, 594229, 634749, ..., 834327, 834327, 834446]),
 'Sub1 Mass': array([32.18225187, 32.18225187, 12.28447394, ...,  6.35206699,
         6.35206699,  6.89506869]),
 'Sub1 Pos': array([[16230.75173118

In [17]:
# for sim in ["Illustris","TNG"]:
sim = "Illustris" # for now
numsnaps = {"Illustris":135,"TNG":99}

summarydata = {}

for snapshot in np.arange(0,numsnaps[sim]):
    pair_path = f"{sim}_{snapshot}_10.hdf5"
    pair_data = h5py.File(f"{paths.path_pairs}{pair_path}", "r")
    
    for phys in ["dark","hydro"]:
        try:
            summarydata[phys] = {}
            physdata = pair_data[phys]
            print(snapshot, phys)

            for size in ["dwarf","massive"]:
                summarydata[phys][size]={}
                
                sizedata = physdata[size]
                medmask = np.array(sizedata["Realization"]) == -1
                
                primary_stell = np.array(sizedata["Sub1 Stellar Mass"])

                # primary stellar mass will be different for dwarf and massive pairs!~
                if size == "dwarf":
                    primary_analog = (primstells > 0.01) & (primstells < 0.5)
                elif size == "massive":
                    primary_analog = (primstells > 0.5) & (primstells < 2.5)
                    
                    
                # get major pairs only  
                big = np.array(sizedata["Stellar Mass Ratio"]) > 1/4
                small = np.array(sizedata["Stellar Mass Ratio"]) > 1/10
                
                majors = primary_analog & big 
                major_mask = majors & primary_analog
                majormed_mask = med_mask & majors & primary_analog
                
                
                
            
#             summary_dict = {}

#             summary_dict['major'] = {"Redshift":[],
#                             "Median Separation": [],
#                             "Median Separation Quartiles": [],
#                             "Mean Separation": [],
#                             "Mean Separation Std": [],
#                             "Median RelVel": [],
#                             "Median RelVel Quartiles": [],
#                             "Mean RelVel": [],
#                             "Mean RelVel Std": [],
#                             "Number pairs":[],
#                             "Ratio pairs":[]}

        except KeyError:
            print(f"{snapshot} does not exist")
            continue

0 does not exist
0 does not exist
1 does not exist
1 does not exist
2 does not exist
2 does not exist
3 does not exist
3 does not exist
4 does not exist
4 does not exist
5 does not exist
5 does not exist
6 does not exist
6 does not exist
7 does not exist
7 does not exist
8 does not exist
8 does not exist
9 does not exist
9 does not exist
10 does not exist
10 does not exist
11 does not exist
11 does not exist
12 does not exist
12 does not exist
13 does not exist
13 does not exist
14 does not exist
14 does not exist
15 dark
15 hydro
16 dark
16 hydro
17 dark
17 hydro
18 dark
18 hydro
19 dark
19 hydro
20 dark
20 hydro
21 dark
21 hydro
22 dark
22 hydro
23 dark
23 hydro
24 dark
24 hydro
25 dark
25 hydro
26 dark
26 hydro
27 dark
27 hydro
28 dark
28 hydro
29 dark
29 hydro
30 dark
30 hydro
31 dark
31 hydro
32 dark
32 hydro
33 dark
33 hydro
34 dark
34 hydro
35 dark
35 hydro
36 dark
36 hydro
37 dark
37 hydro
38 dark
38 hydro
39 dark
39 hydro
40 dark
40 hydro
41 dark
41 hydro
42 dark
42 hydro
43 d

### graveyard of overeager coding

In [1]:
import sys
import h5py
import numpy as np
from utils.paths import SetupPaths

paths = SetupPaths()

savepath = f"summarydata.hdf5"
f = h5py.File(f"{paths.path_pairs}{savepath}", 'w')
f.close()

# for sim in ["Illustris","TNG"]:
sim = "Illustris" # for now
numsnaps = {"Illustris":135,"TNG":99}

# for phys in ["dark","hydro"]:
phys = "dark"

# for size in ["dwarf","massive"]:
size = "dwarf"

In [30]:
# begin collecting summary statistics
######

summary_dict = {}

for subset in ["median"]:
    summary_dict[subset] = {"Redshift":[],
                            "Median Separation": [],
                            "Median Separation Quartiles": [],
                            "Mean Separation": [],
                            "Mean Separation Std": [],
                            "Median RelVel": [],
                            "Median RelVel Quartiles": [],
                            "Mean RelVel": [],
                            "Mean RelVel Std": [],
                            "Number pairs":[],
                            "Ratio pairs":[]}

for snapshot in np.arange(40,numsnaps[sim]+1):
    
    try: 
        pair_path = f"{sim}_{snapshot}_10.hdf5"
        pair_data = h5py.File(f"{paths.path_pairs}{pair_path}", "r")
        
        redshift = pair_data["Header"].attrs['Redshift']
        print(snapshot)
    
        pairs_subset = pair_data[phys][size] # pairs e.g. in Illustris dark, dwarf or massive
        
        primstells = np.array(pairs_subset["Sub1 Stellar Mass"])
        
        med = np.array(pairs_subset["Realization"]) == -1
        
        # primary stellar mass will be different for dwarf and massive pairs!~
        if size == "dwarf":
            primary_analog = (primstells > 0.01) & (primstells < 0.5)
        elif size == "massive":
            primary_analog = (primstells > 0.5) & (primstells < 2.5)
            
        allprim = primary_analog
        
        # masks on mass ratio
        big = np.array(pairs_subset["Stellar Mass Ratio"]) > 1/4
        small = np.array(pairs_subset["Stellar Mass Ratio"]) > 1/10
        major = primary_analog & big 
        minor = primary_analog & ~big & small
        
        # masks on separation
        lowsep = np.array(pairs_subset["Separation"]) < 100
        highsep = np.array(pairs_subset["Separation"]) > 100
        close = primary_analog & lowsep
        far = primary_analog & highsep
        
        for subset in ["median","major","minor","lowsep","highsep","lowsepmajor","lowsepmajor","highsepmajor","highsepminor","primaries"]:
            print(subset)
#             if subset == "median":
#                 mask = med & primary_analog

#             if subset == "major":
#                 mask = major 

#             if subset == "minor":
#                 mask = minor

#             if subset == "lowsep":
#                 mask = close

#             if subset == "highsep":
#                 mask = far

#             if subset == "lowsepmajor":
#                 mask = close & major

#             if subset == "lowsepmajor":
#                 mask = close & minor

#             if subset == "highsepmajor":
#                 mask = far & major

#             if subset == "highsepminor":
#                 mask = far & minor

#             if subset == "primaries":
#                 mask = allprim

#             else:
#                 print("There is no such subset of pairs")
#                 break

            seps = np.array(pairs_subset['Separation'])[mask]
            vels = np.array(pairs_subset['RelVel'])[mask]
            num_prims = np.sum(primary_analog)
            num_pairs = np.sum(mask)

            summary_array = np.array([redshift,np.median(seps), np.percentile(seps,[16, 84]), np.mean(seps),np.std(seps),
                                      np.median(vels), np.percentile(vels,[16, 84]), np.mean(vels),np.std(vels),
                                     num_pairs, num_pairs/num_prims])

            for enum, key in enumerate(summary_dict[subset].keys()):
                summary_dict[subset][key].append(summary_array[enum])

    except KeyError:
        continue

#     except OSError:
#         print(f"Snapshot{snapshot} does not exist")
#         continue

40
median


IndexError: boolean index did not match indexed array along dimension 0; dimension is 2412 but corresponding boolean dimension is 144564

In [25]:
summary_dict['major']

KeyError: 'major'

In [14]:
test = h5py.File("../data/subhalos/Illustris_135.hdf5",'r')

In [16]:
test['dark']['dwarf'].keys()

<KeysViewHDF5 ['Group ID', 'Group Mass', 'Group Radius', 'Nsubs', 'Subhalo ID', 'Subhalo Mass', 'Subhalo Max Mass', 'Subhalo Max Mass Snap', 'Subhalo Med Stellar Mass', 'Subhalo Pos', 'Subhalo Stellar Masses', 'Subhalo Vel']>

In [26]:
phys = 'dark'
size = 'massive'

print(f"group mass {min(np.array(test[phys][size]['Group Mass']))} - {max(np.array(test[phys][size]['Group Mass']))}")

      
print(f"subhalo mass {min(np.array(test[phys][size]['Subhalo Mass']))} - {max(np.array(test[phys][size]['Subhalo Mass']))}")
      
print(f"subhalo med stellar mass {min(np.array(test[phys][size]['Subhalo Med Stellar Mass']))} - {max(np.array(test[phys][size]['Subhalo Med Stellar Mass']))}")

group mass 100.03265467557041 - 399.65980703180486
subhalo mass 0.10002663740041581 - 374.6185302734375
subhalo med stellar mass 1.917647520967911e-06 - 6.582588989457418


In [None]:
phys = 'dark'
size = 'massive'

print(f"group mass {min(np.array(test[phys][size]['Group Mass']))} - {max(np.array(test[phys][size]['Group Mass']))}")

      
print(f"subhalo mass {min(np.array(test[phys][size]['Subhalo Mass']))} - {max(np.array(test[phys][size]['Subhalo Mass']))}")
      
print(f"subhalo med stellar mass {min(np.array(test[phys][size]['Subhalo Med Stellar Mass']))} - {max(np.array(test[phys][size]['Subhalo Med Stellar Mass']))}")