In [1]:
import numpy as np
import pandas as pd
import glob
import sys
import h5py
#from netCDF4 import Dataset
from datetime import datetime
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from scipy.spatial import cKDTree

import pyarrow as pa
import pyarrow.parquet as pqw

from functools import reduce
import operator
import gc

In [2]:
# plot settings
plt.rc('font', family='serif') 
plt.rc('font', serif='Times New Roman') 
plt.rcParams.update({'font.size': 16})
plt.rcParams['mathtext.fontset'] = 'stix'

In [3]:
from pyspark import SparkContext   
from pyspark.sql import SparkSession

import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark import Row
from pyspark.sql.window import Window as W

spark = SparkSession.builder \
    .appName("MyApp") \
    .master("spark://sohnic:7077") \
    .config("spark.driver.memory", "100g") \
    .getOrCreate()

sc = spark.sparkContext
sc.setCheckpointDir("hdfs://sohnic:54310/tmp/checkpoints")

spark.conf.set("spark.sql.debug.maxToStringFields", 500)
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/27 11:06:19 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


# 1. Reading the particle data

In [15]:
%%time

outname = 'hdfs://sohnic:54310/data/TNG300/snap99/parquet/snap099_sorted.parquet.snappy'
rawdf = spark.read.option("header","true").option("recursiveFileLookup","true").parquet(outname)

24/08/27 11:27:14 WARN SharedInMemoryCache: Evicting cached table partition metadata from memory due to size constraints (spark.sql.hive.filesourcePartitionFileCacheSize = 262144000 bytes). This may impact query planning performance.


CPU times: user 5.18 ms, sys: 1.63 ms, total: 6.81 ms
Wall time: 6.64 s


In [16]:
rawdf.printSchema()

root
 |-- px: double (nullable = true)
 |-- py: double (nullable = true)
 |-- pz: double (nullable = true)
 |-- vx: double (nullable = true)
 |-- vy: double (nullable = true)
 |-- vz: double (nullable = true)
 |-- mass: double (nullable = true)



In [17]:
%%time
rawdf.limit(4).toPandas().T



CPU times: user 16.3 ms, sys: 0 ns, total: 16.3 ms
Wall time: 2.56 s


                                                                                

Unnamed: 0,0,1,2,3
px,70037.914989,70050.889402,70052.049477,70044.449632
py,122711.870105,122702.683213,122687.959709,122716.085783
pz,78395.454087,78380.293789,78388.525112,78394.4034
vx,-115.07028,-295.58307,-357.15543,-302.51917
vy,-183.36308,-240.30115,-156.07643,-30.818964
vz,164.8555,57.16462,215.15742,586.6003
mass,0.000487,0.00068,0.000389,0.000632


In [18]:
%%time
rawdf.describe().show() 



+-------+------------------+-----------------+------------------+------------------+------------------+-------------------+--------------------+
|summary|                px|               py|                pz|                vx|                vy|                 vz|                mass|
+-------+------------------+-----------------+------------------+------------------+------------------+-------------------+--------------------+
|  count|         711967480|        711967480|         711967480|         711967480|         711967480|          711967480|           711967480|
|   mean|100688.61828005516|99059.36895091087|105857.77590178441|1.2704478306706155|1.0880226031171296|-0.3091079896978181|5.280344892323577E-4|
| stddev| 58078.24131182927|57695.00416267924|59449.995547022605|376.97520515072176|390.00232101241176| 379.19046823402147|1.424400845340058...|
|    min| 1.136348000727E-4| 8.96728655789E-4|   6.5989815512E-4|        -4127.3325|        -3954.2883|         -4355.7554|       

                                                                                

In [19]:
%%time
rawdf.describe().toPandas().T 



CPU times: user 196 ms, sys: 17.4 ms, total: 213 ms
Wall time: 44.5 s


                                                                                

Unnamed: 0,0,1,2,3,4
summary,count,mean,stddev,min,max
px,711967480,100688.61828005477,58078.24131182935,1.136348000727E-4,204999.9994621104
py,711967480,99059.3689509106,57695.00416267911,8.96728655789E-4,204999.9977547349
pz,711967480,105857.77590178442,59449.99554702274,6.5989815512E-4,204999.9998752371
vx,711967480,1.2704478306706162,376.9752051507215,-4127.3325,4329.7563
vy,711967480,1.088022603117128,390.0023210124113,-3954.2883,4393.859
vz,711967480,-0.30910798969781983,379.19046823402124,-4355.7554,5640.448
mass,711967480,5.280344892323588E-4,1.4244008453400572E-4,8.154293E-6,0.0014874495


# 2. Calculating stellar velocity dispersion

We measure the stellar velocity dispersion of ~10$^6$ subhalos. We first select stellar particles within spheres with a certain radius. We then calculate the mass-weighted stellar velocity dispersion based on the 3D velocity of selected stellar particles. 

### $M = \Sigma m_i $
### $\overline{v_x} = \frac{\Sigma m_i v_{i,x}}{M}$, $\, \overline{v_y} = \frac{\Sigma m_i v_{i,y}}{M}$, $\, \overline{v_z} = \frac{\Sigma m_i v_{i,z}}{M}$ 
### $\sigma_x^2 = \frac{\Sigma m_i (v_{i,x} - \overline{v_{x}})^2}{M}$, $\, \sigma_y^2 = \frac{\Sigma m_i (v_{i,y} - \overline{v_{y}})^2}{M}$, $\, \sigma_z^2 = \frac{\Sigma m_i (v_{i,z} - \overline{v_{z}})^2}{M}$
### $\therefore \sigma_{3D} = \sqrt{\sigma_x^2 + \sigma_y^2 + \sigma_z^2}$
We repeat the above calculation for six different radii (3, 5, 10, 20, 30, and 50 kpc).

In [22]:
#subhalo table (~few seconds)
t300 = pd.read_csv('./TNG300_quiescent_subhalos.csv')
print(len(t300))
t300.head()

104290


Unnamed: 0.1,Unnamed: 0,ID,GrNr,Posx,Posy,Posz,CMx,CMy,CMz,Velx,...,vdisp_dm_pos_sph_rh_z_bi,vdisp_dm_pos_sph_rh_z_mweight,vdisp_dm_pos_sph_rh_z_lweight,vdisp_dm_pos_sph_rh_3d_std,vdisp_dm_pos_sph_rh_3d_bi,vdisp_dm_pos_sph_rh_3d_mweight,vdisp_dm_pos_sph_rh_3d_lweight,central,quiescent,edge
0,1,394,0,41839.938,49050.41,146618.52,41841.72,49049.5,146618.42,505.39545,...,56.230135,0.0,0.0,93.595999,94.790011,0.0,0.0,0,1,1
1,2,403,0,45247.9,51453.363,145176.55,45247.434,51453.367,145176.25,484.588,...,61.195377,0.0,0.0,108.910434,110.497556,0.0,0.0,0,1,1
2,3,421,0,43345.83,50114.844,148136.55,43344.582,50116.066,148136.56,2266.7646,...,57.70442,0.0,0.0,100.625152,102.887322,0.0,0.0,0,1,1
3,4,423,0,43848.816,51581.773,145344.84,43851.332,51581.54,145348.3,-1288.3748,...,49.692601,0.0,0.0,90.353464,92.019996,0.0,0.0,0,1,1
4,5,447,0,44503.348,47543.66,146977.92,44503.926,47543.1,146978.66,-157.55678,...,57.70497,0.0,0.0,102.185058,104.080819,0.0,0.0,0,1,1
