# Step 2: Retrieve Particles around halos
For each halo in the sample defined in UC-1, extract the particles within a sphere of radius 2 Mpc and store them in the Scratch DB and in an HDF5 file.

This code uses a specially built table-valued function in the database SimulationDB.
It can access the raw data files from within the database to get the particles efficiently.
Hence all particle access is from within SQL.

In [1]:
import SciServer.CasJobs as cj
import pandas
import os
import numpy
from datetime import datetime

In [2]:
# Point root_folder at the scratch storage of whoever is running the notebook.
import SciServer.Authentication as auth

# Resolve the logged-in user from the current session token.
_token = auth.getToken()
USER = auth.getKeystoneUserWithToken(_token)

root_folder = f'/home/idies/workspace/Temporary/{USER.userName}/scratch/cosmodata/'

In [3]:
# Retrieve the halo sample from the MyScratch file defined in UC-1.
# The store is opened read-only and used as a context manager so the file
# handle is released even if the 'halos' key is missing and get() raises.
with pandas.HDFStore(f'{root_folder}HaloSample.h5', 'r') as store:
    halos = store.get('halos')

In [4]:
# Show the loaded halo sample (the notebook renders the value of the cell's last expression).
halos

Unnamed: 0,haloId,np,x,y,z,halfMassRadius
0,313000080000000,200039,486.280200,157.82020,388.2235,0.513466
1,465001631000000,200044,168.364700,225.70780,274.2794,0.489514
2,259001305032328,200525,368.534000,207.11040,286.1121,0.575769
3,426000068000000,200210,96.649380,458.64020,298.5108,0.530035
4,428000049000000,200123,87.790540,383.21790,345.3171,0.545141
...,...,...,...,...,...,...
97,482000079000000,209068,130.260600,85.02935,473.2986,0.424814
98,485000114000000,207711,151.466800,59.10039,479.3499,0.589006
99,485008673000000,209051,176.334400,48.27508,453.0608,0.698470
100,505000079000000,209496,59.468820,88.61140,430.0368,0.514679


In [5]:
# Retrieve the particles around each halo and save them, one HDF5 key per halo.

# define radius of sphere within which we want to look for particles
RadiusMax = 2

# set maximum number of halos for which to find particles
# set to small value when running demo
maxnum = 20
count = 0

# Open the target store for particles, also an HDF5 file. Using it as a
# context manager guarantees the file is closed even when a query fails
# part-way through, so it can be reopened afterwards without a stale handle.
with pandas.HDFStore(f'{root_folder}HaloParticles.h5') as particleStore:
    # add halos data to this file
    particleStore.put("Halos", halos)

    # loop over halos, retrieve particles for each one.
    # NB could be done in one query !!
    # itertuples() yields (index, haloId, np, x, y, z, halfMassRadius); only the
    # halo id and the position are needed here.
    for _ix, haloID, _np, x, y, z, _halfMassRadius in halos.itertuples():
        # Check the limit before doing any work so that maxnum=0 runs no query.
        if count >= maxnum:
            break
        start = datetime.now()
        count += 1
        query = f"SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New({x},{y},{z},{RadiusMax}).ToString()) "
        print("Query", count, ":", query)
        # submit query; SimulationDB is the context because the query calls
        # the MillenniumParticles table-valued function in that database
        df = cj.executeQuery(query, "SimulationDB")
        particleStore[f'Halo_{haloID}'] = df
        end = datetime.now()
        print("# particles found =", len(df), " in time ", end - start)

Query 1 : SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New(486.2802,157.8202,388.2235,2).ToString()) 
# particles found = 266145  in time  0:00:09.193790
Query 2 : SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New(168.3647,225.7078,274.2794,2).ToString()) 
# particles found = 264876  in time  0:00:08.655707
Query 3 : SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New(368.534,207.1104,286.1121,2).ToString()) 
# particles found = 358473  in time  0:00:11.247860
Query 4 : SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New(96.64938,458.6402,298.5108,2).ToString()) 
# particles found = 267676  in time  0:00:08.431994
Query 5 : SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New(87.79054,383.2179,345.3171,2).ToString()) 
# particles found = 260958  in time  0:00:08.192998
Query 6 : SELECT * FROM dbo.MillenniumParticles(63, dbo.Sphere::New(17.72766,52.44431,373.2024,2).ToString()) 
# particles found = 326834  in time  0:00:09.994263
Query 7 : SELECT * FROM

**Go to the scratch folder and check that the data has appeared**

**Go to CasJobs and check that there is no data in MyScratch yet**

In [6]:
# write particle contents to MyScratch table
# 1st: create table

# Drop any previous copy of the table. This is best-effort: the drop is
# expected to fail when the table does not exist yet, so the error is
# reported and execution continues.
try:
    ddl = "drop TABLE [myscratch:default].halo200k_particles"
    cj.executeQuery(ddl, context="[myscratch:default]")
except Exception as err:
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit still stop the notebook.
    print("Could not drop halo200k_particles (it may not exist yet):", err)

ddl = "CREATE TABLE [myscratch:default].halo200k_particles(metaId bigint,id bigint, x real,y real,z real,haloID bigint)"
# Kept as the last expression so the notebook displays the query result.
cj.executeQuery(ddl, context="[myscratch:default]")

Unnamed: 0,Rows Affected
0,0


In [7]:
# Reopen the particle file; the store stays open so that following cells can
# read from it, and must be closed explicitly once they are done.
_particles_path = f"{root_folder}HaloParticles.h5"
particles = pandas.HDFStore(_particles_path)

# Names of all datasets held in the store, used to check which halos have data.
keys = particles.keys()

In [8]:
# Upload the stored particles to the MyScratch table for at most maxnum halos.
maxnum = 20
count = 0
for _i, haloID, _np, _x, _y, _z, _halfMassRadius in halos.itertuples():
    # Check the limit before doing any work so that maxnum=0 uploads nothing.
    if count >= maxnum:
        break
    start = datetime.now()
    key = f'Halo_{haloID}'
    # Store keys are reported with a leading '/', hence the prefix in the lookup.
    if '/' + key not in keys:
        print(f'Cannot find data for key {key}')
        # Halos without stored particles are skipped and do not count towards maxnum.
        continue
    df = particles.get(key)
    # Tag every particle row with the halo it was retrieved for (64-bit
    # integers, matching the bigint haloID column of the target table).
    df['haloID'] = numpy.full(len(df.index), haloID, dtype=numpy.dtype('i8'))
    response = cj.uploadPandasDataFrameToTable(df, "halo200k_particles", context="[myscratch:default]")
    end = datetime.now()
    print("Duration :", end - start)
    count += 1

Duration : 0:00:09.773558
Duration : 0:00:09.591688
Duration : 0:00:12.924550
Duration : 0:00:10.115145
Duration : 0:00:09.435166
Duration : 0:00:11.706373
Duration : 0:00:11.072421
Duration : 0:00:10.067161
Duration : 0:00:10.121218
Duration : 0:00:09.893673
Duration : 0:00:10.695924
Duration : 0:00:10.636464
Duration : 0:00:10.270264
Duration : 0:00:11.963823
Duration : 0:00:10.376019
Duration : 0:00:11.890572
Duration : 0:00:12.566383
Duration : 0:00:12.090870
Duration : 0:00:12.655622
Duration : 0:00:10.566009


In [9]:
# Close the HaloParticles.h5 store that was left open for the upload loop.
particles.close()  

**Go to CasJobs and check that the particles were written**