## Getting particles in subhalos
We can quickly find particles in spheres or boxes using the spatial index.
To get exactly the particles in a sub-halo, we need to read the particle IDs from the sub_ids files, then use the resulting list to subselect the data obtained from a small sphere around the location of the halo.


In [None]:
import SciServer.CasJobs as cj
import numpy as np
import matplotlib.pyplot as plt
import re
import simfiles

In [None]:
# Fetch three file collections for the "Millennium" simulation from the
# project helper module.  Judging by the names they are the snapshot,
# sub_tab and sub_ids files -- NOTE(review): simfiles.allfiles is not visible
# here; confirm what it returns.
snapfiles,subtabfiles,subidsfiles=simfiles.allfiles("Millennium")

In [None]:
# Show the first ten sub_tab entries as a quick sanity check.
subtabfiles[:10]

In [None]:
def addlocation(df):
    """Add the matching sub_ids / sub_tab file path to every row of *df*.

    For each row the ``snapnum`` and ``file`` columns are turned into the
    file-name suffixes ``sub_ids_<SSS>.<file>`` and ``sub_tab_<SSS>.<file>``
    (``SSS`` is the snapshot number zero-padded to three digits).  The first
    entry of the module-level ``subidsfiles`` / ``subtabfiles`` sequences
    ending in that suffix is stored in the new ``subids`` / ``subtabs``
    columns.

    The frame is modified in place and nothing is returned.  An
    ``IndexError`` is raised when no entry matches a row.
    """
    def first_ending_with(candidates, suffix):
        # Build the full list of matches before indexing so that a miss
        # surfaces as IndexError.
        return [name for name in candidates if name.endswith(suffix)][0]

    id_paths = []
    tab_paths = []
    for _, record in df.iterrows():
        # iterrows may hand back floats for integer columns, hence int().
        snap = "%03d" % int(record['snapnum'])
        filenr = int(record['file'])
        id_paths.append(
            first_ending_with(subidsfiles, "sub_ids_{0}.{1}".format(snap, filenr)))
        tab_paths.append(
            first_ending_with(subtabfiles, "sub_tab_{0}.{1}".format(snap, filenr)))
    df['subids'] = id_paths
    df['subtabs'] = tab_paths

In [None]:
# Sample ten halos from snapshot 62 holding 5000-10000 particles.  The
# haloid = firsthaloinfofgroupid condition presumably keeps only the first
# halo of each FOF group (TODO confirm against the MR table schema);
# "order by newid()" shuffles the rows so that "top 10" is a random pick.
sql="""
select top 10 snapnum,x,y,z,phkey,np
,      halfmassradius,subhalofileid, haloid
  from MR h
 where h.snapnum=62 
    and h.np between 5000 and 10000
    and h.haloid=h.firsthaloinfofgroupid
 order by newid()
"""
df = cj.executeQuery(sql, "MPAHaloTrees")

# Decode subhalofileid, treated here as
#   snapnum * 10**12 + file * 10**8 + rank
# 'file' is the file number used in the sub_tab / sub_ids file names and
# 'rank' the position of the subhalo inside that file.
df['file'] = (
    np.floor(
        (df['subhalofileid'] - df['snapnum'] * 1000000000000) / 100000000
    )
).astype(np.int64)
df['rank'] = df['subhalofileid'] % 100000000

# Attach the sub_ids / sub_tab paths for every halo, then preview the result.
addlocation(df)
df.head(3)


In [None]:
def getoffsets(row):
    """Return the ``(length, offset)`` pair of every subhalo in a sub_tab file.

    Parameters
    ----------
    row : mapping
        Only ``row['subtabs']`` is used: the path of the sub_tab file.

    Returns
    -------
    list of tuple
        One ``(sublen, offset)`` pair of ``numpy.int32`` values per subhalo,
        in file order.

    Notes
    -----
    Layout as read here: five int32 header words (Ngroups, Nids, TotNgroups,
    NFiles, Nsubhalos), then two int32 arrays of ``Ngroups`` entries each
    that are skipped, then ``Nsubhalos`` lengths followed by ``Nsubhalos``
    offsets.
    """
    with open(row['subtabs'], 'rb') as f:
        # Convert to Python ints so the byte-offset arithmetic below cannot
        # wrap around in 32 bits.
        Ngroups, _, _, _, Nsubhalos = (int(v) for v in np.fromfile(f, np.int32, 5))
        lengths_start = 20 + 2 * Ngroups * 4
        f.seek(lengths_start)
        sublen = np.fromfile(f, np.int32, Nsubhalos)
        f.seek(lengths_start + 4 * Nsubhalos)
        offsets = np.fromfile(f, np.int32, Nsubhalos)
    return list(zip(sublen, offsets))

In [None]:
def subhaloData(row):
    """Read the particle IDs of one subhalo from its sub_tab / sub_ids files.

    Parameters
    ----------
    row : mapping
        Needs ``rank`` (index of the subhalo inside its sub_tab file), ``np``
        (number of IDs to read), ``subtabs`` and ``subids`` (paths of the two
        files).

    Returns
    -------
    numpy.ndarray
        int64 array with the low 34 bits of each of the ``np`` stored IDs.
        The higher bits are discarded (the original author's note calls them
        a hash key).

    The header words of both files and a comparison of ``np`` with the
    length stored in the sub_tab file are printed as diagnostics.
    """
    rank = int(row['rank'])
    nump = int(row['np'])
    tf = row['subtabs']
    si = row['subids']
    with open(tf, 'rb') as f:
        # Header values are converted to Python ints: doing the byte-offset
        # arithmetic on numpy int32 scalars could overflow for large files.
        Ngroups, Nids, TotNgroups, NFiles, Nsubhalos = (
            int(v) for v in np.fromfile(f, np.int32, 5))
        print(Ngroups, Nids, TotNgroups, NFiles, Nsubhalos)
        # Skip the 20-byte header and two int32 arrays of Ngroups entries.
        lengths_start = 20 + 2 * Ngroups * 4
        f.seek(lengths_start + rank * 4)
        numpf = int(np.fromfile(f, np.int32, 1)[0])
        # The offsets array follows the Nsubhalos lengths.
        f.seek(lengths_start + 4 * Nsubhalos + rank * 4)
        offset = int(np.fromfile(f, np.int32, 1)[0])
        print(nump, 'vs', numpf, 'off=', offset, 'rank=', rank)
    with open(si, 'rb') as f:
        Ngroups, Nids, TotNgroups, NTask = (
            int(v) for v in np.fromfile(f, np.int32, 4))
        print(Ngroups, Nids, TotNgroups, NTask)
        # 16-byte header, then 8-byte IDs; offset counts IDs, not bytes.
        f.seek(offset * 8 + 16)
        ids = np.fromfile(f, np.int64, nump)
    low34_mask = (np.int64(1) << 34) - 1
    particleid = np.bitwise_and(ids, low34_mask)
    return particleid

In [None]:
# Take the first halo of the query result as the worked example and read
# the particle IDs of its subhalo from the sub_tab / sub_ids files.
row=df.iloc[0]
pids=subhaloData(row)

In [None]:
# Fetch every particle inside a sphere centred on the halo position with a
# radius of five half-mass radii, for the halo's snapshot.
sql = """
select p.*
  from dbo.MillenniumParticlesPosVel({sn},'SPHERE[{x},{y},{z},{r}]') p
""".format(
    sn=int(row['snapnum']),
    x=row['x'],
    y=row['y'],
    z=row['z'],
    r=5 * row['halfmassradius'],
)
print(sql)

ps = cj.executeQuery(sql, 'SimulationDB')
# Index by particle id so rows can be selected by the IDs read from file.
ps = ps.set_index('id')
print('found', len(ps), 'particles')

In [None]:
# Select the subhalo's particles from the sphere query result.
# ps.loc[pids] raises KeyError as soon as a single ID is absent from ps,
# so restrict the lookup to the IDs that are actually present (order of
# pids is preserved) and report how many had to be left out.
_present = np.isin(pids, ps.index.values)
if not _present.all():
    print('skipping', int((~_present).sum()), 'subhalo particle ids not found in ps')
shp = ps.loc[pids[_present]]

In [None]:
# Columns to project onto; change these to view another plane.
X, Y = 'x', 'y'

f, ax = plt.subplots(figsize=(8, 8))
# Optional backdrop with all particles from the sphere query:
# ax.scatter(ps[X], ps[Y], s=.1)
ax.scatter(shp[X], shp[Y], color='red', s=.1)
# Equal axis scaling so distances look the same in both directions.
ax.set_aspect('equal')