# LSLGA Groups

The purpose of this notebook is to build a group catalog from the parent sample of large galaxies.  Galaxies near one another (projected on the sky) must be analyzed at the same time, so we use a simple friends-of-friends algorithm to generate our group catalog.

## Preliminaries

In [1]:
import os
import numpy as np

In [2]:
import matplotlib.pyplot as plt
import astropy.units as u
from astropy.table import Table, Column
from astropy.coordinates import SkyCoord

In [3]:
from pydl.pydlutils.spheregroup import spheregroup

In [4]:
%matplotlib inline

## Read the parent HyperLeda catalog

In [None]:
# Top-level LSLGA data directory, read from the LSLGA_DIR environment variable
# (None if the variable is not set).
LSLGAdir = os.environ.get('LSLGA_DIR')

In [None]:
# Load the parent HyperLeda sample from the 'sample' subdirectory of the LSLGA tree
# and display it.
sampledir = os.path.join(LSLGAdir, 'sample')
ledafile = os.path.join(sampledir, 'leda-logd25-0.05.fits')
leda = Table.read(ledafile)
leda

In [None]:
# Quick-look plot of the sky positions of every galaxy in the parent sample.
point_style = dict(s=1, alpha=0.5)
fig, ax = plt.subplots()
ax.scatter(leda['RA'], leda['DEC'], **point_style)

In [None]:
# Optional (disabled): uncomment to restrict the sample to the patch
# 200 < RA < 220, 0 < DEC < 10 before running the group finder.
#these = (leda['RA'] > 200) * (leda['RA'] < 220) * (leda['DEC'] > 0) * (leda['DEC'] < 10.0)
#leda = leda[these]
#print(np.sum(these))

## Run FoF with spheregroup

Identify groups using a simple angular linking length.  Then construct a catalog of group properties.

In [None]:
# Angular friends-of-friends linking length; divided by 60 before being passed
# to spheregroup.
linking_length = 1.0 # [arcmin]

In [None]:
# Run the friends-of-friends group finder on the sky positions.  The linking
# length is converted from arcmin to degrees first.  `grp` holds the group ID of
# each galaxy; the remaining outputs are presumably the group multiplicities and
# first/next-member linked lists -- confirm against the pydl documentation.
linking_length_deg = linking_length / 60.0
grp, mult, frst, nxt = spheregroup(leda['RA'], leda['DEC'], linking_length_deg)

In [None]:
# Count the members of every group: with unit-width bins over [0, len(grp)],
# element i of `npergrp` is the number of galaxies assigned group ID i.
nobj = len(grp)
npergrp, _ = np.histogram(grp, bins=nobj, range=(0, nobj))

# Number of multi-member groups, number of singleton groups, and their total.
is_biggrp = npergrp > 1
is_smallgrp = npergrp == 1
nbiggrp = np.sum(is_biggrp).astype('int')
nsmallgrp = np.sum(is_smallgrp).astype('int')
ngrp = nsmallgrp + nbiggrp

In [None]:
# Summarize the group-size distribution.  The size bins are disjoint
# (1, 2-5, 6-10, >10); the third label previously read "5-10" even though the
# mask (npergrp > 5) excludes five-member groups, which are counted in "2-5".
n_2to5 = np.count_nonzero((npergrp > 1) & (npergrp <= 5))
n_6to10 = np.count_nonzero((npergrp > 5) & (npergrp <= 10))
n_gt10 = np.count_nonzero(npergrp > 10)

print('Found {} total groups, including:'.format(ngrp))
print('  {} groups with 1 member'.format(nsmallgrp))
print('  {} groups with 2-5 members'.format(n_2to5))
print('  {} groups with 6-10 members'.format(n_6to10))
print('  {} groups with >10 members'.format(n_gt10))

## Populate the output group catalog

In [None]:
# Allocate the empty output catalog: one row per group, columns added in the
# order listed below.
groupcat_schema = (
    ('GROUPID', 'i4'),     # unique ID number
    ('GALAXY', 'S1000'),
    ('NMEMBERS', 'i4'),
    ('RA', 'f8'),          # average RA
    ('DEC', 'f8'),         # average Dec
    ('RADIUS', 'f4'),
)

groupcat = Table()
for colname, coldtype in groupcat_schema:
    groupcat.add_column(Column(name=colname, dtype=coldtype, length=ngrp))

In [None]:
# Rows of `groupcat` reserved for the single-member groups: the first
# `nsmallgrp` rows of the catalog.
smallindx = np.arange(nsmallgrp)

In [None]:
# Populate the single-member groups.
#
# `npergrp` is indexed by *group ID*, so it has to be mapped through `grp` (the
# group ID of every galaxy) to obtain the multiplicity of each galaxy's group.
# Using np.where(npergrp == 1) directly would return group IDs, which are not
# valid row indices into `leda` and would copy the wrong galaxies.
ledaindx = np.where(npergrp[grp] == 1)[0]
assert len(ledaindx) == nsmallgrp, 'singleton galaxies do not match singleton groups'

groupcat['RA'][smallindx] = leda['RA'][ledaindx]
groupcat['DEC'][smallindx] = leda['DEC'][ledaindx]
groupcat['NMEMBERS'][smallindx] = 1
groupcat['GALAXY'][smallindx] = leda['GALAXY'][ledaindx]
# An isolated galaxy's group radius is half of its D25 diameter.
groupcat['RADIUS'][smallindx] = leda['D25'][ledaindx] / 2.0 # [arcsec]

In [None]:
# Rows of `groupcat` reserved for the multi-member groups: the `nbiggrp` rows
# that follow the first `nsmallgrp` rows.
bigindx = np.arange(nbiggrp) + nsmallgrp

In [None]:
# Populate the multi-member groups, one catalog row per group.  `grpindx` is the
# destination row in `groupcat`; `indx` is the corresponding group ID.
for grpindx, indx in zip(bigindx, np.where(npergrp > 1)[0]):
    ledaindx = np.where(grp == indx)[0]  # rows of `leda` belonging to this group
    _ra, _dec = leda['RA'][ledaindx], leda['DEC'][ledaindx]

    # A plain arithmetic mean of RA is wrong for a group that straddles the
    # RA = 0/360 boundary (e.g. 359.99 and 0.01 would average to 180).  Unwrap
    # the RAs to within +/-180 deg of the first member before averaging, then
    # fold the mean back into [0, 360).
    ra_vals = np.asarray(_ra, dtype='f8')
    ra_unwrapped = ra_vals[0] + ((ra_vals - ra_vals[0] + 180.0) % 360.0 - 180.0)

    groupcat['RA'][grpindx] = np.mean(ra_unwrapped) % 360.0
    groupcat['DEC'][grpindx] = np.mean(_dec)
    groupcat['NMEMBERS'][grpindx] = len(ledaindx)
    groupcat['GALAXY'][grpindx] = ','.join(leda['GALAXY'][ledaindx])

    # Largest pairwise angular separation (arcsec) between group members.
    # Starting from cc[1:] is sufficient because every separation involving
    # cc[0] is still included when the other members are compared against `cc`.
    cc = SkyCoord(ra=_ra*u.degree, dec=_dec*u.degree)
    maxsep = max(cc.separation(onecc).arcsec.max() for onecc in cc[1:])

    # The group radius is the larger of that separation and the biggest member
    # half-diameter.
    # NOTE(review): `maxsep` is in arcsec; confirm that D25 is also in arcsec.
    groupcat['RADIUS'][grpindx] = np.max( (maxsep, np.max(leda['D25'][ledaindx] / 2)) )

In [None]:
# Assign each catalog row a running ID (0 .. ngrp-1) in row order.
groupcat['GROUPID'] = np.arange(ngrp)

In [None]:
# Display the assembled group catalog.
groupcat

In [None]:
# Left: sky positions of the groups with more than two members.
# Right: normalized cumulative distribution of group multiplicity (all groups).
ww = np.where(groupcat['NMEMBERS'] > 2)[0]
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].scatter(groupcat['RA'][ww], groupcat['DEC'][ww], s=1, alpha=0.5)
ax[0].set_xlabel('RA (deg)')
ax[0].set_ylabel('Dec (deg)')
ax[1].set_xlabel('Number of group members')
ax[1].set_ylabel('Cumulative fraction of groups')
# The `normed` keyword was removed from Axes.hist in matplotlib 3.1;
# `density` is its replacement.
_ = ax[1].hist(groupcat['NMEMBERS'], bins=20, histtype='step',
               cumulative=True, density=True)

In [None]:
# Write the group catalog next to the parent sample, replacing any existing file.
outdir = os.path.join(LSLGAdir, 'sample')
groupfile = os.path.join(outdir, 'leda-logd25-0.05-groupcat.fits')
print('Writing {}'.format(groupfile))
groupcat.write(groupfile, overwrite=True)