<img align="left" src="logo-IJCLab-1.png" width="250" style="padding: 10px"> 
<b>Query information on tracts and patches from objects table </b> <br>
Last verified to run on 2022-10-12 with LSST Science Pipelines release w_2022_40 <br>
Contact authors: Sylvie Dagoret-Campagne (DP0 Delegate) <br>
Target audience: DP0 delegates <br>

### Learning Objectives

The goal is to localize some big structures in tracts by using a top-down approach, starting from a tract and then manually selecting a redshift slice with the HoloViews tool.
The density of sources is filtered with a Gaussian KDE kernel to emphasize the large-scale structure (LSS).


### Imports

In [None]:
# Import general python packages
import numpy as np
import re
import pandas as pd
import pickle
from pandas.testing import assert_frame_equal
import os
import errno
import shutil
import getpass

# Import the Rubin TAP service utilities
from lsst.rsp import get_tap_service, retrieve_query

# LSST Science Pipelines (Stack) packages
import lsst.daf.butler as dafButler
import lsst.afw.display as afwDisplay
import lsst.geom as geom
import lsst.afw.coord as afwCoord
afwDisplay.setDefaultBackend('matplotlib')

#
from lsst import skymap

# Astropy
from astropy import units as u
from astropy.coordinates import SkyCoord

# Bokeh for interactive visualization
import bokeh
from bokeh.io import output_file, output_notebook, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, CDSView, GroupFilter, HoverTool
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

import holoviews as hv
from holoviews import streams, opts
from holoviews.operation.datashader import rasterize
from holoviews.operation.datashader import datashade, dynspread
from holoviews.plotting.util import process_cmap

import datashader as dsh


# Set the maximum number of rows to display from pandas
pd.set_option('display.max_rows', 20)


# Set the holoviews plotting library to be bokeh
# You will see the holoviews + bokeh icons displayed when the library is loaded successfully
hv.extension('bokeh', 'matplotlib')


# Display bokeh plots inline in the notebook
output_notebook()

In [None]:
# Report the versions of bokeh, holoviews and datashader in use.
# This matters when referring to online documentation, because
# APIs can change between versions.
print(f"Bokeh version: {bokeh.__version__}")
print(f"Holoviews version: {hv.__version__}")
print(f"Datashader version: {dsh.__version__}")

In [None]:
# allow for matplotlib to create inline plots in our notebook
%matplotlib inline
import matplotlib.pyplot as plt      # imports matplotlib.pyplot as plt
from matplotlib.colors import Normalize

import warnings                      # imports the warnings library
import gc                            # imports python's garbage collector

# Ignore warnings
from astropy.units import UnitsWarning
warnings.simplefilter("ignore", category=UnitsWarning)

In [None]:
# Matplotlib defaults applied to the figures of this notebook,
# grouped by the kind of element they configure.
params = {
    # figure
    'figure.figsize': [10, 6],
    'figure.facecolor': 'White',
    # fonts and legend
    'font.size': 24,
    'legend.fontsize': 14,
    # axes frame, labels and titles
    'axes.labelsize': 28,
    'axes.linewidth': 3,
    'axes.labelweight': 3,
    'axes.titleweight': 3,
    # plotted lines
    'lines.linewidth': 3,
    # x ticks
    'xtick.major.width': 3,
    'xtick.minor.width': 2,
    'xtick.major.size': 12,
    'xtick.minor.size': 6,
    'xtick.direction': 'in',
    'xtick.top': True,
    # y ticks
    'ytick.major.width': 3,
    'ytick.minor.width': 2,
    'ytick.major.size': 12,
    'ytick.minor.size': 6,
    'ytick.direction': 'in',
    'ytick.right': True,
}

plt.rcParams.update(params)

In [None]:
from astropy.cosmology import FlatLambdaCDM

In [None]:
from IPython.display import Image

In [None]:
def remove_figure(fig):
    """Dispose of a matplotlib figure to reduce the memory footprint.

    Every image attached to the axes of ``fig`` is removed, then the
    figure is cleared and closed, and a garbage collection is requested.
    """
    # Walk through the images of all axes, one axes at a time
    images = (image for axes in fig.get_axes() for image in axes.get_images())
    for image in images:
        image.remove()
    fig.clf()
    plt.close(fig)
    gc.collect()

In [None]:
# What version of the Stack are we using?
! echo $IMAGE_DESCRIPTION
! eups list -s | grep lsst_distrib

### 1. Notebook Configuration

#### 1.1 Set up paths

In [None]:
# username
myusername=getpass.getuser()

In [None]:
# Temporary folders, if necessary.
# Always write some output in /scratch, never in user HOME.
NBDIR = 'survpropmap'  # relative path for this notebook output
TMPTOPDIR = "/scratch"
# Path of the user outputs in /scratch
TMPUSERDIR = os.path.join(TMPTOPDIR, myusername)
# Output path for this particular notebook
TMPNBDIR = os.path.join(TMPUSERDIR, NBDIR)

In [None]:
# Create the user temporary directory.
# exist_ok=True makes this a no-op when the directory already exists, so no
# separate existence check is needed. Only OSError is caught (a bare except
# would also intercept KeyboardInterrupt), and the original error is chained
# so the underlying cause stays visible in the traceback.
try:
    os.makedirs(TMPUSERDIR, exist_ok=True)
except OSError as err:
    raise OSError(f"Can't create destination directory {TMPUSERDIR}!") from err

In [None]:
# Create the temporary directory of this notebook.
# exist_ok=True makes this a no-op when the directory already exists, so no
# separate existence check is needed. Only OSError is caught (a bare except
# would also intercept KeyboardInterrupt), and the original error is chained
# so the underlying cause stays visible in the traceback.
try:
    os.makedirs(TMPNBDIR, exist_ok=True)
except OSError as err:
    raise OSError(f"Can't create destination directory {TMPNBDIR}!") from err

#### 1.2 Defines steering flags and parameters

The output of the query may be saved to a file to speed up the notebook when it is run more than once.
When all the following flags are set to False, no query output is saved to a file.
To speed up the demo, the presenter may keep some of those flags set to True.


In [None]:
# Steering flags for the on-disk copy of the query output.

# Select if query output will be saved on disk
FLAG_WRITE_DATAFRAMEONDISK = True
# Select if the query can be read from disk if it exists
FLAG_READ_DATAFRAMEFROMDISK = True
# Select if the output queries saved in file will be cleaned at the end of the notebook
FLAG_CLEAN_DATAONDISK = False

#### 2.1 Create the Rubin TAP Service client

In [None]:
# Get an instance of the TAP service
service = get_tap_service()
# Sanity checks: a service object was returned and it points to the
# expected TAP endpoint
assert service is not None
assert service.baseurl == "https://data.lsst.cloud/api/tap"

### 4. Work at patch level

#### build the query

In [None]:
# Selection limits used to build the query
MAGLIM = 21.0
RAMIN, RAMAX = 48.57, 75.24
DECMIN, DECMAX = -44.63, -26.78

# Centre and extent of the box spanned by the limits above
RIGHT_ASCENSION_CENTER = (RAMIN + RAMAX) / 2.
DECLINATION_CENTER = (DECMIN + DECMAX) / 2.
WIDTH = RAMAX - RAMIN
HEIGHT = DECMAX - DECMIN

# Comma-separated corners of the box, in the order
# (RAMIN, DECMIN), (RAMAX, DECMIN), (RAMAX, DECMAX), (RAMIN, DECMAX)
VERTEXES = ",".join(
    str(value)
    for value in (RAMIN, DECMIN, RAMAX, DECMIN, RAMAX, DECMAX, RAMIN, DECMAX)
)
VERTEXES

In [None]:
# ADQL query returning coord_ra and coord_dec of the Object rows located
# inside the polygon defined by VERTEXES, with r_extendedness = 1,
# detect_isPrimary = 1 and scisql_nanojanskyToAbMag(r_cModelFlux) below MAGLIM.
my_adql_query = (
    "SELECT coord_ra, coord_dec "
    "FROM dp02_dc2_catalogs.Object "
    "WHERE CONTAINS(POINT('ICRS', coord_ra, coord_dec), "
    f"POLYGON('ICRS',{VERTEXES} )) = 1 "
    "AND r_extendedness = 1 "
    "AND detect_isPrimary = 1 "
    f"AND scisql_nanojanskyToAbMag(r_cModelFlux) < {MAGLIM}"
)

In [None]:
my_adql_query

In [None]:
# Pickle file holding the query result, located in the notebook output directory
filename_result = 'sources_result.pkl'
fullfilename_result = os.path.join(TMPNBDIR, filename_result)

In [None]:
filename_result

#### Call the job

In [None]:
# Obtain the source table: reuse the result saved on disk by a previous run
# when allowed and available, otherwise run the query on the TAP service.
if FLAG_READ_DATAFRAMEFROMDISK and os.path.exists(fullfilename_result):
    # NOTE: unpickling executes code; only read pickle files written by this
    # notebook in a trusted location.
    sql_result = pd.read_pickle(fullfilename_result)
    source_data = sql_result
else:
    # Create and submit the job. This step does not run the query yet
    job = service.submit_job(my_adql_query, maxrec=5_000_000)
    # Get the job URL
    print('Job URL is', job.url)

    # Get the job phase. It will be pending as we have not yet started the job
    print('Job phase is', job.phase)

    # Run the job. You will see that the cell completes executing,
    # even though the query is still running
    job.run()

    # Use this to tell python to wait for the job to finish if
    # you don't want to run anything else while waiting
    # The cell will continue executing until the job is finished
    job.wait(phases=['COMPLETED', 'ERROR'])
    print('Job phase is', job.phase)

    # A useful function to raise an exception if there was a problem with the query
    job.raise_if_error()

    # Once the job completes successfully, you can fetch the results
    async_tract_data = job.fetch_result()

    source_data = async_tract_data.to_table().to_pandas()

    # Save only a freshly fetched result: a table that was just loaded from
    # this very file does not need to be written back to it.
    if FLAG_WRITE_DATAFRAMEONDISK:
        source_data.to_pickle(fullfilename_result)

In [None]:
! ls -l $TMPNBDIR

In [None]:
len(source_data)

In [None]:
source_data.head()

##### a) 2D histogram view with matplotlib

In [None]:
# Coordinates of the selected sources and the range each of them spans
x = source_data["coord_ra"]
y = source_data["coord_dec"]
xmin, xmax = x.min(), x.max()
ymin, ymax = y.min(), y.max()


In [None]:
# 2D histogram of the source coordinates with 1000 bins along each axis
H, xedges, yedges = np.histogram2d(x, y, bins=(1000, 1000))

### View with Matplotlib

In [None]:
#norm = Normalize(vmin=0, vmax=0.6)

In [None]:
# Show the 2D histogram as an image. H is transposed so that its first
# axis (x) runs along the horizontal axis of the plot.
fig, ax = plt.subplots(figsize=(7, 6))
img = ax.imshow(
    H.T,
    origin="lower",
    extent=(xmin, xmax, ymin, ymax),
    cmap="jet",
)
plt.colorbar(img, ax=ax)
ax.set_aspect('auto')
ax.set_xlabel("RA (deg)")
ax.set_ylabel("DEC (deg)")
ax.set_title("sources (matplotlib image, histo2D)")

In [None]:
remove_figure(fig)

In [None]:
# Distribution of the counts per cell of the 2D histogram, with a log y scale
plt.figure(figsize=(4, 3))
plt.hist(H.ravel(), bins=50, range=(0, 51), color='b')
plt.yscale('log')

## View with HoloViews

In [None]:
# Display options passed to the HoloViews image
img_opts = {
    'height': 350,
    'width': 450,
    'xaxis': "bottom",
    'padding': 0.01,
    'fontsize': {'title': '12pt'},
    'colorbar': True,
    'toolbar': 'right',
    'show_grid': True,
    'title': "Objects for SPM",
    'xlabel': "RA",
    'ylabel': "DEC",
    'tools': ['hover'],
}

In [None]:
# Build the HoloViews image from the transposed, vertically flipped histogram
flipHT = np.flipud(H.T)
img_bounds = (x.min(), y.min(), x.max(), y.max())
img = hv.Image(flipHT, bounds=img_bounds)
# Apply the basic options first, then the shared display options
img = img.opts(cmap="jet", title="Objects", xlabel="RA", ylabel="DEC")
img = img.opts(**img_opts)

In [None]:
rasterize(img)

# Clean file if required

In [None]:
# Remove the notebook output directory and everything in it when requested.
# A failure to remove it is reported but does not stop the notebook.
if FLAG_CLEAN_DATAONDISK and os.path.isdir(TMPNBDIR):
    try:
        shutil.rmtree(TMPNBDIR)
    except OSError as err:
        print(f"Error: {TMPNBDIR} : {err.strerror}")