### Part of this notebook is taken from the tutorial-notebooks: 03a, 07b and 08 

### Plottign functions taken from Melissa Graham's notebook and from Zooniverse testing
 https://github.com/rubin-dp0/cst-dev/blob/main/MLG_sandbox/DP02/citsci_sample_demo.ipynb

In [1]:
email = "jsv1206@gmail.com"  
slugName = "sreevani/test-project-sj" 
%run Citizen_Science_SDK.ipynb

Installing external dependencies...
Done installing external dependencies!
Enter your Zooniverse credentials...


Username:  sreevani
 ········


You now are logged in to the Zooniverse platform.
Loaded Citizen Science SDK


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
from lsst.rsp import get_tap_service, retrieve_query
import gc
import os

# Astropy
import astropy.units as u
from astropy.units import UnitsWarning
from astropy.timeseries import LombScargle

import matplotlib.pyplot as plt
plt.style.use('tableau-colorblind10')

from lsst.daf.butler import Butler
import lsst.daf.butler as dafButler
import lsst.afw.display as afwDisplay
import lsst.geom as geom

In [2]:
%matplotlib inline

In [3]:
#initializing Tap and Butler
pd.set_option('display.max_rows', 20)
warnings.simplefilter("ignore", category=UnitsWarning)
service = get_tap_service()
assert service is not None
assert service.baseurl == "https://data.lsst.cloud/api/tap"

# Use lsst.afw.display with the matplotlib backend
afwDisplay.setDefaultBackend('matplotlib')

In [4]:
params = {'axes.labelsize': 24,
          'font.size': 20,
          'legend.fontsize': 14,
          'xtick.major.width': 3,
          'xtick.minor.width': 2,
          'xtick.major.size': 12,
          'xtick.minor.size': 6,
          'xtick.direction': 'in',
          'xtick.top': True,
          'lines.linewidth': 3,
          'axes.linewidth': 3,
          'axes.labelweight': 3,
          'axes.titleweight': 3,
          'ytick.major.width': 3,
          'ytick.minor.width': 2,
          'ytick.major.size': 12,
          'ytick.minor.size': 6,
          'ytick.direction': 'in',
          'ytick.right': True,
          'figure.figsize': [10, 8],
          'figure.facecolor': 'White'
          }

plt.rcParams.update(params)

In [5]:
plot_filter_labels = {'u':'u', 'g':'g', 'r':'r', 'i':'i', 'z':'z', 'y':'y'}
plot_filter_colors = {'u': '#56b4e9', 'g': '#008060', 'r': '#ff4000',
                      'i': '#850000', 'z': '#6600cc', 'y': '#000000'}
plot_filter_symbols = {'u': 'o', 'g': '^', 'r': 'v', 'i': 's', 'z': '*', 'y': 'p'}

In [6]:
plots = []

# band to get flux for
bands = ['g','r','i'] 

In [7]:
# image size for cutouts
image_size = 100

In [8]:
# number of stars to query 
query_num_stars = 2

# Set any RA and DEC in degrees. This is the centre of your search radius
# Let's look at one known RR-Lyrae. 

ra_known_rrl = 62.1479031
dec_known_rrl = -35.799138

# Search radius in degrees
search_radius = 0.001

In [26]:
# For each variable stars, number of images to query to create flipbook
num_variable_images = 5

In [9]:
# Define the butler and collection to query from
config = 'dp02'
collection = '2.2i/runs/DP0.2'
butler = dafButler.Butler(config, collections=collection)
skymap = butler.get('skyMap')

In [33]:
## Creating directories to save images, lightcurves, and table

batch_dir = './variable_stars_output' 

if os.path.isdir(batch_dir) == False:
    os.mkdir(batch_dir)
    
if os.path.isdir(batch_dir+'/lc_plots') == False:
    os.mkdir(batch_dir+'/lc_plots')
else:
    os.system('rm -r '+batch_dir+'/lc_plots/*')
    
if os.path.isdir(batch_dir+'/text_files') == False:
    os.mkdir(batch_dir+'/text_files')
else:
    os.system('rm -r '+batch_dir+'/text_files/*')
    
if os.path.isdir(batch_dir+'/images') == False:
    os.mkdir(batch_dir+'/images')
else:
    os.system('rm -r '+batch_dir+'/images/*')

rm: cannot remove ‘./variable_stars_output/text_files/*’: No such file or directory


## Plotting functions

In [32]:
def plotlc(days, magnitudes, out_name):
    fig = plt.figure(figsize=(10,4))
    for band in bands:
        plt.plot(days[band], magnitudes[band],\
                 plot_filter_symbols[band], ms=4, label=plot_filter_labels[band])
    plt.minorticks_on()
    plt.xlabel('MJD (days)')
    plt.ylabel('magnitude')
    plt.legend('upper right')
    plt.legend()
    plt.savefig(out_name)
    return fig
    
def make_calexp_fig(ra_deg,dec_deg,visit,detector,band,datasetType='calexp',cutoutSideLength=image_size):
    """
    get the tract and patch of a source. currently retrieves i band only. 

    Parameters
    ----------
    ra : ra of source in degrees
    dec : dec of source in degrees
    
    """
    cutoutSize = geom.ExtentI(cutoutSideLength, cutoutSideLength)
    
    radec = geom.SpherePoint(ra_deg,dec_deg, geom.degrees)
    
    dataId = {'visit': visit, 'detector': detector}  
    calexp_wcs = butler.get('calexp.wcs', **dataId)
    
    xy = geom.PointI(calexp_wcs.skyToPixel(radec))
    bbox = geom.BoxI(xy - cutoutSize // 2, cutoutSize)
    parameters = {'bbox': bbox}
    
    cutout_image = butler.get('calexp', parameters=parameters, **dataId)

    return cutout_image


def remove_figure(fig):
    """
    Remove a figure to reduce memory footprint.

    Parameters
    ----------
    fig: matplotlib.figure.Figure
        Figure to be removed.

    Returns
    -------
    None
    """
    # get the axes and clear their images
    for ax in fig.get_axes():
        for im in ax.get_images():
            im.remove()
    fig.clf()       # clear the figure
    plt.close(fig)  # close the figure

    gc.collect()    # call the garbage collector
    

## Getting Images from Butler

In [13]:
def get_cutout_image(ra_deg,dec_deg,visit,detector,band,datasetType='calexp',cutoutSideLength=image_size):
    """
    get the tract and patch of a source. currently retrieves i band only. 

    Parameters
    ----------
    ra : ra of source in degrees
    dec : dec of source in degrees
    
    """
    cutoutSize = geom.ExtentI(cutoutSideLength, cutoutSideLength)
    
    radec = geom.SpherePoint(ra_deg,dec_deg, geom.degrees)
    
    dataId = {'visit': visit, 'detector': detector}  
    calexp_wcs = butler.get('calexp.wcs', **dataId)
    
    xy = geom.PointI(calexp_wcs.skyToPixel(radec))
    bbox = geom.BoxI(xy - cutoutSize // 2, cutoutSize)
    parameters = {'bbox': bbox}
    
    cutout_image = butler.get('calexp', parameters=parameters, **dataId)

    return cutout_image

## Extract MJD and flux from the queried table

In [14]:
def get_flux(flux_table):
    # Create masks for the bands
    pick = {}
    for filter in plot_filter_labels:
        pick[filter] = (flux_table['band'] == filter)
    mjd_days = {}
    mags = {}
    for filter in plot_filter_labels:
        mjd_days[filter] = np.array(flux_table[pick[filter]]['expMidptMJD']) * u.day
        mags[filter] = np.array(flux_table[pick[filter]]['psfMag'])
        
    return mjd_days, mags

## Query to get variable stars

Selecting stars (truth_type=2)

variable (is_variable = 1)

is_pointsource = 1

In [18]:
# To query more than one star, change = 1 to <= 1

def query_stars(ra_deg, dec_deg, radius_deg, limit):
    #radius within which you want to query
    query = "SELECT mt.id_truth_type, mt.match_objectId, ts.ra, ts.dec "\
            "FROM dp02_dc2_catalogs.MatchesTruth AS mt "\
            "JOIN dp02_dc2_catalogs.TruthSummary AS ts ON mt.id_truth_type = ts.id_truth_type "\
            "WHERE ts.truth_type=2 "\
            "AND ts.is_variable = 1 "\
            "AND ts.is_pointsource = 1 "\
            "AND mt.match_objectId > 1 "\
            "AND CONTAINS(POINT('ICRS', ts.ra, ts.dec), CIRCLE('ICRS', "+ str(ra_deg)+", "+str(dec_deg)+", "+str(radius_deg)+")) = 1 "\
            "LIMIT "+str(limit)+" "
    results = service.search(query)
    variable_stars = results.to_table().to_pandas()
    return variable_stars

## Query to get the flux at all the bands, visitId, and detector information visitId and detector information 

VisitId and detetor is needed to get the calexp images from Butler

In [29]:
# def query_flux(objid):
#     query = "SELECT src.band, src.ccdVisitId, src.coord_ra, src.coord_dec, "\
#             "src.objectId, src.psfFlux, src.psfFluxErr, "\
#             "ccdvis.detector, ccdvis.visitId, "\
#             "scisql_nanojanskyToAbMag(psfFlux) as psfMag, "\
#             "visinfo.band, "\
#             "visinfo.expMidptMJD "\
#             "FROM dp02_dc2_catalogs.ForcedSource as src "\
#             "JOIN dp02_dc2_catalogs.CcdVisit as visinfo "\
#             "ON visinfo.ccdVisitId = src.ccdVisitId "\
#             "WHERE src.objectId = "+str(objid)+" "
#     lc = service.search(query)
#     flux_allbands = lc.to_table()
#     # Create masks for the bands
#     pick = {}
#     for filter in plot_filter_labels:
#         pick[filter] = (flux_allbands['band'] == filter)
#     mjd_days = {}
#     mags = {}
#     for filter in plot_filter_labels:
#         mjd_days[filter] = np.array(flux_allbands[pick[filter]]['expMidptMJD']) * u.day
#         mags[filter] = np.array(flux_allbands[pick[filter]]['psfMag'])
        
#     return mjd_days, mags

def query_flux(objid):
    query = "SELECT src.band, src.ccdVisitId, src.coord_ra, src.coord_dec, "\
            "src.objectId, src.psfFlux, src.psfFluxErr, "\
            "visinfo.detector, visinfo.visitId, "\
            "scisql_nanojanskyToAbMag(psfFlux) as psfMag, "\
            "visinfo.band, "\
            "visinfo.expMidptMJD "\
            "FROM dp02_dc2_catalogs.ForcedSource as src "\
            "JOIN dp02_dc2_catalogs.CcdVisit as visinfo "\
            "ON visinfo.ccdVisitId = src.ccdVisitId "\
            "WHERE src.objectId = "+str(objid)+" "
    table = service.search(query)
    flux_table = table.to_table()
    return flux_table

In [22]:
%%time
variable_stars = query_stars(ra_known_rrl, dec_known_rrl, search_radius, query_num_stars)

CPU times: user 8.88 ms, sys: 1.97 ms, total: 10.8 ms
Wall time: 2.05 s


In [23]:
variable_stars

Unnamed: 0,id_truth_type,match_objectId,ra,dec
0,835714_2,1651589610221899038,62.147903,-35.799138


## Get Calexp images and Lightcurves

In [34]:
stars_matchid = variable_stars['match_objectId'].to_numpy()
df_row = []

for i, objid in enumerate(stars_matchid):
    
    # Query the variable star flux, detector and visit information
    ccd_flux_table = query_flux(objid)
    
    # Get calexp images from Butler and plot them
    idx_images = np.round(np.linspace(0, len(ccd_flux_table) - 1, num_variable_images)).astype(int)  #randomly select 5 images for each variable star
    
    for i,idx in enumerate(idx_images):
        star_ra = ccd_flux_table['coord_ra'][idx]
        star_dec = ccd_flux_table['coord_dec'][idx]
        star_detector = ccd_flux_table['detector'][idx]
        star_visitid = ccd_flux_table['visitId'][idx]
        star_id = ccd_flux_table['objectId'][idx]
        star_ccdid = ccd_flux_table['ccdVisitId'][idx]
        print(star_ra, star_dec)
        print(star_detector, star_visitid)
        # For now, get only 'r' band images for each variable star
        calexp_image = get_cutout_image(star_ra, star_dec, star_visitid, star_detector, 'r', datasetType='calexp', cutoutSideLength=100)
        figout = make_fig(calexp_image, star_ra,star_dec,batch_dir+"/images/"+str(star_id)+"_"+str(star_ccdid)+".png")
        remove_figure(figout)
        
    mjd_days, mags = get_flux(ccd_flux_table)
    figout = plotlc(mjd_days, mags, batch_dir+"/lc_plots/"+"lc_"+str(objid)+".png")
    remove_figure(figout) 
    df_all_bands = []
    for band in bands:
        df = pd.DataFrame(data = {'band': [band]*len(mjd_days[band]), 'mjd_days': mjd_days[band], \
                          'mags': mags[band]}, index=None)
        df_all_bands.append(df)
    
    df_final = pd.concat(df_all_bands)
    outfile = batch_dir+"/text_files/"+"lc_"+str(objid)+".csv"
    df_final.to_csv(outfile, index=False, sep=',')

62.1479018 -35.7991382
46 414861
62.1479018 -35.7991382
138 669797
62.1479018 -35.7991382
2 970049
62.1479018 -35.7991382
85 12467
62.1479018 -35.7991382
123 686630


# Create csv for Zooniverse flipbook 

https://help.zooniverse.org/getting-started/example/#details-subject-sets-and-manifest-details-aka-what-is-a-manifesthttps://help.zooniverse.org/getting-started/example/#details-subject-sets-and-manifest-details-aka-what-is-a-manifest

In [35]:
# Still work in progress

In [36]:
# df_row = []
# for i, objid in enumerate(stars_matchid):
#     ccd_flux_table = query_flux(objid)
#     idx_images = np.round(np.linspace(0, len(ccd_flux_table) - 1, num_variable_images)).astype(int) 
#     df = pd.DataFrame(data = {'image1:' }
    
    

In [37]:
# df_row = []
# for index, row in variable_stars[0:2].iterrows():
#     df = pd.DataFrame(data = {'band_g': str(row['id_truth_type'])+"_g.png", 
#                               'band_r': str(row['id_truth_type'])+"_r.png",
#                               'band_i': str(row['id_truth_type'])+"_i.png"}, index=[0])
#     df_row.append(df)
    
# df_manifest = pd.concat(df_row)

# outfile = batch_dir+"/flipbook_manifest.csv"
# df_manifest.to_csv(outfile, index=False, sep=',')

# Send data to Zooniverse

In [22]:
cutouts = []
fields_to_add = ["objectId", "coord_ra", "coord_dec", "detect_isPrimary", "g_cModelFlux", "r_cModelFlux", "r_extendedness", "r_inputCount"]

for index, row in variable_stars[0:5].iterrows():
    for band in bands:
        figout_data = {
        "filename": str(row['id_truth_type'])+"_"+band+".png",
        "edc_ver_id": 'xxx',    
        "objectId": row['id_truth_type']
        }
        
        if "coord_ra" in fields_to_add:
            figout_data["coord_ra"] = row['ra']
        if "coord_dec" in fields_to_add:
            figout_data["coord_dec"] = row['dec']
        if "g_cModelFlux" in fields_to_add:
            figout_data["g_cModelFlux"] = True
        if "r_cModelFlux" in fields_to_add:
            figout_data["r_cModelFlux"] = 26.5
        if "r_extendedness" in fields_to_add:
            figout_data["r_extendedness"] = 1.0
        if "r_inputCount" in fields_to_add:
            figout_data["r_inputCount"] = 1.0
        cutouts.append(figout_data)
        
cutout_dir = batch_dir+"/coadd_images/"

In [23]:
list(cutouts[0].keys())

['filename',
 'edc_ver_id',
 'objectId',
 'coord_ra',
 'coord_dec',
 'g_cModelFlux',
 'r_cModelFlux',
 'r_extendedness',
 'r_inputCount']

In [24]:
subject_set_name = "variable stars cutouts" 

In [25]:
__cit_sci_data_type = _HIPS_CUTOUTS # Important: DO NOT change this value. Update - this value may be changed.
send_data(subject_set_name, cutout_dir, cutouts)

'1. Checking batch status'

'2. Writing metadata file required by the Rubin EPO Data Center.'

'3. Zipping up all the astro cutouts - this can take a few minutes with large data sets, but unlikely more than 10 minutes.'

'4. Uploading the citizen science data'

'5. Creating a new Zooniverse subject set'

'6. Notifying the Rubin EPO Data Center of the new data, which will finish processing of the data and notify Zooniverse'

'7. Cleaning up unused subject set on the Zooniverse platform, vendor_batch_id : 112310'