In [1]:
%matplotlib inline

In [9]:
import os
from astropy.io import fits
import pandas as pd
import sys
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
from astropy.coordinates import SkyCoord
import tarfile
from astropy import units as u 
import lsst.afw.display as afwDisplay
import lsst.afw.geom as afwGeom


ModuleNotFoundError: No module named 'lsst'

In [7]:
import lsst.daf.persistence as dafPersist

ModuleNotFoundError: No module named 'lsst'

## This Week's Goals
1. Access HiTS light curve data and plot it for at least one variable object
2. Access LSST light curve data for the same object(s) and plot it too

In [None]:
# Location of the HiTS DR1 variable-object catalog on disk.
hitsDataDir = '/epyc/users/mrawls/premap2019/hits-dr1'
hitsFilename = 'HiTS_DR1_variables_DM-dataset-subset.fits'
hitsFilepath = os.path.join(hitsDataDir, hitsFilename)

# Open the FITS file in a context manager so the file handle is released,
# and build the pandas DataFrame from the table in HDU 1 while it is open.
with fits.open(hitsFilepath) as hitsTable:
    hitsDf = pd.DataFrame(hitsTable[1].data)

print(len(hitsDf))  # number of rows (objects) in the catalog
# hitsDf.columns lists everything we can access from the data (82 columns)
hitsDf.head()  # last expression in the cell, so the preview is actually displayed


In [None]:

# Intermediate goal: build a tarball name of the form
# "<field>_<ccd>_LC_50.tar.gz" from a single row of the dataframe.
testRow = hitsDf.iloc[0]

# internalID is underscore-separated; the first two tokens form the field
# name and the third token is the CCD.
tok = testRow['internalID'].split('_')
print(tok)
field = '_'.join((tok[0], tok[1]))
print(field)
ccd = tok[2]

lightcurveTestFile = f'{field}_{ccd}_LC_50.tar.gz'
lightcurveTestPath = os.path.join(
    hitsDataDir, 'light_curves', field, ccd, lightcurveTestFile
)
print(lightcurveTestPath)

In [None]:
def plot_hits(row, lcPath='/epyc/users/mrawls/premap2019/hits-dr1/light_curves'):
    '''Plot the HiTS DR1 light curve of one catalog object.

    The light curve is read from the member ``<internalID>_g.dat`` of the
    tarball ``<lcPath>/<field>/<ccd>/<field>_<ccd>_LC_50.tar.gz``, where
    ``field`` is the first two underscore-separated tokens of ``internalID``
    and ``ccd`` is the third.

    Parameters
    ----------
    row : pandas Series (or any mapping)
        Row from the DR1 source dataframe; must provide ``'internalID'``.
    lcPath : str
        Path on disk to the directory holding the DR1 light-curve tarballs.

    Raises
    ------
    KeyError
        If the tarball has no member named ``<internalID>_g.dat``.
    ValueError
        If that member exists but is not a regular file.
    '''
    internalId = row['internalID']
    tok = internalId.split('_')
    ccd = tok[2]
    field = '_'.join(tok[:2])
    tarPath = os.path.join(lcPath, field, ccd, field + '_' + ccd + '_LC_50.tar.gz')
    memberName = internalId + '_g.dat'

    # Read the table while the tarball is open, then close it right away so
    # repeated calls do not leak file handles.
    with tarfile.open(tarPath) as tarball:
        data = tarball.extractfile(memberName)
        if data is None:
            raise ValueError(f'{memberName} in {tarPath} is not a regular file')
        dfl = pd.read_csv(data, sep='\t')  # tab-separated light-curve table

    plt.figure(figsize=(6, 4))
    plt.errorbar(dfl.MJD, dfl.MAG_AP1, dfl.MAGERR_AP1, marker='o', linestyle=':')
    plt.xlabel('Time (MJD)')
    plt.ylabel('magnitude')
    

In [None]:
# Plot the HiTS DR1 light curve for the object in row 22 of hitsDf.
plot_hits(hitsDf.iloc[22])

We made it through part 1 of the goals! We now have a function that can plot the light curve of any of the 165 HiTS DR1 variable objects. We'll pick up from here next time to work on part 2 of our goals.

In [None]:
# Rerun directory holding the HiTS data as processed with the LSST software.
repo = '/epyc/users/mrawls/premap2019/hits-lsst/hits2015/rerun/highres1'
# Requires the `lsst` stack (dafPersist is imported in an earlier cell).
# NOTE(review): `butler` is not used by any later cell shown in this notebook.
butler = dafPersist.Butler(repo)

In [None]:
# The association database file sits directly inside the rerun directory.
dbName = 'association.db'
dbPath = os.path.join(repo, dbName)

We are connecting to the database using sqlite3. This will run two queries to make two pandas dataframes: one with all the objects and one with all the sources. ***Objects are composed of one or more sources that have been associated together based on position in the sky.***

These are big dataframes, so they will take a little time to load.

In [None]:
# Open the SQLite database; the module-level queries below reuse this connection.
# NOTE(review): the connection is never closed in the cells shown here.
connection = sqlite3.connect(dbPath)

In [None]:
# Load the selected DiaObject columns into a dataframe, keeping only rows
# whose validityEnd is NULL.
# NOTE(review): presumably NULL validityEnd marks the current version of an object - confirm.
objTable = pd.read_sql_query('select diaObjectId, ra, decl, nDiaSources, \
                              gPSFluxMean, gPSFluxMeanErr, \
                              validityEnd, flags, \
                              gTOTFluxMean, gTOTFluxMeanErr \
                              from DiaObject where validityEnd is NULL;', connection)

Above we selected the columns we want from the **OBJECTS** table and arranged them in a dataframe.

In [None]:
# Load the selected columns for every row of DiaSource (no WHERE clause)
# into a dataframe.
srcTableAll = pd.read_sql_query('select diaSourceId, diaObjectId, \
                                  ra, decl, ccdVisitId, \
                                  midPointTai, apFlux, psFlux, apFluxErr, \
                                  psFluxErr, totFlux, totFluxErr, flags \
                                  from DiaSource;', connection)

Above we selected the columns we want from the **SOURCES** table and arranged them in a dataframe.

In [None]:
# Preview the first rows of the object table.
objTable.head()
# objTable.columns would list the column names instead


In [None]:
# Preview the first rows of the source table.
srcTableAll.head()

Using makeSrcTableFlags to get a version of srcTableAll that has "unpacked" information about the flags we want to use to filter out some obviously bad sources

In [None]:
sys.path.append('/epyc/users/mrawls/premap2019/ap_pipe-notebooks/')
from apdbPlots import makeSrcTableFlags

Now we will write code to discard the information we deem bad (flags).

In [None]:
# Names of the pixel flags we treat as marking a bad source.
# NOTE(review): badFlagList is not passed to makeSrcTableFlags in the cell below -
# confirm that the helper's own default matches this list, or pass it explicitly.
badFlagList = ['base_PixelFlags_flag_bad', 'base_PixelFlags_flag_suspect', 'base_PixelFlags_flag_saturatedCenter']

The function returns a lot of information, so we assign variable names to organize it.

In [None]:
# makeSrcTableFlags returns seven values; later cells only use goodSrc and goodObj.
# NOTE(review): presumably goodSrc/goodObj are the sources/objects left after
# removing bad-flagged sources - confirm against apdbPlots.makeSrcTableFlags.
flagTable, flagValues, srcTableFlags, flagFilter, noFlagFilter, \
    goodSrc, goodObj = makeSrcTableFlags(srcTableAll, objTable)  # unpack all seven return values

In [None]:
# Sky positions of the filtered LSST objects ...
lsstRas = goodObj['ra']
lsstDecs = goodObj['decl']
# ... and of the HiTS DR1 objects (median RA/Dec feature columns).
hitsRas = hitsDf['raMedian_feat']
hitsDecs = hitsDf['decMedian_feat']

Now we have RAs and Decs for both catalogs. We want to compare the LSST catalog to the objects in HiTS, so we need a way to match them up by position on the sky (using Astropy).

In [None]:
# Attach degree units to the RA/Dec columns and wrap them in SkyCoord objects
# so astropy can compare positions on the sky.
hitsCoords = SkyCoord(ra=hitsRas*u.degree, dec=hitsDecs*u.degree)
lsstCoords = SkyCoord(ra=lsstRas*u.degree, dec=lsstDecs*u.degree)
# For each HiTS coordinate, find the matching entry in the LSST catalog;
# idx holds the matched positions in lsstCoords.
# NOTE(review): d2d and d3d are never checked in the cells shown, so confirm
# that every match is close enough on the sky to be the same object.
idx, d2d, d3d = hitsCoords.match_to_catalog_sky(lsstCoords)

As before, we have a powerful function that returns lots of stuff, but we only need the indices (saved in idx)

In [None]:
idx  # idx[i] is the position in lsstCoords (and so in goodObj) matched to hitsCoords[i]

In [None]:
# For example, this pulls up the row of goodObj matched to hitsDf.iloc[2].
goodObj.iloc[idx[2]]

In [None]:
# Same lookup for the object matched to hitsDf.iloc[127].
goodObj.iloc[idx[127]]

In [None]:
def plotLsstLightcurve(obj, dbPath, fluxCol='totFlux'):
    '''Plot a light curve for one DIA (Difference Image Analysis) Object
    from an LSST APDB (Alert Production database).

    Parameters
    ----------
    obj : int
        diaObjectId, a really long integer that lets us retrieve the sources
        for a single object. numpy integers and integral floats (as returned
        when indexing a dataframe row) are accepted and converted with int().
    dbPath : str
        Path on disk to the SQLite APDB to load DIA Sources from;
        often the database is named `association.db`.
    fluxCol : str
        Which flux column to plot; choices are totFlux, psFlux, apFlux.
        The matching error column is ``fluxCol + 'Err'``.
    '''
    # Bind the ID as a query parameter instead of formatting it into the SQL
    # text: a numpy or float ID would otherwise be rendered by its repr.
    # Ordering by time keeps the dotted connecting line from zigzagging.
    query = ('select diaSourceId, diaObjectId, ra, decl, ccdVisitId, '
             'midPointTai, apFlux, psFlux, apFluxErr, psFluxErr, '
             'totFlux, totFluxErr, flags '
             'from DiaSource where diaObjectId = ? order by midPointTai;')

    # Load all sources for the single object "obj", then always release the
    # database handle, even if the query fails.
    connection = sqlite3.connect(dbPath)
    try:
        srcTable = pd.read_sql_query(query, connection, params=(int(obj),))
    finally:
        connection.close()

    fluxErrCol = fluxCol + 'Err'
    # Create the figure only after the data loaded, so a failed query does
    # not leave an empty figure behind.
    plt.figure(figsize=(6, 4))
    plt.errorbar(srcTable['midPointTai'], srcTable[fluxCol], yerr=srcTable[fluxErrCol],
                 ls=':', marker='o')
    plt.ylabel(fluxCol + ' (nJy)')
    plt.xlabel('Time (MJD)')

In [None]:
# HiTS DR1 light curve for the object in row 2 of hitsDf.
plot_hits(hitsDf.iloc[2])

Below we will make a light curve with the bad information filtered out. This graph represents data from HiTS, run through LSST software. Our end goal is to compare our light curves to the light curves produced from DEC software and see how they differ!

In [7]:
# idx[2] is the position in goodObj matched to hitsDf.iloc[2]; take that
# row's diaObjectId so we can look up its sources in the database.
obj = goodObj.iloc[idx[2]]['diaObjectId']
# Plot the LSST light curve for that object, read from the database at dbPath.
plotLsstLightcurve(obj, dbPath)

NameError: name 'goodObj' is not defined

In [None]:
# HiTS DR1 light curve for the object in row 127 of hitsDf.
plot_hits(hitsDf.iloc[127])

In [None]:
# diaObjectId of the goodObj row matched to hitsDf.iloc[127].
obj = goodObj.iloc[idx[127]]['diaObjectId']
# Plot the LSST light curve for that object, read from the database at dbPath.
plotLsstLightcurve(obj, dbPath)

In [None]:
# Hint: the astropy units module is your friend!
lsstTestMag = (140000*u.nJy).to(u.ABmag)
print(lsstTestMag.value)

In [None]:
# Load every DiaSource row belonging to the current `obj` (set in an earlier
# cell), using the module-level database connection.
# NOTE(review): srcTable is not used by the remaining lines of this cell.
srcTable = pd.read_sql_query(f'select diaSourceId, diaObjectId, \
                                  ra, decl, ccdVisitId, \
                                  midPointTai, apFlux, psFlux, apFluxErr, \
                                  psFluxErr, totFlux, totFluxErr, flags \
                                  from DiaSource where diaObjectId = {obj};', connection)
# Convert one totFlux value from goodSrc to an AB magnitude.
# NOTE(review): x[127] is simply the 128th row of goodSrc; it is not tied to
# `obj` or to hitsDf.iloc[127] - confirm which source was intended.
x= np.array(goodSrc['totFlux'])
lsstTestMag = (x[127]*u.nJy).to(u.ABmag)
print(lsstTestMag.value)


In [None]:
def plotLsstLightcurve(obj, srcTable, row, lcPath='/epyc/users/mrawls/premap2019/hits-dr1/light_curves'):
    '''Overplot the LSST and HiTS DR1 light curves of one object, in magnitudes.

    NOTE: this redefines, and therefore replaces, the earlier
    ``plotLsstLightcurve(obj, dbPath, fluxCol)`` defined in this notebook.

    Parameters
    ----------
    obj : diaObjectId
        a really long integer that selects the sources of a single object
    srcTable : Pandas dataframe containing DIA Sources
        must have the columns diaObjectId, midPointTai, magCol and magErrCol
        (magCol and magErrCol have to be added before calling this function)
    row : Pandas dataframe row from the HiTS DR1 source data
        must provide 'internalID', which locates the HiTS light-curve file
    lcPath : Path on disk to light curves from DR1
    '''
    # LSST sources of this object, in time order so the dotted connecting
    # line does not zigzag.
    srcRowFilter = (srcTable['diaObjectId'] == obj)
    srcRow = srcTable.loc[srcRowFilter].sort_values('midPointTai')

    # Locate the HiTS light curve: the first two tokens of internalID form
    # the field name and the third is the CCD.
    internalId = row['internalID']
    tok = internalId.split('_')
    ccd = tok[2]
    field = '_'.join(tok[:2])
    tarPath = os.path.join(lcPath, field, ccd, field + '_' + ccd + '_LC_50.tar.gz')
    memberName = internalId + '_g.dat'

    # Read the table while the tarball is open, then close it right away so
    # repeated calls do not leak file handles.
    with tarfile.open(tarPath) as tarball:
        data = tarball.extractfile(memberName)
        if data is None:
            raise ValueError(f'{memberName} in {tarPath} is not a regular file')
        dfl = pd.read_csv(data, sep='\t')  # tab-separated light-curve table

    plt.figure(figsize=(10, 8))
    plt.errorbar(srcRow['midPointTai'], srcRow['magCol'], yerr=srcRow['magErrCol'],
                 ls=':', marker='o', label='LSST (magCol)')
    plt.errorbar(dfl.MJD, dfl.MAG_AP1, dfl.MAGERR_AP1, marker='o', linestyle=':',
                 label='HiTS DR1 (MAG_AP1)')
    plt.xlabel('Time (MJD)')
    plt.ylabel('magnitude')
    plt.legend()  # two curves share the axes, so say which is which

In [None]:
# Convert every totFlux value (treated here as nJy) to an AB magnitude and
# store the result as a new 'magCol' column on srcTableAll.
# NOTE(review): this modifies srcTableAll in place; zero or negative fluxes
# presumably give non-finite magnitudes - confirm how those should be handled.
srcTableArray=np.array(srcTableAll['totFlux'])
mag = (srcTableArray*u.nJy).to(u.ABmag)
srcTableAll['magCol'] = mag
srcTableAll.head()

In [None]:
# Propagate the flux error to a magnitude error. For m = -2.5*log10(F) + const,
# first-order error propagation gives
#     sigma_m = (2.5 / ln 10) * sigma_F / |F|  ~=  1.0857 * sigma_F / |F|.
# The flux error belongs in the numerator; the earlier expression divided by
# both the error and the flux.
magErr = np.abs(2.5 / np.log(10) * srcTableAll['totFluxErr'] / srcTableAll['totFlux'])
srcTableAll['magErrCol'] = magErr  # new column, added to srcTableAll in place
srcTableAll.head()

In [None]:

# Pick HiTS object number 11 and its matched LSST object, then overplot both
# light curves using the most recent plotLsstLightcurve(obj, srcTable, row).
# srcTableAll must already contain the 'magCol' and 'magErrCol' columns.
obj = goodObj.iloc[idx[11]]['diaObjectId']
row = hitsDf.iloc[11]
plotLsstLightcurve(obj, srcTableAll, row)

In [None]:
# Scratch cell: the stray bare name here raised NameError under "Run All" and was removed.