In [20]:
# std lib
from getpass import getpass

# standard
import numpy as np
import pylab as plt
import matplotlib
%matplotlib inline

from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)

import pandas as pd

from numpy.polynomial import polynomial as P

# Data Lab
from dl import authClient as ac, queryClient as qc, storeClient as sc

# others
from dl.helpers.utils import convert
import os
import glob
import gc

In [21]:
# Log in to Data Lab.
# The user name is read with input() and the password with getpass(), which
# does not echo what is typed; whatever ac.login returns is kept in `token`.

token = ac.login(
    input("Enter user name: (+ENTER) "),
    getpass("Enter password: (+ENTER) "),
)

Enter user name: (+ENTER)  malinadesai
Enter password: (+ENTER)  ·········


In [22]:
# read the table of DES tile names from the csv file in the working directory

destilenames = pd.read_csv(filepath_or_buffer='destilenames.csv')

In [23]:
# functions

def save_to_csv(frame, filename):
    '''
    Writes the dataframe [frame] to a csv called [filename], keeping the
    row labels as an ordinary column.
    frame     -   pd.dataframe object (modified in place, see below)
    filename  -   string (passed straight to DataFrame.to_csv)
    The current index is copied into a column named 'index' on [frame]
    itself (replacing any column already called 'index'); the csv is then
    written without pandas' own index column.
    '''
    row_labels = frame.index
    frame['index'] = row_labels
    frame.to_csv(path_or_buf=filename, index=False)
    
def save_to_csv_noindex(frame, filename):
    '''
    Writes the dataframe [frame] to a csv called [filename] exactly as it is.
    frame     -   pd.dataframe object (left unchanged)
    filename  -   string (passed straight to DataFrame.to_csv)
    The index is neither added as a column nor written to the file.
    '''
    export_options = {'index': False}
    frame.to_csv(filename, **export_options)
    
def _query_mydb_table(table_name):
    '''
    Runs SELECT * on the mydb table [table_name] and returns the query
    response after passing it through convert().
    table_name - string, table name inside mydb (without the 'mydb://' prefix)
    '''
    query = 'SELECT * from mydb://{}'.format(table_name)
    return convert(qc.query(sql=query))


def _closest_unique_matches(matches, required_cols, id_col):
    '''
    Keeps a single row per [id_col] value, preferring the smallest dist_arcsec.
    matches       - pd.dataframe object with a 'dist_arcsec' column
    required_cols - list of column names; rows with a missing value in any
                    of them are dropped first
    id_col        - string, column identifying the matched source
    Returns (unique, duplicate):
    unique    - one row per [id_col], sorted by dist_arcsec, index reset to 0..n-1
    duplicate - the rows that were discarded as repeats; they keep the index
                labels they had in [matches]
    '''
    complete = matches.dropna(subset=required_cols)

    # Sort by separation so that the first occurrence of every id is also
    # its closest match.
    ranked = complete.sort_values(by='dist_arcsec')

    is_repeat = ranked.duplicated(subset=[id_col])
    duplicate = ranked[is_repeat]
    unique = ranked[~is_repeat].reset_index(drop=True)

    return unique, duplicate


def get_tile_dbtable_vhs(table_num):
    '''
    Loads mydb table desvhs_tile[table_num] and de-duplicates it on 'sourceid'.
    table_num - integer representing tile number associated with desired table
    Rows missing any of japermag3, japermag3err, ksapermag3, ksapermag3err
    are dropped; of the rows sharing a sourceid only the one with the
    smallest dist_arcsec is kept.
    Returns (df, duplicate): the cleaned dataframe and the discarded repeats.
    '''
    raw = _query_mydb_table('desvhs_tile{}'.format(table_num))
    return _closest_unique_matches(
        raw,
        ['japermag3', 'japermag3err', 'ksapermag3', 'ksapermag3err'],
        'sourceid',
    )


def get_tile_dbtable_cat(table_num):
    '''
    Loads mydb table descat_tile[table_num] and de-duplicates it on 'source_id'.
    table_num - integer representing tile number associated with desired table
    Rows missing any of w1mpro, w1sigmpro, w2mpro, w2sigmpro are dropped;
    of the rows sharing a source_id only the one with the smallest
    dist_arcsec is kept.
    Returns (df, duplicate): the cleaned dataframe and the discarded repeats.
    '''
    raw = _query_mydb_table('descat_tile{}'.format(table_num))
    return _closest_unique_matches(
        raw,
        ['w1mpro', 'w1sigmpro', 'w2mpro', 'w2sigmpro'],
        'source_id',
    )


In [None]:
# splitting DES into large tiles based on tilename
#
# The tilenames in column 0 of destilenames are queried one at a time and the
# results are written out in groups of TILES_PER_FILE tilenames per csv file
# (des_tile1.csv, des_tile2.csv, ...).

TILES_PER_FILE = 50                   # number of DES tilenames combined into one csv
total_rows = len(destilenames)        # total number of tilenames
tile_number = 1                       # used for numbering outputs as large tiles

for batch_start in range(0, total_rows, TILES_PER_FILE):
    batch_frames = []                 # one dataframe per tilename in this group
    for tilename in destilenames.iloc[batch_start:batch_start + TILES_PER_FILE, 0]:
        # querying DES:
        query = """
        SELECT ra, dec, wavg_mag_psf_r, wavg_mag_psf_i, wavg_mag_psf_z, wavg_mag_psf_y, 
        wavg_magerr_psf_r, wavg_magerr_psf_i, wavg_magerr_psf_z, wavg_magerr_psf_y, tilename
        FROM des_dr2.main as dr2
        WHERE tilename = '{}'
            AND dr2.EXTENDED_CLASS_COADD <= 1
            AND dr2.IMAFLAGS_ISO_I = 0
            AND dr2.IMAFLAGS_ISO_Z = 0
            AND dr2.FLAGS_I < 4 
            AND dr2.FLAGS_Z < 4
        """.format(tilename)
        # formatting response as a csv and turning it into a dataframe
        response = qc.query(sql=query,format='csv')
        sources = convert(response,'pandas')
        # removing -99's from required DES z and DES y band columns
        has_z = sources.wavg_mag_psf_z != -99.0
        has_y = sources.wavg_mag_psf_y != -99.0
        sources = sources[has_z & has_y]
        # renaming columns
        sources = sources.rename(columns = {'ra' : 'ra_des', 'dec' : 'dec_des'})
        # resetting index
        sources = sources.reset_index(drop = True)
        batch_frames.append(sources)
    # Concatenate once per group instead of growing a dataframe inside the
    # loop, which would re-copy every earlier row on each query.  The frames
    # are joined in reverse query order so the rows land in the csv in the
    # same order as when each new result is prepended to the running output.
    output = pd.concat(batch_frames[::-1], ignore_index = True)
    # saving output as a csv file (save_to_csv also adds an 'index' column)
    save_to_csv(output, 'des_tile%d.csv' % tile_number)
    tile_number += 1


In [None]:
# opening and processing des-vhs matches (mydb tables desvhs_tile1 .. desvhs_tile204)

# Each entry is a two-item list [de-duplicated matches, discarded repeats].
# Lists rather than tuples, because the first slot is overwritten in place
# once the tile has been merged.
desvhs_list = [list(get_tile_dbtable_vhs(tile_num)) for tile_num in range(1, 205)]

print(len(desvhs_list))


In [7]:
# show the de-duplicated des-vhs matches of the first loaded tile
# (entry 0 of the list, element 0 of its [matches, duplicates] pair)
desvhs_list[0][0]

Unnamed: 0,t1_ra_des,t1_dec_des,t1_wavg_mag_psf_r,t1_wavg_mag_psf_i,t1_wavg_mag_psf_z,t1_wavg_mag_psf_y,t1_wavg_magerr_psf_r,t1_wavg_magerr_psf_i,t1_wavg_magerr_psf_z,t1_wavg_magerr_psf_y,...,dec2000,japermag3,japermag3err,ksapermag3,ksapermag3err,pgalaxy,pstar,ra2000,sourceid,dist_arcsec
30087,75.567114,-62.397345,20.980152,20.783882,20.694887,20.675549,0.005994,0.007111,0.011759,0.052843,...,-62.397149,19.212173,0.086501,18.692427,0.303116,0.993865,0.003067,75.567672,472985790666,1.168386
39615,75.572685,-61.754233,21.146545,21.120657,21.117586,21.055979,0.007841,0.009795,0.019800,0.074144,...,-61.754298,19.688097,0.133111,18.365564,0.218808,0.993865,0.003067,75.573409,472985763465,1.255100
113161,66.745926,-28.831198,-99.000000,20.254150,19.856386,19.678724,-99.000000,0.011447,0.009391,0.038764,...,-28.830848,17.687151,0.024815,16.878920,0.061370,0.993865,0.003067,66.745781,473024285768,1.338694
13390,13.789402,-63.732621,21.350176,20.284021,19.842209,19.661495,0.022125,0.008034,0.006431,0.024759,...,-63.732270,17.673832,0.027273,17.006012,0.069162,0.993865,0.003067,13.789082,473587275347,1.363030
35746,75.407211,-62.052873,20.748817,20.824652,20.855060,20.664755,0.005109,0.007632,0.014184,0.067799,...,-62.053191,19.058645,0.072653,18.323967,0.215525,0.993865,0.003067,75.406709,472985775276,1.422952
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13983,13.723814,-63.560825,22.571340,22.128832,21.934113,21.850721,0.022650,0.023787,0.039163,0.250581,...,-63.556954,18.915981,0.083785,17.901615,0.157078,0.993865,0.003067,13.740262,473587272033,29.821053
49342,61.307913,-14.864673,22.468895,21.746560,21.472404,21.161160,0.022671,0.020941,0.029745,0.093460,...,-14.871110,15.456573,0.004795,14.962214,0.013119,0.003067,0.993865,61.313319,473049747151,29.848076
55985,39.644644,-0.930056,23.422527,22.355078,21.704315,21.516470,0.069616,0.038684,0.042930,0.257179,...,-0.923253,18.240720,0.049558,17.508778,0.115413,0.000171,0.999657,39.649416,473491105093,29.913229
14204,14.377365,-63.579771,21.224976,21.085358,21.029215,21.004450,0.006722,0.010043,0.015037,0.069662,...,-63.571493,18.418064,0.053370,17.518520,0.109391,0.486486,0.486486,14.378990,473587272291,29.914824


In [8]:
# opening and processing des-cat matches (mydb tables descat_tile1 .. descat_tile204)

# Each entry is a two-item list [de-duplicated matches, discarded repeats].
# Lists rather than tuples, because the first slot is overwritten in place
# once the tile has been merged.
descat_list = [list(get_tile_dbtable_cat(tile_num)) for tile_num in range(1, 205)]

print(len(descat_list))

50


In [9]:
# merge the des-vhs matches to the des-cat matches
#
# Every tile present in BOTH lists is merged, rather than a fixed count of 50:
# a fixed count skips tiles when more were loaded and raises IndexError when
# fewer were.  List position i holds the mydb tables numbered i + 1, so the
# output files are numbered from tile0.csv.

n_tiles = min(len(desvhs_list), len(descat_list))

for i in range(n_tiles):
    vhs = desvhs_list[i][0]
    cat = descat_list[i][0]

    # A slot is set to None once its tile has been written (see below), so
    # running this cell again skips finished tiles instead of failing in merge.
    if vhs is None or cat is None:
        continue

    # perform the merge (inner join on the shared t1_index column) and save
    # the result as a csv file
    df = pd.merge(vhs, cat, on = 't1_index')
    save_to_csv_noindex(df, 'tile%d.csv' % i)

    # clearing out unnecessary variables and freeing memory: drop the list's
    # references and the local ones so the input frames can be collected
    desvhs_list[i][0] = None
    descat_list[i][0] = None

    del vhs, cat
    gc.collect()

50
