## 14 Aug 2018
-- Laurin Gray

Using color cuts, classify each star as C-rich or O-rich and save the results as catalogs, separated by confidence level, for the lower two tiers of catalogs (in 5 and in 4).

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.stats import gaussian_kde
import csv
import pathlib

In [2]:
# Load the source catalogs from .csv files saved locally.
# The three paths are, in order: the master tiered catalog, the "in 5" tier
# and the "in 4" tier; they are read in that order and bound to
# master, in5 and in4 respectively.
master, in5, in4 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/lgray/Documents/Phot_data/Red_Cand_Catalogs/8Aug2018/RedCandTiers_8Aug2018_lauringray.csv',
        '/Users/lgray/Documents/Phot_data/Red_Cand_Catalogs/8Aug2018/8Aug2018_LG_RedCand_5.csv',
        '/Users/lgray/Documents/Phot_data/Red_Cand_Catalogs/8Aug2018/8Aug2018_LG_RedCand_4.csv',
    )
)


In [3]:
# Null data left over from combining the tables shows up as +/- 9999, which
# throws off the color scaling.  Keep only the values strictly inside
# (-500, 500): DataFrame.where keeps the shape of each table and puts NaN in
# every cell that fails the test.

filter_master = master.where((master > -500.0) & (master < 500.0))
filter_in5 = in5.where((in5 > -500.0) & (in5 < 500.0))
filter_in4 = in4.where((in4 > -500.0) & (in4 < 500.0))

In [4]:
# Short names for the column arrays, so later cells do not have to spell out
# <table>.<column>.values every time.

# IDs are read from the unfiltered tables (presumably because the +/- 500 cut
# is applied to every column and would also blank out IDs outside that range).
in5_ID = in5['ID'].values
in4_ID = in4['ID'].values

# Magnitudes and colors are read from the filtered tables.
in5_Kmag = filter_in5['Kmag'].values
in4_Kmag = filter_in4['Kmag'].values

in5_jMINUSk = filter_in5['jMINUSk'].values
in4_jMINUSk = filter_in4['jMINUSk'].values

In [5]:
def create_sections(ID, xval, yval, trgb=17.1, c_cut=1.36, o_cut=1.09):
    """
    Sort source IDs into C-rich and O-rich bins according to the section of
    the plot they fall into, and return the two bins.

    The user enters the ID set, xaxis (jMINUSk), & yaxis (Kmag).  The three
    sequences are read row by row, so xval and yval must be at least as long
    as ID.

    A source is only classified when yval < trgb (the point is above the TRGB
    on the plot).  It is then
        C-rich  if xval > c_cut
        O-rich  if o_cut < xval <= c_cut
    and is left out of both bins otherwise.  NaN magnitudes or colors fail
    every comparison, so those sources are left out as well.

    Parameters:
        ID    -- sequence of source IDs
        xval  -- sequence of colors (jMINUSk), one per ID
        yval  -- sequence of magnitudes (Kmag), one per ID
        trgb  -- magnitude cut; only sources with yval below it are binned
        c_cut -- color above which a source is binned as C-rich
        o_cut -- color above which (up to c_cut) a source is binned as O-rich

    Returns:
        (c_bins, o_bins) -- two lists of IDs, in the order they appear in ID

    Call example:
        in8_ID = in8.ID.values
        in8_Kmag = filter_in8.Kmag.values
        in8_jMINUSk = filter_in8.jMINUSk.values

        c_bin, o_bin = create_sections(in8_ID, in8_jMINUSk, in8_Kmag)
    """

    c_bins = []  # C-rich IDs
    o_bins = []  # O-rich IDs

    for row, source_id in enumerate(ID):
        # "not <" rather than ">=" so that a NaN magnitude is skipped too
        if not yval[row] < trgb:
            continue

        color = xval[row]
        if color > c_cut:
            c_bins.append(source_id)
        elif color > o_cut:  # already known to be <= c_cut
            o_bins.append(source_id)

    return c_bins, o_bins

In [6]:
def corr_rows(groups, lengths, catalog=None):
    """
    Some of the ID numbers are wrong (ex. there are two 2118s), which means we can't use the ID to
    directly access the row it belongs to. As we go further down, the problem gets worse.
    This function finds the correct row in the catalog for each ID and saves them to a list.

    Takes a list of ID groups (and their corresponding lengths) and outputs a list of the
    row positions in the catalog whose ID column matches the IDs in those groups.

    Details:
      * Each ID is converted with int() before it is compared.
      * When an ID occurs more than once in the catalog, the FIRST matching row is used.
      * IDs that are not found in the catalog are skipped (nothing is appended for them).
      * Only the first lengths[n] entries of groups[n] are looked up.
      * The whole catalog is searched, however many rows it has.

    Note that because groups must be a list, even if you are just running one column,
    you need to define the column and length in a list first
    (ex. groups = [in_eight]; lengths = [len(in_eight)]; corr_rows(groups, lengths)).

    For the tiered catalogue, list the columns in order of most to least confidence.

    Parameters:
        groups  -- list of ID sequences
        lengths -- list with one entry per group: how many IDs of that group to use
        catalog -- table with an ID column to search; defaults to the module-level
                   master catalog

    Returns:
        list of integer row positions, in the order the IDs were looked up

    Call example:
        group = [in_eight, in_seven, in_six]
        length = [359, 139, 230]

        source_rows = corr_rows(group, length)
    """

    if catalog is None:
        catalog = master

    rows = catalog.ID.values

    # Remember only the first row each ID appears on; setdefault leaves an
    # existing entry alone, so later duplicates do not replace it.
    first_row = {}
    for row, row_id in enumerate(rows):
        first_row.setdefault(row_id, row)

    cat_rows = []

    for d, group in enumerate(groups):
        group_lim = lengths[d]
        for k, source_id in enumerate(group):
            if not k < group_lim:
                break  # past the requested number of entries for this group
            row = first_row.get(int(source_id))
            if row is not None:
                cat_rows.append(row)

    return cat_rows

In [7]:
def mag_lookup(source_rows, catalog=None):
    """
    Takes the rows of sources in the catalog, then uses them to look up the related ID, RA, Dec,
    magnitudes & colors.  source_rows should come from the output of the corr_rows function.

    It is very similar to xy_lookup, but accesses all of the data associated with that ID instead of
    just the desired x and y axes.

    This function is called "coord_lookup" in the 24July2018_RedCandCat notebook.

    Parameters:
        source_rows -- iterable of integer row positions in the catalog
        catalog     -- table to read from; defaults to the module-level master tiered catalog

    Returns:
        A tuple of 14 lists, one per column, each holding the value of that column at every
        row in source_rows (same order as source_rows):
        ID, RA, Dec, k36mag, k45mag, k58mag, k80mag, k24mag, Jmag, Hmag, Kmag,
        jMINUSh, hMINUSk, jMINUSk

    Call example:
        ID, RA, Dec, k36mag, k45mag, k58mag, k80mag, k24mag, Jmag, Hmag, Kmag,
            jMINUSh, hMINUSk, jMINUSk = mag_lookup(source_rows)
    """

    if catalog is None:
        catalog = master

    column_names = ('ID', 'RA', 'Dec', 'k36mag', 'k45mag', 'k58mag', 'k80mag', 'k24mag',
                    'Jmag', 'Hmag', 'Kmag', 'jMINUSh', 'hMINUSk', 'jMINUSk')

    # source_rows is walked once per column, so make sure it can be re-iterated
    source_rows = list(source_rows)

    columns = []
    for name in column_names:
        values = catalog[name].values  # fetch the column array once, not once per row
        columns.append([values[i] for i in source_rows])

    return tuple(columns)

In [8]:
def error_lookup(source_rows, catalog=None):
    """
    Takes the rows of sources in the catalog, then uses them to look up the errors associated
    with the magnitudes.  source_rows should come from the output of the corr_rows function.

    It is very similar to mag_lookup, but accesses the errors on the magnitudes instead of
    the magnitudes themselves.  Use in concert with mag_lookup to produce a full set of lists to put
    into a catalog.
    mag_lookup & error_lookup were originally one function, but calling them was a pain and it was easy to make
    mistakes, so I split them into two.  This way, you can also create a catalog with only the magnitudes.

    Parameters:
        source_rows -- iterable of integer row positions in the catalog
        catalog     -- table to read from; defaults to the module-level master tiered catalog

    Returns:
        A tuple of 8 lists, one per column, each holding the value of that column at every
        row in source_rows (same order as source_rows):
        e36mag, e45mag, e58mag, e80mag, e24mag, eJmag, eHmag, eKmag

    Call example:
        e36mag, e45mag, e58mag, e80mag, e24mag, eJmag, eHmag, eKmag = error_lookup(source_rows)
    """

    if catalog is None:
        catalog = master

    column_names = ('e36mag', 'e45mag', 'e58mag', 'e80mag', 'e24mag', 'eJmag', 'eHmag', 'eKmag')

    # source_rows is walked once per column, so make sure it can be re-iterated
    source_rows = list(source_rows)

    columns = []
    for name in column_names:
        values = catalog[name].values  # fetch the column array once, not once per row
        columns.append([values[i] for i in source_rows])

    return tuple(columns)

In [9]:
def save_cat(filename):
    """
    Saves the produced catalogue of red candidates to a csv file.

    The data are NOT passed in as arguments: the columns are read from the module-level
    lists ID, RA, Dec, k36mag, e36mag, ... (see the column order below), so those must
    have been filled first, e.g. by unpacking the output of mag_lookup and error_lookup.

    The file is written in a single pass.  If a file already exists at filename it is
    overwritten.

    Column order in the file:
        ID, RA, Dec, k36mag, e36mag, k45mag, e45mag, k58mag, e58mag, k80mag, e80mag,
        k24mag, e24mag, Jmag, eJmag, Hmag, eHmag, Kmag, eKmag, jMINUSh, hMINUSk, jMINUSk

    Parameters:
        filename -- path of the csv file to write

    Call example:
        filename = '/Users/lgray/Documents/Phot_data/AGB_Catalogs/8Aug2018_NGC6822_HC_C.csv'
        save_cat(filename)

    HC = high confidence
    MC = medium confidence
    LC = low confidence
    C = C-rich AGB
    O = O-rich AGB
    """

    headers = ['ID', 'RA', 'Dec', 'k36mag', 'e36mag', 'k45mag', 'e45mag', 'k58mag', 'e58mag', 'k80mag', 'e80mag',
               'k24mag', 'e24mag', 'Jmag', 'eJmag', 'Hmag', 'eHmag', 'Kmag', 'eKmag', 'jMINUSh', 'hMINUSk', 'jMINUSk']
    cols = [ID, RA, Dec, k36mag, e36mag, k45mag, e45mag, k58mag, e58mag, k80mag, e80mag, k24mag, e24mag,
            Jmag, eJmag, Hmag, eHmag, Kmag, eKmag, jMINUSh, hMINUSk, jMINUSk]

    # One single-column frame per list, joined side by side.  concat (rather than
    # one DataFrame built from a dict of lists) pads shorter columns with NaN
    # instead of raising when the lists differ in length.
    frames = [pd.DataFrame({header: values}) for header, values in zip(headers, cols)]
    catalog = pd.concat(frames, axis=1)

    catalog.to_csv(filename, index=False)

In [10]:
def create_colors(filename, colors):
    """
    Create the colors you want in a catalog.

    Reads the csv catalog at filename, adds one column per requested color (the first
    magnitude column minus the second), and writes the catalog back to the same file.

    I only included the colors for the eight CMDs that we're currently using, but any color can be
    coded in by adding an entry to color_terms below.  Supported names:
        'jMINUSthreesix'          = Jmag   - k36mag
        'threesixMINUSeightzero'  = k36mag - k80mag
        'fourfiveMINUSeightzero'  = k45mag - k80mag
        'hMINUSthreesix'          = Hmag   - k36mag
        'hMINUSfourfive'          = Hmag   - k45mag
    Names that are not in this list are ignored.

    The file is read once and, if at least one supported color was requested, written once.
    If colors is empty the file is not touched at all.

    Parameters:
        filename -- path of an existing csv catalog containing the magnitude columns used
        colors   -- list of color names to add

    Call example:
        filename = '/Users/lgray/Documents/Phot_data/AGB_Catalogs/8Aug2018_NGC6822_HC_C.csv'
        colors = ['jMINUSthreesix', 'threesixMINUSeightzero', 'fourfiveMINUSeightzero',
                'hMINUSthreesix', 'hMINUSfourfive']
        create_colors(filename, colors)
    """

    # color name -> (column to subtract from, column to subtract)
    color_terms = {
        'jMINUSthreesix': ('Jmag', 'k36mag'),
        'threesixMINUSeightzero': ('k36mag', 'k80mag'),
        'fourfiveMINUSeightzero': ('k45mag', 'k80mag'),
        'hMINUSthreesix': ('Hmag', 'k36mag'),
        'hMINUSfourfive': ('Hmag', 'k45mag'),
    }

    colors = list(colors)
    if not colors:
        return

    catalog = pd.read_csv(filename)

    added = False
    for name in colors:
        # Look the name up by value; comparing strings with "is" tests object
        # identity and only works when the interpreter happens to share the string.
        terms = color_terms.get(name)
        if terms is None:
            continue
        first, second = terms
        catalog[name] = catalog[first].values - catalog[second].values
        added = True

    if added:
        catalog.to_csv(filename, index=False)

In [11]:
# Path prefix shared by the output catalogs of this run; the later cells
# append 'C' or 'O' plus '.csv' to it.
file = '/Users/lgray/Documents/Phot_data/AGB_Catalogs/14Aug2018_NGC6822_'+'in4_'

# Inputs for this run: the "in 4" tier IDs, with J-K on the x axis and
# K magnitude on the y axis.
ID, xvals, yvals = in4_ID, in4_jMINUSk, in4_Kmag

In [12]:
# Split the selected IDs into the C-rich and O-rich bins.
# (len(c_bin) and len(o_bin) give the number of sources in each bin.)
c_bin, o_bin = create_sections(ID=ID, xval=xvals, yval=yvals)

In [13]:
# corr_rows expects a list of groups and a matching list of lengths, so each
# single bin is wrapped in a one-element list together with its own length.
c_group, c_length = [c_bin], [len(c_bin)]
o_group, o_length = [o_bin], [len(o_bin)]

# Row positions in the master catalog for the C-rich and O-rich IDs.
c_cat_rows = corr_rows(groups=c_group, lengths=c_length)
o_cat_rows = corr_rows(groups=o_group, lengths=o_length)

In [14]:
# C-rich catalog: gather the columns for the C-rich rows.  save_cat reads
# these module-level names, so they have to be bound before it is called.
(ID, RA, Dec,
 k36mag, k45mag, k58mag, k80mag, k24mag,
 Jmag, Hmag, Kmag,
 jMINUSh, hMINUSk, jMINUSk) = mag_lookup(c_cat_rows)
(e36mag, e45mag, e58mag, e80mag, e24mag,
 eJmag, eHmag, eKmag) = error_lookup(c_cat_rows)

filename = file+'C'+'.csv'

# Write the base catalog, then append the extra color columns to the same file.
save_cat(filename)
colors = [
    'jMINUSthreesix',
    'threesixMINUSeightzero',
    'fourfiveMINUSeightzero',
    'hMINUSthreesix',
    'hMINUSfourfive',
]
create_colors(filename, colors)

In [15]:
# O-rich catalog: gather the columns for the O-rich rows.  save_cat reads
# these module-level names, so they have to be bound before it is called.
(ID, RA, Dec,
 k36mag, k45mag, k58mag, k80mag, k24mag,
 Jmag, Hmag, Kmag,
 jMINUSh, hMINUSk, jMINUSk) = mag_lookup(o_cat_rows)
(e36mag, e45mag, e58mag, e80mag, e24mag,
 eJmag, eHmag, eKmag) = error_lookup(o_cat_rows)

filename = file+'O'+'.csv'

# Write the base catalog, then append the extra color columns to the same file.
save_cat(filename)
colors = [
    'jMINUSthreesix',
    'threesixMINUSeightzero',
    'fourfiveMINUSeightzero',
    'hMINUSthreesix',
    'hMINUSfourfive',
]
create_colors(filename, colors)