In [1]:
%matplotlib inline
import sys
import os
import math
import pathlib
import logging
import numpy as np
import pandas
import psycopg2
import psycopg2.extras
from matplotlib import pyplot
from mosthosts_desi import MostHostsDesi

In [2]:
_logger = logging.getLogger( __name__ )
_logger.propagate = False
_logout = logging.StreamHandler( sys.stderr )
_logger.addHandler( _logout )
_logout.setFormatter( logging.Formatter( f'[%(asctime)s - %(levelname)s] - %(message)s' ) )
_logger.setLevel( logging.INFO )

In [3]:
mhd = MostHostsDesi( release="daily", force_regen=False, logger=_logger )
with open("/global/homes/r/raknop/secrets/decatdb_desi_desi") as ifp:
    dbuser, dbpasswd = ifp.readline().strip().split()
db = psycopg2.connect( host='decatdb.lbl.gov', dbname='desidb', user=dbuser, password=dbpasswd,
                       cursor_factory=psycopg2.extras.RealDictCursor )


[2023-01-04 10:43:06,292 - INFO] - Loading mosthosts table...
[2023-01-04 10:43:06,935 - INFO] - ...mosthosts table loaded.
[2023-01-04 10:43:06,950 - INFO] - Read dataframes from pkl files


In [4]:
db.rollback()
cursor = db.cursor()
cursor.execute( "SELECT DISTINCT ON (snname) * INTO TEMP TABLE mhjustsn FROM static.mosthosts ORDER BY snname" ) 
cursor.execute( "SELECT m1.snname AS snname_1, m2.snname AS snname_2, "
                "       m1.sn_ra AS sn_ra_1, m2.sn_ra AS sn_ra_2, "
                "       m1.sn_dec AS sn_dec_1, m2.sn_dec AS sn_dec_2, "
                "       m1.sn_z AS sn_z_1, m2.sn_z AS sn_z_2, "
                "       m1.program AS program_1, m2.program AS program_2, "
                "       m1.tns_name AS tns_name_1, m2.tns_name AS tns_name_2, "
                "       m1.tns_name AS iau_name_1, m2.iau_name AS tns_name_2, "
                "       m1.tns_name AS ptfiptf_name_1, m2.ptfiptf_name AS tns_name_2 "
                "FROM mhjustsn m1 "
                "INNER JOIN mhjustsn m2 "
                "   ON ABS( (m1.sn_ra - m2.sn_ra)/COS(m1.sn_dec * PI()/180.) ) < 1./3600. "
                "      AND ABS( m1.sn_dec - m2.sn_dec ) < 1./3600. "
                "      AND ( m1.snname != m2.snname ) " )
rows = cursor.fetchall()

# Everything will show up twice because of the inner join.  Try to crop that down.
croprows = {}
for row in rows:
    if ( row['snname_2'], row['snname_1'] ) not in croprows:
        croprows[ ( row['snname_1'], row['snname_2'] ) ] = row

duplicatesne = pandas.DataFrame( croprows.values() )

In [5]:
duplicatesne

Unnamed: 0,snname_1,snname_2,sn_ra_1,sn_ra_2,sn_dec_1,sn_dec_2,sn_z_1,sn_z_2,program_1,program_2,tns_name_1,tns_name_2,iau_name_1,ptfiptf_name_1
0,09hdo,09hdp,3.846665,3.846665,30.722015,30.722015,0.0470,0.04700,PTF-iPTF,PTF-iPTF,,PTF09hdp,,
1,11bnx,PTF11bnx,247.580510,247.580500,21.085354,21.085300,0.0600,-9999.00000,PTF-iPTF,historical_SNIa,,,,
2,11dzm,PTF11dzm,199.515367,199.515300,42.176136,42.176100,0.0400,-9999.00000,PTF-iPTF,historical_SNIa,,,,
3,11mpa,PTF11mpa,256.298517,256.298400,43.900742,43.900800,0.0320,-9999.00000,PTF-iPTF,historical_SNIa,,,,
4,11mpg,2007ie,334.402755,334.403000,0.613442,0.613333,0.0935,0.09345,PTF-iPTF,SDSS_II,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,ZTF19aamljom,ZTF19aampllw,271.538220,271.538160,65.346360,65.346320,0.0200,-9999.00000,RCF/IIps/ZTFmarshal,RCF,SN2019cbc,,SN2019cbc,SN2019cbc
137,ZTF19aamlkrd,ZTF19aampmfl,259.978064,259.978086,21.008379,21.008374,-9999.0000,-9999.00000,SNe_Ia/RCF,SNe_Ia/RCF,AT2019bza,,AT2019bza,AT2019bza
138,ZTF19aampgrt,ZTF19aasneoh,267.819407,267.819490,50.760727,50.760695,-9999.0000,-9999.00000,SNe_Ia,SNe_Ia,,,,
139,ZTF19aawmcjt,ZTF20aakzbfh,249.769787,249.769772,8.358614,8.358598,0.0453,0.04500,ZTFmarshal,RCF/ZTFmarshal,SN2020beu,,SN2020beu,SN2020beu


In [10]:
missingindices1 = {}
missingindices2 = {}
sameindices = []
differentradec = {}
for dup in duplicatesne.itertuples():
    df1 = mhd.df.xs( dup.snname_1, level='snname' )
    df2 = mhd.df.xs( dup.snname_2, level='snname' )
    dupdex = ( dup.snname_1, dup.snname_2 )
    for i in df1.index.values:
        if i not in df2.index.values:
            if dupdex not in missingindices1:
                missingindices1[ dupdex ] = [ i ]
            else:
                missingindices1[ dupdex ].append( i )
        else:
            if ( abs( df1.loc[i].ra - df2.loc[i].ra ) / math.cos( df1.loc[i].dec * math.pi / 180. ) > 1./3600. 
                 and abs( df1.loc[i].dec - df2.loc[i].dec ) > 1./3600. ):
                if dupdex not in differentradec:
                    differentradec[ dupdex ] = [ i ]
                else:
                    differentradec[ dupdex ].append( i )
    for i in df2.index.values:
        if i not in df1.index.values:
            if dupdex not in missingindices2:
                missingindices2[ dupdex ] = [ i ]
            else:
                missingindices2[ dupdex ].append( i )
    if ( ( dupdex not in missingindices1 ) and
         ( dupdex not in missingindices2 ) and
         ( dupdex not in differentradec ) ):
        sameindices.append( dupdex )
                
_logger.warning( f"{len(missingindices1)} duplicates have some first indices missing from second, "
                 f"{len(missingindices2)} duplicates have some second indicies missing from first " )
_logger.warning( f"{len(differentradec)} duplicates have hosts with different RA/Dec values" )
_logger.warning( f"{len(sameindices)} duplicates have the same set of hosts" )



In [11]:
print( "**Duplicate SNe in the original mosthosts list with different sets of host indices**" )
for key, val in missingindices1.items():
    snname1, snname2 = key
    dexes1 = mhd.df.xs( snname1, level='snname' ).index.values
    dexes2 = mhd.df.xs( snname2, level='snname' ).index.values
    print( f"    Duplicate SN {snname1} has indexes {' '.join( str(i) for i in dexes1 )}, "
           f"{snname2} has indexes {' '.join( str(i) for i in dexes2 )}" )
print( "(more)" )
for key, val in missingindices2.items():
    if key not in missingindices1:
        snname1, snname2 = key
        dexes1 = mhd.df.xs( snname1, level='snname' ).index.values
        dexes2 = mhd.df.xs( snname2, level='snname' ).index.values
        print( f"    Duplicate SN {snname1} has indexes {' '.join( str(i) for i in dexes1 )}, "
               f"{snname2} has indexes {' '.join( str(i) for i in dexes2 )}" )
        
print( '\n\n**Hosts whose ra/dec don\'t match (w/in 1")**' )
for key, val in differentradec.items():
    ( sn1, sn2 ) = key
    print( f"  SN {sn1} / {sn2} " )
    for i in val:
        print( f"    Index {i} : {sn1} has ({mhd.df.loc[sn1,i].ra}, {mhd.df.loc[sn1,i].dec}) "
               f"while {sn2} has ({mhd.df.loc[sn2,i].ra}, {mhd.df.loc[sn2,i].dec})" )
        
print( '\n\n**Duplicate SNe with the same set of hosts (w/in 1")**' )
for key in sameindices:
    sn1, sn2 = key
    print( f'  {sn1:30s}   {sn2}' )

**Duplicate SNe in the original mosthosts list with different sets of host indices**
    Duplicate SN 11bnx has indexes 1 2, PTF11bnx has indexes 9
    Duplicate SN 11dzm has indexes 1, PTF11dzm has indexes 9
    Duplicate SN 11mpa has indexes 1, PTF11mpa has indexes 9
    Duplicate SN 11muf has indexes 1, PTF11muf has indexes 9
    Duplicate SN 11nga has indexes 1 2, PTF11nga has indexes 9
    Duplicate SN 11oyn has indexes 1, PTF11oyn has indexes 9
    Duplicate SN 12ena has indexes 1, PTF12ena has indexes 9
    Duplicate SN 12fuu has indexes 1, PTF12fuu has indexes 9
    Duplicate SN 13aro has indexes 1 2, PTF13aro has indexes 9
    Duplicate SN 16auf has indexes 1, iPTF16auf/SN2016ccz/Gaia16aqm/AT2016ccz has indexes 9
    Duplicate SN 2007jg has indexes 1, SN2007jg has indexes 9
    Duplicate SN 2007mm has indexes 1 2, SN2007mm has indexes 9
    Duplicate SN AT2020uth has indexes 9, ZTF20acggviy has indexes 1 2 3
    Duplicate SN ZTF18aapaohn has indexes 1 2 3, ZTF18aawurud has ind