In [2]:
import numpy as np
import pandas as pd
from lsst.rsp import get_tap_service, retrieve_query

In [3]:
service = get_tap_service("tap")

In [4]:
my_adql_query = "SELECT TOP 10000 "+ \
	"object.objectId as obj_id "+ \
	",object.refExtendedness as extendedness "+ \
	",matches.truth_type as truth_type "+ \
    "FROM dp02_dc2_catalogs.Object object "+ \
	"JOIN dp02_dc2_catalogs.MatchesTruth matches on object.objectId=matches.match_objectId"

In [5]:
results = service.search(my_adql_query)
results_table = results.to_table().to_pandas()
results_table

Unnamed: 0,obj_id,extendedness,truth_type
0,1248684569339659648,0.0,1
1,1248684569339659638,1.0,1
2,1248684569339659639,0.0,2
3,1248684569339659642,1.0,1
4,1248684569339659643,1.0,1
...,...,...,...
9995,1248684569339654895,1.0,1
9996,1248684569339654903,1.0,1
9997,1248684569339654902,1.0,1
9998,1248684569339654901,1.0,1


In [6]:
galaxy_galaxy=results_table[(results_table["extendedness"]==1.0) & (results_table["truth_type"]==1)]
galaxy_galaxy.shape

(7894, 3)

In [7]:
not_galaxy=results_table[(results_table["extendedness"]==0.0) & (results_table["truth_type"]==1)]
not_galaxy.shape

(1860, 3)

In [8]:
galaxy_not=results_table[(results_table["extendedness"]==1.0) & (results_table["truth_type"]!=1)]
galaxy_not.shape

(95, 3)

In [9]:
not_not=results_table[(results_table["extendedness"]==0.0) & (results_table["truth_type"]!=1)]
not_not.shape

(148, 3)

In [10]:
condlist = [np.logical_and(results_table["extendedness"]==1, results_table["truth_type"]==1),
            np.logical_and(results_table["extendedness"]==1, results_table["truth_type"]!=1),
            np.logical_and(results_table["extendedness"]==0, results_table["truth_type"]==1),
            np.logical_and(results_table["extendedness"]==0, results_table["truth_type"]!=1)]
choicelist = ['true positive','false positive','false negative','true negative']
results_table['galaxy_match']=np.select(condlist, choicelist, np.NaN)

In [11]:
results_table

Unnamed: 0,obj_id,extendedness,truth_type,galaxy_match
0,1248684569339659648,0.0,1,false negative
1,1248684569339659638,1.0,1,true positive
2,1248684569339659639,0.0,2,true negative
3,1248684569339659642,1.0,1,true positive
4,1248684569339659643,1.0,1,true positive
...,...,...,...,...
9995,1248684569339654895,1.0,1,true positive
9996,1248684569339654903,1.0,1,true positive
9997,1248684569339654902,1.0,1,true positive
9998,1248684569339654901,1.0,1,true positive


- make function that can take the column of "is galaxy" and "truth_type" and return a statement like "% true positive, %false negative, etc"

In [12]:
np.count_nonzero(results_table['galaxy_match']=='false negative')

1860

In [22]:
def galaxy_match(galaxy_calc, galaxy_truth):
    print(galaxy_calc)
    print(galaxy_truth)
    condlist = [np.logical_and(galaxy_calc==1, galaxy_truth==1),
            np.logical_and(galaxy_calc==1, galaxy_truth!=1),
            np.logical_and(galaxy_calc==0, galaxy_truth==1),
            np.logical_and(galaxy_calc==0, galaxy_truth!=1)]
    choicelist = ['true positive','false positive','false negative','true negative']
    matched_array=np.select(condlist, choicelist, np.NaN)
    print(matched_array)
    return matched_array

In [23]:
myarray=galaxy_match(results_table["extendedness"],results_table["truth_type"])

0       0.0
1       1.0
2       0.0
3       1.0
4       1.0
       ... 
9995    1.0
9996    1.0
9997    1.0
9998    1.0
9999    1.0
Name: extendedness, Length: 10000, dtype: float64
0       1
1       1
2       2
3       1
4       1
       ..
9995    1
9996    1
9997    1
9998    1
9999    1
Name: truth_type, Length: 10000, dtype: int64
['false negative' 'true positive' 'true negative' ... 'true positive'
 'true positive' 'true positive']


In [17]:
myarray

array(['nan', 'true positive', 'nan', ..., 'true positive',
       'true positive', 'true positive'], dtype='<U32')

In [24]:
results_table['galaxy_match2']=myarray

In [25]:
results_table

Unnamed: 0,obj_id,extendedness,truth_type,galaxy_match,galaxy_match2
0,1248684569339659648,0.0,1,false negative,false negative
1,1248684569339659638,1.0,1,true positive,true positive
2,1248684569339659639,0.0,2,true negative,true negative
3,1248684569339659642,1.0,1,true positive,true positive
4,1248684569339659643,1.0,1,true positive,true positive
...,...,...,...,...,...
9995,1248684569339654895,1.0,1,true positive,true positive
9996,1248684569339654903,1.0,1,true positive,true positive
9997,1248684569339654902,1.0,1,true positive,true positive
9998,1248684569339654901,1.0,1,true positive,true positive


In [26]:
results_table['galaxy_match2']==results_table['galaxy_match']

0       True
1       True
2       True
3       True
4       True
        ... 
9995    True
9996    True
9997    True
9998    True
9999    True
Length: 10000, dtype: bool

In [28]:
np.count_nonzero((results_table['galaxy_match2']==results_table['galaxy_match'])==True)

10000

In [38]:
def confusion_matrix(matched_array, style='percents'):
    total=matched_array.size
    tp=np.count_nonzero(matched_array=='true positive')
    fp=np.count_nonzero(matched_array=='false positive')
    fn=np.count_nonzero(matched_array=='false negative')
    tn=np.count_nonzero(matched_array=='true negative')
    sums_matrix={'total':total, 'true positive':tp, 'false positive':fp, 'false negative':fn, 'true negative':tn}
    percents_matrix={'true positive %':("{:.1%}".format(tp/total)), 'false positive %':("{:.1%}".format(fp/total)), 
                     'false negative %':("{:.1%}".format(fn/total)), 'true negative %':("{:.1%}".format(tn/total))}
    if style=='percents':
        return percents_matrix
    elif style=='sums':
        return sums_matrix
    else:
        pass

In [39]:
confusion_matrix(myarray)

{'true positive %': '78.9%',
 'false positive %': '0.9%',
 'false negative %': '18.6%',
 'true negative %': '1.5%'}