In [2]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [12]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from zwad.ad.postprocess import *

from IPython.display import display, HTML
# Raise pandas' row limit so frames of up to 2000 rows are rendered in full
# instead of being truncated in the cell output.
pd.set_option('display.max_rows', 2000)

# Table of contents
* [AD results comparison](#AD-results-comparison)
* [Celestial plot](#Celestial-plot)

In [34]:
# Root directory of the input CSV files (relative path).
data_dir = '../data/'

# Load names of fakes
# Every row of the CSV is consumed as an (oid, name) pair, so the file must have
# exactly two columns; any other width raises ValueError.
df = pd.read_csv(os.path.join(data_dir, 'fakes', 'fakes_m31_fake.csv'))
# Build the oid -> name lookup from plain row tuples. Unlike iterrows(), this does
# not wrap each row in a Series (which can coerce dtypes) and does not rebind `_`,
# the name IPython uses for the previous cell output. If an oid occurs more than
# once, the last row wins.
fake_names = dict(df.itertuples(index=False, name=None))

def apply_fake_names(table, names=None):
    """Replace known object ids in ``table['oid']`` with their names, in place.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame with an ``oid`` column. That column is overwritten.
    names : mapping, optional
        Lookup from oid to replacement name. When omitted, the module-level
        ``fake_names`` dictionary is used, so existing calls keep working.

    Returns
    -------
    None
        The frame is modified in place; nothing is returned.

    Notes
    -----
    Values of ``oid`` that are not keys of the mapping are left unchanged.
    """
    if names is None:
        # Fall back to the notebook-wide lookup for backward compatibility.
        names = fake_names
    table['oid'] = table['oid'].map(lambda oid: names.get(oid, oid))

def fakes_counter(table):
    """Return how many entries of ``table['oid']`` are ``str`` instances.

    In this notebook the string entries are expected to be the oids that
    ``apply_fake_names`` replaced with names; other oids are not strings.
    """
    return sum(1 for oid in table['oid'] if isinstance(oid, str))

# Load AD results
def _ad_csv_path(alco):
    """Return the path of the 'm31_<alco>_fake.csv' file under ``data_dir``."""
    return os.path.join(data_dir, 'm31_{}_fake.csv'.format(alco))

alcos = ['iso', 'gmm', 'lof', 'svm']
tables = {}
for alco in alcos:
    # One table per algorithm, loaded from that algorithm's CSV alone.
    # load_ad_tables_by_patterns is presumably provided by the star import
    # from zwad.ad.postprocess -- confirm.
    per_algo_csv = [_ad_csv_path(alco)]
    tables[alco] = load_ad_tables_by_patterns(per_algo_csv)
    apply_fake_names(tables[alco])

# A single table built from all four CSV files at once; the displayed output
# shows one score column per input file.
all_csv = list(map(_ad_csv_path, alcos))
combined_table = load_ad_tables_by_patterns(all_csv)
apply_fake_names(combined_table)

# Isolation Forest fakes

In [36]:
# Show the count of string-named rows first, then the Isolation Forest table.
display(fakes_counter(tables['iso']), tables['iso'])

10

Unnamed: 0,oid,m31_iso_fake
0,step,-0.75309
1,ZTF18abhjrcf_format_r,-0.740498
2,Gaia16aye_3_format_r,-0.737781
3,695211400034403,-0.707322
4,695211400124577,-0.702643
5,695211400053697,-0.696436
6,695211400102351,-0.693752
7,695211400132963,-0.692504
8,ZTF18abaqxrt_format_r,-0.691396
9,695211400088968,-0.691137


# Gaussian Mixture Models fakes

In [42]:
# Show the count of string-named rows first, then the Gaussian Mixture Model table.
display(fakes_counter(tables['gmm']), tables['gmm'])

9

Unnamed: 0,oid,m31_gmm_fake
0,step,-1509.605536
1,695211400034403,-393.006754
2,695211200009221,-324.942218
3,695211400124577,-314.184142
4,Gaia16aye_3_format_r,-300.175128
5,695211400000352,-299.543117
6,695211400102351,-274.96699
7,695211200020939,-260.667099
8,695211400053697,-260.108951
9,695211200008801,-252.255062


# Local Outlier Factor fakes

In [41]:
# Show the count of string-named rows first, then the Local Outlier Factor table.
display(fakes_counter(tables['lof']), tables['lof'])

12

Unnamed: 0,oid,m31_lof_fake
0,step,-8.912194
1,flat,-7.551189
2,MACHO-6.6696.60_format_R,-4.858569
3,flat_noise,-4.466071
4,Gaia16aye_2_format_r,-4.022277
5,MACHO-6.6696.60_format_B,-3.876315
6,695211200009221,-3.545021
7,695211400034403,-3.505094
8,695211100002984,-3.195999
9,Gaia16aye_3_format_r,-3.186308


# One-class Support Vector Machine fakes

In [40]:
# Show the count of string-named rows first, then the one-class SVM table.
display(fakes_counter(tables['svm']), tables['svm'])

12

Unnamed: 0,oid,m31_svm_fake
0,step,1.0
1,MACHO-6.6696.60_format_R,1.0
2,Gaia16aye_3_format_r,1.0
3,ZTF18abhjrcf_format_r,1.0
4,ZTF18abaqxrt_format_r,1.0
5,MACHO-6.6696.60_format_B,1.0
6,flat,1.000038
7,Gaia16aye_2_format_r,1.000055
8,Gaia16aye_format_r,1.000098
9,695211200075348,1.000543


# Combined table

In [44]:
# Show the count of string-named rows first, then the table combining all four algorithms.
display(fakes_counter(combined_table), combined_table)

12

Unnamed: 0,oid,m31_iso_fake,m31_gmm_fake,m31_lof_fake,m31_svm_fake
0,step,-0.75309,-1509.605536,-8.912194,1.0
1,ZTF18abhjrcf_format_r,-0.740498,-199.798629,-2.539371,1.0
2,Gaia16aye_3_format_r,-0.737781,-300.175128,-3.186308,1.0
3,695211400034403,-0.707322,-393.006754,-3.505094,1.00494
4,695211400124577,-0.702643,-314.184142,-2.995712,1.010084
5,695211400053697,-0.696436,-260.108951,-2.624966,1.016399
6,695211400102351,-0.693752,-274.96699,-2.749282,1.004267
7,ZTF18abaqxrt_format_r,-0.691396,-194.866011,-2.847367,1.0
8,695211400088968,-0.691137,-197.432146,-2.130518,1.357826
9,695211400028274,-0.689623,-223.583553,-2.262296,1.015475
