> Comparison at real data

# Import

In [479]:
import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import plotly.express as px
import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
from haversine import haversine
from IPython.display import HTML
import plotly.graph_objects as go

import copy 

import rpy2
import rpy2.robjects as ro 
from rpy2.robjects.vectors import FloatVector 
from rpy2.robjects.packages import importr

import matplotlib
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

from sklearn.neighbors import LocalOutlierFactor

from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
from sklearn.preprocessing import LabelBinarizer

import tqdm

from pygsp import graphs, filters, plotting, utils

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import plotly.express as px

from sklearn.covariance import EmpiricalCovariance, MinCovDet

from alibi_detect.od import IForest
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
import seaborn as sns
from PyNomaly import loop
from sklearn import svm
from pyod.models.lscp import LSCP
from pyod.models.hbos import HBOS
from pyod.models.so_gaal import SO_GAAL
from pyod.models.mcd import MCD
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.ocsvm import OCSVM
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.sos import SOS

In [99]:
class earthquake_func:
    def __init__(self,df):
        self.df = df 
        self.f = df.Magnitude.to_numpy()
        self.year = df.Year.to_numpy()
        self.lat = df.Latitude.to_numpy()
        self.long = df.Longitude.to_numpy()
        self.n = len(self.f)
        
        self.theta= None
    def get_distance(self):
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.lat, self.long],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i,self.n): 
                self.D[i,j]=haversine(locations[i],locations[j])
        self.D = self.D+self.D.T
    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        self.theta = theta
        dist = np.where(self.D<kappa,self.D,0)
        self.W = np.exp(-(dist/self.theta)**2)

    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)        
    def fit(self,ref=0.5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.fbar**2)))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(MagnitudeHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.Magnitude- self.df.MagnitudeHat)
        self.con = np.where(self.df.Residual>0.7,1,0)
        
    def vis(self,MagThresh=7,ResThresh=1):
        fig = px.density_mapbox(self.df, 
                        lat='Latitude', 
                        lon='Longitude', 
                        z='Magnitude', 
                        radius=5,
                        center=dict(lat=37, lon=160), 
                        zoom=1.5,
                        height=900,
                        opacity = 0.4,
                        mapbox_style="stamen-terrain",
                        range_color=[-7,7])
        fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
        fig.add_scattermapbox(lat = self.df.query('Magnitude > @MagThresh')['Latitude'],
                      lon = self.df.query('Magnitude > @MagThresh')['Longitude'],
                      text = self.df.query('Magnitude > @MagThresh')['Magnitude'],
                      marker_size= 8,
                      marker_color= 'red',
                      opacity = 0.6
                      )
        fig.add_scattermapbox(lat = self.df.query('Residual**2 > @ResThresh')['Latitude'],
                      lon = self.df.query('Residual**2 > @ResThresh')['Longitude'],
                      text = self.df.query('Magnitude > @ResThresh')['Magnitude'],
                      marker_size= 8,
                      marker_color= 'blue',
                      opacity = 0.5
                      )
        return HTML(fig.to_html(include_mathjax=False, config=dict({'scrollZoom':False})))
    def visf(self):
        fig = px.density_mapbox(self.df, 
                        lat='Latitude', 
                        lon='Longitude', 
                        z='Magnitude', 
                        radius=5,
                        center=dict(lat=37, lon=160), 
                        zoom=1.5,
                        height=900,
                        opacity = 0.7,
                        mapbox_style="stamen-terrain",
                        range_color=[-7,7])
        fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
        return HTML(fig.to_html(include_mathjax=False, config=dict({'scrollZoom':False})))
    def visfhat(self):
        fig = px.density_mapbox(self.df, 
                        lat='Latitude', 
                        lon='Longitude', 
                        z='MagnitudeHat', 
                        radius=5,
                        center=dict(lat=37, lon=160), 
                        zoom=1.5,
                        height=900,
                        opacity = 0.7,
                        mapbox_style="stamen-terrain",
                        range_color=[-7,7])
        fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
        return HTML(fig.to_html(include_mathjax=False, config=dict({'scrollZoom':False})))
    def visres(self,MagThresh=7,ResThresh=1):
        fig = px.density_mapbox(self.df, 
                        lat='Latitude', 
                        lon='Longitude', 
                        z=[0] * len(self.df), 
                        radius=5,
                        center=dict(lat=37, lon=160), 
                        zoom=1.5,
                        height=900,
                        opacity = 0.7,
                        mapbox_style="stamen-terrain",
                        range_color=[-7,7])
        fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
        fig.add_scattermapbox(lat = self.df.query('Residual**2 > @ResThresh')['Latitude'],
                      lon = self.df.query('Residual**2 > @ResThresh')['Longitude'],
                      text = self.df.query('Magnitude > @ResThresh')['Magnitude'],
                      marker_size= 8,
                      marker_color= 'blue',
                      opacity = 0.7
                      )
        return HTML(fig.to_html(include_mathjax=False, config=dict({'scrollZoom':False})))

In [122]:
class Conf_matrx:
    def __init__(self,original,compare,tab):
        self.original = original
        self.compare = compare
        self.tab = tab
    def conf(self,name):
        self.conf_matrix = confusion_matrix(self.original, self.compare)
        
        fig, ax = plt.subplots(figsize=(5, 5))
        ax.matshow(self.conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
        for i in range(self.conf_matrix.shape[0]):
            for j in range(self.conf_matrix.shape[1]):
                ax.text(x=j, y=i,s=self.conf_matrix[i, j], va='center', ha='center', size='xx-large')
        plt.xlabel('Predictions', fontsize=18)
        plt.ylabel('Actuals', fontsize=18)
        plt.title('Confusion Matrix', fontsize=18)
        plt.show()
        
        self.acc = accuracy_score(self.original, self.compare)
        self.pre = precision_score(self.original, self.compare)
        self.rec = recall_score(self.original, self.compare)
        self.f1 = f1_score(self.original, self.compare)
        
        print('Accuracy: %.3f' % self.acc)
        print('Precision: %.3f' % self.pre)
        print('Recall: %.3f' % self.rec)
        print('F1 Score: %.3f' % self.f1)
        
        self.tab = self.tab.append(pd.DataFrame({"Accuracy":[self.acc],"Precision":[self.pre],"Recall":[self.rec],"F1":[self.f1]},index = [name]))

In [123]:
tab_gode = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1"])

## load data and clean it

`-` load

In [207]:
df_global= pd.concat([pd.read_csv('00_05.csv'),pd.read_csv('05_10.csv'),pd.read_csv('10_15.csv'),pd.read_csv('15_20.csv')]).iloc[:,[0,1,2,4]].rename(columns={'latitude':'Latitude','longitude':'Longitude','mag':'Magnitude'}).reset_index().iloc[:,1:]

`-` cleaning

In [208]:
df_global = df_global.assign(Year=list(map(lambda x: x.split('-')[0], df_global.time))).iloc[:,1:]

In [209]:
df_global.Year = df_global.Year.astype(np.float64)

In [210]:
df_global_10 = df_global.copy()
df_global_10 = df_global_10.query("2010 <= Year < 2015").reset_index().iloc[:,1:];df_global_10

Unnamed: 0,Latitude,Longitude,Magnitude,Year
0,0.663,-26.045,5.5,2010.0
1,-19.209,167.902,5.1,2010.0
2,-31.830,-178.135,5.0,2010.0
3,-19.984,168.353,5.0,2010.0
4,50.380,153.964,5.0,2010.0
...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0
12494,42.360,-30.462,5.0,2010.0
12495,40.726,51.925,5.0,2010.0
12496,30.646,83.791,5.2,2010.0


### GODE

In [218]:
gode_global = earthquake_func(df_global_10)

`-` get distance 

In [219]:
gode_global.get_distance()

100%|██████████| 12498/12498 [07:20<00:00, 28.35it/s] 


In [220]:
gode_global.D[gode_global.D>0].mean()

8810.865423093777

`-` weight matrix

In [221]:
gode_global.get_weightmatrix(theta=(gode_global.D[gode_global.D>0].mean()),kappa=2500) 

`-` fit

In [352]:
gode_global.fit()

In [353]:
_df = gode_global.df.copy()

In [354]:
_df.sort_values("Residual",ascending=False).iloc[:40,:]

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual
2064,-36.122,-72.898,8.8,2010.0,7.572545,1.227455
3752,-19.6097,-70.7691,8.2,2014.0,7.075499,1.124501
12167,-36.122,-72.898,8.8,2010.0,7.742429,1.057571
9660,36.281,141.111,7.9,2011.0,6.912847,0.987153
6877,0.802,92.463,8.2,2012.0,7.353106,0.846894
7938,-21.611,-179.528,7.3,2011.0,6.497713,0.802287
10593,-3.487,100.082,7.8,2010.0,7.008107,0.791893
3835,-19.9807,-70.7022,6.7,2014.0,5.913563,0.786437
3281,-29.9772,-177.7247,6.9,2014.0,6.129969,0.770031
4997,-23.009,-177.232,7.4,2013.0,6.648946,0.751054


In [355]:
outlier_simul_one = (_df['Residual']**2).tolist()

In [356]:
outlier_simul_one = list(map(lambda x: -1 if x > 0.04 else 1,outlier_simul_one))

In [357]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one)],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1
...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_simul_one,tab_gode)

In [None]:
_conf.conf("GODE")

In [None]:
one = _conf.tab

### LOF

In [358]:
clf = LocalOutlierFactor(n_neighbors=2)

In [360]:
lof_rst = clf.fit_predict(_df)

In [361]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1
...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,clf.fit_predict(X),tab_orbit)

In [None]:
_conf.conf("LOF (Breunig et al., 2000)")

In [None]:
two = one.append(_conf.tab)

### KNN

In [365]:
clf = KNN()
clf.fit(_df[['Latitude', 'Longitude','Magnitude']])
# _df['knn_clf'] = clf.labels_

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0)

In [366]:
outlier_KNN_one = list(clf.labels_)

In [367]:
outlier_KNN_one = list(map(lambda x: 1 if x==0  else -1,outlier_KNN_one))

In [415]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1
...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_KNN_one,tab_orbit)

In [None]:
_conf.conf("kNN (Ramaswamy et al., 2000)")

In [None]:
three = two.append(_conf.tab)

### CBLOF

In [476]:
clf = CBLOF(contamination=0.05,check_estimator=False, random_state=77,clustering_estimator=cluster_estimator)

In [478]:
_model = CBLOF(n_clusters=8, contamination=0.1, random_state=42)
_model.fit(_X_train)

# 예측: 이상치 점수 계산
_X_test = np.random.randn(10, 2)  # 10개의 테스트 데이터 포인트
_scores = model.decision_function(_X_test)

# 예측 결과 출력
for i in range(len(X_test)):
    print("이상치 점수:", scores[i])
    if scores[i] > 0:
        print("-> 이상치로 판단됨.")
    else:
        print("-> 정상 데이터로 판단됨.")




AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

ValueError: Could not form valid cluster separation. Please change n_clusters or change clustering method

In [474]:
CBLOF?

[0;31mInit signature:[0m
[0mCBLOF[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mn_clusters[0m[0;34m=[0m[0;36m8[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcontamination[0m[0;34m=[0m[0;36m0.1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclustering_estimator[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0malpha[0m[0;34m=[0m[0;36m0.9[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbeta[0m[0;34m=[0m[0;36m5[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0muse_weights[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcheck_estimator[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrandom_state[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_jobs[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
The CBLOF operator calculates the outlier score based on cluster-based
local outlier factor.

CBLOF takes 

In [477]:
clf.fit(_df[['Latitude','Longitude','Magnitude']])



AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'

AttributeError: 'NoneType' object has no attribute 'split'



ValueError: Could not form valid cluster separation. Please change n_clusters or change clustering method

In [None]:
# _df['CBLOF_Clf'] = clf.labels_

In [None]:
outlier_CBLOF_one = list(clf.labels_)

In [None]:
outlier_CBLOF_one = list(map(lambda x: 1 if x==0  else -1,outlier_CBLOF_one))

In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_CBLOF_one,tab_orbit)

In [None]:
_conf.conf("CBLOF (He et al., 2003)")

In [None]:
four = three.append(_conf.tab)

### OCSVM

In [376]:
clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)

In [377]:
clf.fit(_df)

In [378]:
outlier_OSVM_one = list(clf.predict(_df))

In [416]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_OSVM_one,tab_orbit)

In [None]:
_conf.conf("OCSVM (Sch ̈olkopf et al., 2001)")

In [None]:
five = four.append(_conf.tab)

### MCD

In [380]:
clf = MCD()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['MCD_clf'] = clf.labels_

MCD(assume_centered=False, contamination=0.1, random_state=None,
  store_precision=True, support_fraction=None)

In [381]:
outlier_MCD_one = list(clf.labels_)

In [382]:
outlier_MCD_one = list(map(lambda x: 1 if x==0  else -1,outlier_MCD_one))

In [417]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_MCD_one,tab_orbit)

In [None]:
_conf.conf("MCD (Hardin and Rocke, 2004)")

In [None]:
six = five.append(_conf.tab)

### Feature Bagging

In [386]:
clf = FeatureBagging()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['FeatureBagging_clf'] = clf.labels_

FeatureBagging(base_estimator=None, bootstrap_features=False,
        check_detector=True, check_estimator=False, combination='average',
        contamination=0.1, estimator_params={}, max_features=1.0,
        n_estimators=10, n_jobs=1, random_state=None, verbose=0)

In [387]:
outlier_FeatureBagging_one = list(clf.labels_)

In [388]:
outlier_FeatureBagging_one = list(map(lambda x: 1 if x==0  else -1,outlier_FeatureBagging_one))

In [418]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_FeatureBagging_one,tab_orbit)

In [None]:
_conf.conf("Feature Bagging (Lazarevic and Kumar, 2005)")

In [None]:
seven = six.append(_conf.tab)

### ABOD

In [390]:
clf = ABOD(contamination=0.05)
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['ABOD_Clf'] = clf.labels_

ABOD(contamination=0.05, method='fast', n_neighbors=5)

In [391]:
outlier_ABOD_one = list(clf.labels_)

In [392]:
outlier_ABOD_one = list(map(lambda x: 1 if x==0  else -1,outlier_ABOD_one))

In [419]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_ABOD_one,tab_orbit)

In [None]:
_conf.conf("ABOD (Kriegel et al., 2008)")

In [None]:
eight = seven.append(_conf.tab)

### IForest

In [394]:
od = IForest(
    threshold=0.,
    n_estimators=100
)

In [395]:
od.fit(_df[['Latitude','Longitude','Magnitude']])

In [396]:
preds = od.predict(
    _df[['Latitude','Longitude','Magnitude']],
    return_instance_score=True
)

In [397]:
# _df['IF_alibi'] = preds['data']['is_outlier']

In [398]:
# outlier_alibi_one = _df['IF_alibi']
outlier_alibi_one = preds['data']['is_outlier']

In [399]:
outlier_alibi_one = list(map(lambda x: 1 if x==0  else -1,outlier_alibi_one))

In [420]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1,-1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1,-1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1,-1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1,-1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_alibi_one,tab_orbit)

In [None]:
_conf.conf("Isolation Forest (Liu et al., 2008)")

In [None]:
nine = eight.append(_conf.tab)

### HBOS

In [401]:
clf = HBOS()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['HBOS_clf'] = clf.labels_

HBOS(alpha=0.1, contamination=0.1, n_bins=10, tol=0.5)

In [402]:
outlier_HBOS_one = list(clf.labels_)

In [403]:
outlier_HBOS_one = list(map(lambda x: 1 if x==0  else -1,outlier_HBOS_one))

In [421]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1,-1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1,-1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1,-1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1,-1,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_HBOS_one,tab_orbit)

In [None]:
_conf.conf("HBOS (Goldstein and Dengel, 2012)")

In [None]:
ten = nine.append(_conf.tab)

### SOS

In [405]:
outlier_SOS_one = list(clf.labels_)

In [406]:
outlier_SOS_one = list(map(lambda x: 1 if x==0  else -1,outlier_SOS_one))

In [407]:
clf = SOS()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['SOS_clf'] = clf.labels_

SOS(contamination=0.1, eps=1e-05, metric='euclidean', perplexity=4.5)

In [422]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1,-1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1,-1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1,-1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1,-1,1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1,-1,1,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_SOS_one,tab_orbit)

In [None]:
_conf.conf("SOS (Janssens et al., 2012)")

In [None]:
eleven = ten.append(_conf.tab)

### SO_GAAL

In [325]:
clf = SO_GAAL()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['SO_GAAL_clf'] = clf.labels_

  super().__init__(name, **kwargs)


Epoch 1 of 60

Testing for epoch 1 index 1:

Testing for epoch 1 index 2:

Testing for epoch 1 index 3:

Testing for epoch 1 index 4:

Testing for epoch 1 index 5:

Testing for epoch 1 index 6:

Testing for epoch 1 index 7:

Testing for epoch 1 index 8:

Testing for epoch 1 index 9:

Testing for epoch 1 index 10:

Testing for epoch 1 index 11:

Testing for epoch 1 index 12:

Testing for epoch 1 index 13:

Testing for epoch 1 index 14:

Testing for epoch 1 index 15:

Testing for epoch 1 index 16:

Testing for epoch 1 index 17:

Testing for epoch 1 index 18:

Testing for epoch 1 index 19:

Testing for epoch 1 index 20:

Testing for epoch 1 index 21:

Testing for epoch 1 index 22:

Testing for epoch 1 index 23:

Testing for epoch 1 index 24:
Epoch 2 of 60

Testing for epoch 2 index 1:

Testing for epoch 2 index 2:

Testing for epoch 2 index 3:

Testing for epoch 2 index 4:

Testing for epoch 2 index 5:

Testing for epoch 2 index 6:

Testing for epoch 2 index 7:

Testing for epoch 2 index 

SO_GAAL(contamination=0.1, lr_d=0.01, lr_g=0.0001, momentum=0.9,
    stop_epochs=20)

In [326]:
outlier_SO_GAAL_one = list(clf.labels_)

In [327]:
outlier_SO_GAAL_one = list(map(lambda x: 1 if x==0  else -1,outlier_SO_GAAL_one))

In [423]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'}),
          pd.DataFrame(outlier_SO_GAAL_one).rename(columns={0:'SO_GAAL'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS',
                         'SO_GAAL':'SO_GAAL'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1,-1,1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1,-1,1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1,-1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1,-1,1,1,1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1,-1,1,1,1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1,1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_SO_GAAL_one,tab_orbit)

In [None]:
_conf.conf("SO-GAAL (Liu et al., 2019)")

In [None]:
twelve = eleven.append(_conf.tab)

### MO_GAAL

In [None]:
clf = MO_GAAL()
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['MO_GAAL_clf'] = clf.labels_

  super().__init__(name, **kwargs)


Epoch 1 of 60

Testing for epoch 1 index 1:

Testing for epoch 1 index 2:

Testing for epoch 1 index 3:

Testing for epoch 1 index 4:

Testing for epoch 1 index 5:

Testing for epoch 1 index 6:

Testing for epoch 1 index 7:

Testing for epoch 1 index 8:

Testing for epoch 1 index 9:

Testing for epoch 1 index 10:

Testing for epoch 1 index 11:

Testing for epoch 1 index 12:

Testing for epoch 1 index 13:

Testing for epoch 1 index 14:

Testing for epoch 1 index 15:

Testing for epoch 1 index 16:

Testing for epoch 1 index 17:

Testing for epoch 1 index 18:

Testing for epoch 1 index 19:

Testing for epoch 1 index 20:

Testing for epoch 1 index 21:

Testing for epoch 1 index 22:

Testing for epoch 1 index 23:

Testing for epoch 1 index 24:
Epoch 2 of 60

Testing for epoch 2 index 1:

Testing for epoch 2 index 2:

Testing for epoch 2 index 3:

Testing for epoch 2 index 4:

Testing for epoch 2 index 5:

Testing for epoch 2 index 6:

Testing for epoch 2 index 7:

Testing for epoch 2 index 

In [340]:
outlier_MO_GAAL_one = list(clf.labels_)

In [341]:
outlier_MO_GAAL_one = list(map(lambda x: 1 if x==0  else -1,outlier_MO_GAAL_one))

In [424]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'}),
          pd.DataFrame(outlier_SO_GAAL_one).rename(columns={0:'SO_GAAL'}),
          pd.DataFrame(outlier_MO_GAAL_one).rename(columns={0:'MO_GAAL'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS',
                         'SO_GAAL':'SO_GAAL',
                         'MO_GAAL':'MO_GAAL'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1,-1,1,1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1,-1,1,1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1,-1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1,-1,1,1,1,-1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1,-1,1,1,1,-1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1,1,1,1,1,1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_MO_GAAL_one,tab_orbit)

In [None]:
_conf.conf("MO-GAAL (Liu et al., 2019)")

In [None]:
thirteen = twelve.append(_conf.tab)

### LSCP

In [343]:
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors)
clf.fit(_df[['Latitude','Longitude','Magnitude']])
# _df['LSCP_clf'] = clf.labels_



LSCP(contamination=0.1,
   detector_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, novelty=True, p=2), OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   local_max_features=1.0, local_region_size=30, n_bins=3,
   random_state=RandomState(MT19937) at 0x7F44245B8240)

In [344]:
outlier_LSCP_one = list(clf.labels_)

In [345]:
outlier_LSCP_one = list(map(lambda x: 1 if x==0  else -1,outlier_LSCP_one))

In [425]:
pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'}),
          pd.DataFrame(outlier_SO_GAAL_one).rename(columns={0:'SO_GAAL'}),
          pd.DataFrame(outlier_MO_GAAL_one).rename(columns={0:'MO_GAAL'}),
          pd.DataFrame(outlier_LSCP_one).rename(columns={0:'LSCP'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS',
                         'SO_GAAL':'SO_GAAL',
                         'MO_GAAL':'MO_GAAL',
                         'LSCP':'LSCP'})

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL,LSCP
0,0.663,-26.045,5.5,2010.0,5.514405,-0.014405,0.000207,1,1,1,-1,1,1,1,-1,1,1,1,1,1
1,-19.209,167.902,5.1,2010.0,5.114861,-0.014861,0.000221,1,1,1,1,1,1,1,1,1,1,1,1,1
2,-31.830,-178.135,5.0,2010.0,5.159778,-0.159778,0.025529,1,1,1,1,-1,1,1,-1,1,1,1,1,1
3,-19.984,168.353,5.0,2010.0,5.214340,-0.214340,0.045942,-1,1,1,1,1,1,1,1,1,1,1,1,1
4,50.380,153.964,5.0,2010.0,5.099783,-0.099783,0.009957,1,1,-1,1,1,-1,-1,-1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12493,-22.874,69.345,5.2,2010.0,5.039599,0.160401,0.025728,1,1,1,1,1,1,1,1,1,1,1,1,1
12494,42.360,-30.462,5.0,2010.0,5.104141,-0.104141,0.010845,1,1,-1,1,1,1,-1,-1,1,1,1,-1,-1
12495,40.726,51.925,5.0,2010.0,4.926934,0.073066,0.005339,1,-1,-1,-1,1,1,1,-1,1,1,1,-1,-1
12496,30.646,83.791,5.2,2010.0,5.240853,-0.040853,0.001669,1,1,-1,1,1,1,1,1,1,1,1,1,-1


In [None]:
_conf = Conf_matrx(outlier_true_one,outlier_LSCP_one,tab_orbit)

In [None]:
_conf.conf("LSCP (Zhao et al., 2019)")

In [None]:
fourteen_orbit = thirteen.append(_conf.tab)

## Result

In [426]:
_df_rst = pd.concat([_df,pd.DataFrame(_df['Residual']**2).rename(columns={'Residual':'rst'}),pd.DataFrame(outlier_simul_one),
          pd.DataFrame(lof_rst).rename(columns={0:'LOF'}),
          pd.DataFrame(outlier_KNN_one).rename(columns={0:'KNN'}),
          pd.DataFrame(outlier_OSVM_one).rename(columns={0:'OCSVM'}),
          pd.DataFrame(outlier_MCD_one).rename(columns={0:'MCD'}),
          pd.DataFrame(outlier_FeatureBagging_one).rename(columns={0:'Feature Bagging'}),
          pd.DataFrame(outlier_ABOD_one).rename(columns={0:'ABOD'}),
          pd.DataFrame(outlier_alibi_one).rename(columns={0:'IForest'}),
          pd.DataFrame(outlier_HBOS_one).rename(columns={0:'HBOS'}),
          pd.DataFrame(outlier_SOS_one).rename(columns={0:'SOS'}),
          pd.DataFrame(outlier_SO_GAAL_one).rename(columns={0:'SO_GAAL'}),
          pd.DataFrame(outlier_MO_GAAL_one).rename(columns={0:'MO_GAAL'}),
          pd.DataFrame(outlier_LSCP_one).rename(columns={0:'LSCP'})],axis=1).\
          rename(columns={'Latitude':'Latitude',
                          'Longitude':'Longitude',
                          'Magnitude':'Magnitude',
                          'Year':'Year',
                          'MagnitudeHat':'MagnitudeHat',
                          'Residual':'Residual',
                          'rst':'Anomalious Score',
                          0:'GODE',
                          'LOF':'LOF',
                         'KNN':'KNN',
                         'OCSVM':'OCSVM',
                         'MCD':'MCD',
                         'Feature Bagging':'Feature Bagging',
                         'ABOD':'ABOD',
                         'IForest':'IForest',
                         'HBOS':'HBOS',
                         'SOS':'SOS',
                         'SO_GAAL':'SO_GAAL',
                         'MO_GAAL':'MO_GAAL',
                         'LSCP':'LSCP'})

In [427]:
_df_compa = _df_rst.copy()

In [488]:
cmp = pd.concat([pd.read_csv('05_10.csv'),pd.read_csv('10_15.csv')]).iloc[:,[0,1,2,4]].rename(columns={'latitude':'Latitude','longitude':'Longitude','mag':'Magnitude'}).reset_index().iloc[:,1:]

In [489]:
cmp

Unnamed: 0,time,Latitude,Longitude,Magnitude
0,2010-12-31T16:30:54.520Z,0.663,-26.045,5.5
1,2010-12-31T04:11:03.180Z,-19.209,167.902,5.1
2,2010-12-30T23:47:03.930Z,-31.830,-178.135,5.0
3,2010-12-30T21:22:30.350Z,-19.984,168.353,5.0
4,2010-12-30T19:56:36.380Z,50.380,153.964,5.0
...,...,...,...,...
24099,2010-01-01T14:31:10.130Z,-22.874,69.345,5.2
24100,2010-01-01T09:37:11.290Z,42.360,-30.462,5.0
24101,2010-01-01T02:34:56.050Z,40.726,51.925,5.0
24102,2010-01-01T02:22:23.820Z,30.646,83.791,5.2


#### Haiti

In [457]:
_df_compa[_df_compa['Latitude']==18.443] # Haiti(lat=18.4430, lon=-72.5710)

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL,LSCP
2326,18.443,-72.571,7.0,2010.0,6.659386,0.340614,0.116018,-1,-1,1,-1,1,-1,-1,-1,-1,-1,1,-1,-1
12429,18.443,-72.571,7.0,2010.0,6.386632,0.613368,0.37622,-1,-1,1,1,1,-1,-1,-1,-1,-1,1,-1,-1


In [490]:
cmp[cmp['Latitude']==18.443]

Unnamed: 0,time,Latitude,Longitude,Magnitude
2326,2010-01-12T21:53:10.060Z,18.443,-72.571,7.0
24035,2010-01-12T21:53:10.060Z,18.443,-72.571,7.0


#### Iquique

In [455]:
_df_compa[_df_compa['Latitude']==-32.6953] # Iquiqeu lat=-32.6953, lon=-71.4416

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL,LSCP
2997,-32.6953,-71.4416,6.4,2014.0,6.088353,0.311647,0.097124,-1,-1,1,1,1,-1,1,-1,-1,-1,1,1,1


In [491]:
cmp[cmp['Latitude']==-32.6953]

Unnamed: 0,time,Latitude,Longitude,Magnitude
14603,2014-08-23T22:32:23.320Z,-32.6953,-71.4416,6.4


In [496]:
_df_compa[_df_compa['Latitude']==-20.5709] # Iquiqeu lat=-32.6953, lon=-71.4416

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL,LSCP
3723,-20.5709,-70.4931,7.7,2014.0,6.991148,0.708852,0.502471,-1,-1,1,-1,-1,-1,1,-1,-1,-1,1,-1,-1


In [495]:
cmp[cmp['Latitude']==-20.5709]

Unnamed: 0,time,Latitude,Longitude,Magnitude
15329,2014-04-03T02:43:13.110Z,-20.5709,-70.4931,7.7


#### Sichan

In [456]:
_df_compa[_df_compa['Latitude']==30.3080] # sichan(lat=30.3080, lon=102.8880)

Unnamed: 0,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,KNN,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL,LSCP
5137,30.308,102.888,6.6,2013.0,5.904218,0.695782,0.484113,-1,-1,1,1,1,-1,-1,-1,-1,-1,1,1,-1


In [492]:
cmp[cmp['Latitude']==30.3080]

Unnamed: 0,time,Latitude,Longitude,Magnitude
16743,2013-04-20T00:02:47.540Z,30.308,102.888,6.6


In [441]:
_df_compa.sort_values('Anomalious Score',ascending=False).iloc[:50,:].reset_index()

Unnamed: 0,index,Latitude,Longitude,Magnitude,Year,MagnitudeHat,Residual,Anomalious Score,GODE,LOF,...,OCSVM,MCD,Feature Bagging,ABOD,IForest,HBOS,SOS,SO_GAAL,MO_GAAL,LSCP
0,2064,-36.122,-72.898,8.8,2010.0,7.572545,1.227455,1.506646,-1,-1,...,1,-1,-1,-1,-1,-1,-1,1,-1,-1
1,3752,-19.6097,-70.7691,8.2,2014.0,7.075499,1.124501,1.264504,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,1,-1,-1
2,12167,-36.122,-72.898,8.8,2010.0,7.742429,1.057571,1.118457,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,1,-1,-1
3,9660,36.281,141.111,7.9,2011.0,6.912847,0.987153,0.97447,-1,-1,...,1,1,-1,-1,-1,-1,-1,1,-1,-1
4,6877,0.802,92.463,8.2,2012.0,7.353106,0.846894,0.71723,-1,1,...,1,-1,-1,-1,-1,-1,-1,1,-1,-1
5,7938,-21.611,-179.528,7.3,2011.0,6.497713,0.802287,0.643665,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,1,1,-1
6,10593,-3.487,100.082,7.8,2010.0,7.008107,0.791893,0.627095,-1,-1,...,-1,1,-1,-1,-1,-1,-1,1,-1,-1
7,3835,-19.9807,-70.7022,6.7,2014.0,5.913563,0.786437,0.618483,-1,-1,...,1,1,-1,1,-1,-1,-1,1,1,1
8,3281,-29.9772,-177.7247,6.9,2014.0,6.129969,0.770031,0.592947,-1,1,...,-1,-1,-1,1,-1,-1,-1,1,-1,1
9,4997,-23.009,-177.232,7.4,2013.0,6.648946,0.751054,0.564082,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,1,1,-1
