In [1]:
import warnings
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import LocalOutlierFactor
from sklearn.neighbors import KNeighborsClassifier

In [2]:
warnings.simplefilter('ignore')

In [3]:
data_raw = pd.read_csv('exo-planets.csv')
data_raw.shape

(5657, 3198)

In [4]:
data = data_raw.drop('exo', axis=1)
data = ((data-data.mean())/data.std())*10
data['exo'] = data_raw['exo']

data.shape

(5657, 3198)

In [5]:
data['exo'].value_counts()

0    5615
1      42
Name: exo, dtype: int64

In [6]:
data.head()

Unnamed: 0,FLUX.1,FLUX.2,FLUX.3,FLUX.4,FLUX.5,FLUX.6,FLUX.7,FLUX.8,FLUX.9,FLUX.10,...,FLUX.3189,FLUX.3190,FLUX.3191,FLUX.3192,FLUX.3193,FLUX.3194,FLUX.3195,FLUX.3196,FLUX.3197,exo
0,-0.042358,-0.05043,-0.0784,-0.112735,-0.104943,-0.142377,-0.125858,-0.123104,-0.11752,-0.186855,...,-0.171166,-0.17278,-0.14299,-0.121205,-0.103707,0.266792,0.315605,0.240603,0.154565,1
1,-0.106226,-0.106303,-0.115569,-0.118845,-0.12281,-0.120011,-0.104925,-0.11815,-0.119527,-0.140792,...,-0.150485,-0.15587,-0.153558,-0.132259,-0.121799,0.237097,0.283416,0.247548,0.196882,1
2,0.168782,0.164302,0.154913,0.131402,0.118007,0.112189,0.13228,0.140745,0.136595,0.155235,...,-0.137025,-0.144864,-0.154614,-0.135573,-0.120951,0.237039,0.2627,0.19284,0.113913,1
3,0.0696,0.074759,0.055004,0.038766,0.055659,0.046124,0.071081,0.061463,0.075513,0.069755,...,-0.142063,-0.148984,-0.141951,-0.127109,-0.124446,0.238803,0.269494,0.226338,0.192649,1
4,-0.620292,-0.618665,-0.616768,-0.610479,-0.562517,-0.534441,-0.497414,-0.542629,-0.56512,-0.617125,...,-0.25973,-0.245196,-0.223769,-0.22306,-0.208394,0.011268,0.05435,-0.024525,-0.180585,1


---

In [7]:
test = data.loc[:89]
train = data.loc[90:].drop('exo', axis=1)

print(train.shape, test.shape)

(5567, 3197) (90, 3198)


In [8]:
mod = LocalOutlierFactor(contamination=0.18, n_neighbors=25, novelty=False)
preds = mod.fit_predict(train)

In [9]:
train['pred'] = preds
train_clean = train.drop(train[train['pred']==-1].index, axis=0)
train_clean.drop('pred', axis=1, inplace=True)

In [10]:
mod = LocalOutlierFactor(contamination=0.18, n_neighbors=15, novelty=True)
mod.fit(train_clean)
preds_exo = mod.predict(test.drop('exo', axis=1))

In [11]:
test['pred'] = preds_exo
test['pred'] = np.where(test['pred']==-1, 1, 0)

In [12]:
tn, fp, fn, tp = confusion_matrix(
    test['exo'].values, 
    test['pred'].values,
    ).ravel()

In [13]:
acc = (tp+tn)/(tn+fp+fn+tp)
spec = tn/(tn+fp)
sens = tp/(tp+fn)

print(acc, spec, sens, sep='\n')

0.6888888888888889
0.6875
0.6904761904761905


---

In [14]:
test = data.loc[:89]
train = data.copy(deep=True)

print(train.shape, test.shape)

(5657, 3198) (90, 3198)


In [15]:
mod = KNeighborsClassifier(n_neighbors=20)
mod.fit(train.drop('exo', axis=1), train['exo']);

In [16]:
preds = mod.predict(test.drop('exo', axis=1))
true = test['exo'].values

In [17]:
tn, fp, fn, tp = confusion_matrix(
    true, 
    preds,
    ).ravel()

In [18]:
acc = (tp+tn)/(tn+fp+fn+tp)
spec = tn/(tn+fp)
sens = tp/(tp+fn)

print(acc, spec, sens, sep='\n')

0.5333333333333333
1.0
0.0
