In [4]:
%matplotlib inline
import numpy as np
import sys
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import importlib
import time
import datetime
import segment_cluster as sc
import pandas as pd
import os
import fnmatch
from scipy.stats import zscore

importlib.reload(sc)
sys.stdout.flush()
np.random.seed(0)

seg_lens=[10, 50, 100,150,200]

In [5]:
rho_outlier = "data/synthetic_rhos_v2.csv"

# datasets used, every item corresponds to one complete test of the algorithm
outlier_datasets = ["data/synthetic_flats.csv", 
                    "data/synthetic_boxes.csv", 
                    "data/synthetic_boxes_thick.csv", 
                    "data/synthetic_sines.csv", 
                    "data/synthetic_sines_low.csv", 
                    "data/synthetic_sines_long.csv", 
                    "data/synthetic_sines_short.csv", 
                    "data/synthetic_sines_low_long.csv", 
                    "data/synthetic_sines_low_short.csv",
                    "data/synthetic_flats.csv", 
                    "data/synthetic_boxes.csv", 
                    "data/synthetic_sines.csv"]

rho_data = np.loadtxt(rho_outlier, delimiter=',')
box_data = np.loadtxt("data/synthetic_boxes.csv", delimiter=',')

In [6]:
training_data = rho_data[0:50]
all_train_segments=[]#loop throught the light curves of a given class and segments them
for time_series in training_data:
    train_segments=sc.segmentation(time_series, 
                                50, 
                                1,
                                time_stamps=False)
    all_train_segments.append(train_segments)
all_train_segments=np.vstack(all_train_segments)

In [27]:
from sklearn import svm

OCSVM = svm.OneClassSVM(nu=0.05, kernel="rbf")
OCSVM.fit(all_train_segments)

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='auto', kernel='rbf',
      max_iter=-1, nu=0.05, random_state=None, shrinking=True, tol=0.001,
      verbose=False)

In [7]:
valid_data_rho = rho_data[200:250]
all_valid_rho_segments=[]#loop throught the light curves of a given class and segments them
for time_series in training_data:
    train_segments=sc.segmentation(time_series, 
                                50, 
                                1,
                                time_stamps=False)
    all_valid_rho_segments.append(train_segments)
all_valid_rho_segments=np.vstack(all_train_segments)

In [8]:
valid_data_box = box_data[0:50]
all_valid_box_segments=[]#loop throught the light curves of a given class and segments them
for time_series in training_data:
    train_segments=sc.segmentation(time_series, 
                                50, 
                                1,
                                time_stamps=False)
    all_valid_box_segments.append(train_segments)
all_valid_box_segments=np.vstack(all_train_segments)

In [30]:
predict_rho = OCSVM.predict(all_valid_rho_segments)

In [31]:
predict_box = OCSVM.predict(all_valid_box_segments)

In [32]:
np.unique(predict_box, return_counts=True)

(array([-1,  1]), array([18040,  4510]))

In [33]:
np.unique(predict_rho, return_counts=True)

(array([-1,  1]), array([18040,  4510]))

In [9]:
flat_data = np.loadtxt("data/synthetic_flats.csv", delimiter=',')
valid_data_flat = flat_data[0:50]
all_valid_flat_segments=[]#loop throught the light curves of a given class and segments them
for time_series in training_data:
    train_segments=sc.segmentation(time_series, 
                                50, 
                                1,
                                time_stamps=False)
    all_valid_flat_segments.append(train_segments)
all_valid_flat_segments=np.vstack(all_train_segments)

In [35]:
predict_flat = OCSVM.predict(all_valid_flat_segments)

In [36]:
np.unique(predict_flat, return_counts=True)

(array([-1,  1]), array([18040,  4510]))

In [32]:
import pyod
from pyod.models.loci import LOCI
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest

In [13]:
validation_segments = np.vstack((all_valid_rho_segments, all_valid_box_segments, all_valid_flat_segments))

In [30]:
LOF = LocalOutlierFactor(n_neighbors=50, algorithm="auto", leaf_size=30, metric="minkowski", p=2, metric_params=None, contamination=1e-10, novelty=True, n_jobs=None)
LOF.fit(all_train_segments)

LocalOutlierFactor(algorithm='auto', contamination=1e-10, leaf_size=300,
                   metric='minkowski', metric_params=None, n_jobs=None,
                   n_neighbors=50, novelty=True, p=2)

In [31]:
np.unique(LOF.predict(validation_segments), return_counts=True)

(array([1]), array([67650]))

In [41]:
IF = IsolationForest(n_estimators=1000, max_samples="auto", contamination=0., max_features=1.0, bootstrap=False, n_jobs=None, behaviour="old", random_state=None, verbose=1, warm_start=False)
IF.fit(all_train_segments)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.5s finished


IsolationForest(behaviour='old', bootstrap=False, contamination=0.0,
                max_features=1.0, max_samples='auto', n_estimators=1000,
                n_jobs=None, random_state=None, verbose=1, warm_start=False)

In [42]:
np.unique(IF.predict(validation_segments), return_counts=True)



(array([1]), array([67650]))

In [None]:
training-test outcome of MSE: compare test variance
rho-flat bad:low
rho-box bad:low
rho-widebox ok:low
rho-sines good:same
rho-smallsine bad:low
rho-widesine good:same
rho-narrowsine good:same
rho-smallwidesine bad:low
rho-smallshortsine bad:low
flat-rho good:high
box-rho ok:high
sine-rho bad:same


In [1]:
#train on flat, test against rho
tsscod_flat = sc.TSSCOD(k_clusters = 10, seg_len = 100)
flat_data = np.loadtxt("data/synthetic_flats.csv", delimiter=',')
tsscod_flat.train(flat_data[0:150], random_state = 0)

NameError: name 'sc' is not defined

In [None]:
import numpy as np
from scipy.spatial.distance import euclidean

from fastdtw import fastdtw

x = np.array([[1,1], [2,2], [3,3], [4,4], [5,5]])
y = np.array([[2,2], [3,3], [4,4]])
distance, path = fastdtw(x, y, dist=euclidean)
print(distance)