In [1]:
from sklearn.metrics import auc
from sklearn.ensemble import IsolationForest

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import os, json

In [29]:
#preprocess
def get_data(hierClass, outlier):
    
    feature_list = pd.read_pickle('../../data_raw/features_RF_model.pkl')
    train = pd.read_pickle('../../data/train_data_filtered.pkl')
    test = pd.read_pickle('../../data/test_data_filtered.pkl')
    
    train = train[train.hierClass==hierClass]
    train['hierPredtmp'] = train['hierClass']
    
    test = test[test['hierPred']==hierClass]
    test['hierPredtmp'] = test['hierPred']
    
    test = pd.concat([test, train[train.classALeRCE==outlier]], sort=False)
    train = train[train.classALeRCE!=outlier]
    
    train = train[feature_list]
    train[np.isnan(train)] = -999
    
    test_features = test[feature_list]
    test_features[np.isnan(test_features)] = -999
    
    test_labels = np.where((test['classALeRCE']!= outlier), 0, test['classALeRCE']) #Inlier:0
    test_labels = np.where(test['hierClass']!=test['hierPredtmp'], 1, test_labels) #Type1:1
    test_labels = np.where(test['classALeRCE']==outlier, 2, test_labels) #Type2:2
    test_labels = test_labels.reshape(-1,).astype('int8')
    return train, test_features, test_labels

In [30]:
#utils 
def save_metrics(metrics, root_dir, mode='val'):
    """save all the metrics."""
    mt_dir = os.path.join(root_dir, 'metrics_{}.json'.format(mode))
    with open(mt_dir, 'w') as mt:
        json.dump(metrics, mt)

def plot_histogram(in_scores, out1_scores, out2_scores, directory):
    plt.hist(in_scores, color='k', alpha=0.3, density=True, label='Inlier')
    plt.hist(out1_scores, color='b', alpha=0.3, density=True, label='Outlier1')
    plt.hist(out2_scores, color='purple', alpha=0.3, density=True, label='Outlier2')
    plt.title('Inliers vs Outliers (IForest)')
    plt.legend()
    plt.savefig('{}/plots/histogram.png'.format(directory))
    plt.close()
    
def compute_metrics(scores, labels, plot_hist=True, directory=None):
    """
    Computing the Area under the curve ROC and PR.
    """
    in_scores = scores[labels==0]
    out2_scores = scores[labels==1]
    out1_scores = scores[labels==2]

    auroc_out1, aupr_out1 = compute_roc_pr(in_scores, out1_scores)
    auroc_out2, aupr_out2 = compute_roc_pr(in_scores, out2_scores)
    auroc_out12, aupr_out12 = compute_roc_pr(in_scores, 
                              np.concatenate((out1_scores, out2_scores), axis=0))
    metrics = {'AU ROC Out1': auroc_out1,
               'AU PR Out1': aupr_out1,
               'AU ROC Out2': auroc_out2,
               'AU PR Out2': aupr_out2,
               'AU ROC Out12': auroc_out12,
               'AU PR Out12': aupr_out12,
               }
    if plot_hist:
        plot_histogram(in_scores, out1_scores, out2_scores, directory)
    return metrics

def compute_roc_pr(inliers_scores, outlier_scores):
    auroc_score = auroc(inliers_scores, outlier_scores)
    aupr_score = aupr(inliers_scores, outlier_scores)
    return auroc_score, aupr_score

def auroc(in_scores, out_scores):
    scores = np.concatenate((in_scores, out_scores), axis=0)
    start = np.min(scores)
    end = np.max(scores)   
    gap = (end- start)/100000

    aurocBase = 0.0
    fprTemp = 1.0
    tprs = []
    fprs = []
    for delta in np.arange(start, end, gap):
        tpr = np.sum(np.sum(out_scores < delta)) / np.float(len(out_scores))
        fpr = np.sum(np.sum(in_scores <= delta)) / np.float(len(in_scores))
        tprs.append(tpr)
        fprs.append(fpr)
    return auc(fprs, tprs)

def aupr(in_scores, out_scores):
    scores = np.concatenate((in_scores, out_scores), axis=0)
    start = np.min(scores)
    end = np.max(scores)   
    gap = (end- start)/100000
    
    precisions = []
    recalls = []
    for delta in np.arange(start, end, gap):
        tp = np.sum(np.sum(out_scores <= delta)) #/ np.float(len(out_scores))
        fp = np.sum(np.sum(in_scores <= delta)) #/ np.float(len(in_scores))
        if tp + fp == 0: continue
        precision = tp / (tp + fp)
        recall = tp / np.float(len(out_scores))
        precisions.append(precision)
        recalls.append(recall)
    return auc(recalls, precisions)

def print_metrics(metrics, directory):
    for metric, value in metrics.items():
        print("{}: {:.3f}".format(metric, value))
    print("##########################################")

In [31]:
def train(hierClass, outliers, train_features, directory):
    clf = IsolationForest(n_estimators=100, max_samples=256, contamination='auto').fit(train_features)
    pickle.dump(clf, open('{}/model.pkl'.format(directory), 'wb'))
    return clf

def test(model, test_features, test_labels, directory):
    scores = model.score_samples(test_features)
    metrics = compute_metrics(scores, test_labels, plot_hist=True, directory=directory)
    print_metrics(metrics, directory)
    save_metrics(metrics, directory, 'test')

## Transient Experiments

In [32]:
hierClass = 'Transient'
outliers = ['SLSN',
            'SNII',
            'SNIa',
            'SNIbc']

for outlier in outliers:
    for run in range(5):
        directory = 'results/{}_{}_run{}'.format(hierClass, outlier, run)
        if not os.path.exists(directory):
            os.makedirs(directory)
        plots_dir = '{}/plots'.format(directory)
        if not os.path.exists(plots_dir):
            os.makedirs(plots_dir)
        
        train_features, test_features, test_labels = get_data(hierClass, outlier)
        model = train(hierClass, outlier, train_features, directory)
        test(model, test_features, test_labels, directory)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.675
AU PR Out1: 0.185
AU ROC Out2: 0.823
AU PR Out2: 0.341
AU ROC Out12: 0.748
AU PR Out12: 0.403
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.690
AU PR Out1: 0.193
AU ROC Out2: 0.804
AU PR Out2: 0.336
AU ROC Out12: 0.746
AU PR Out12: 0.406
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.725
AU PR Out1: 0.208
AU ROC Out2: 0.840
AU PR Out2: 0.349
AU ROC Out12: 0.781
AU PR Out12: 0.424
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.673
AU PR Out1: 0.185
AU ROC Out2: 0.801
AU PR Out2: 0.345
AU ROC Out12: 0.736
AU PR Out12: 0.415
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.610
AU PR Out1: 0.155
AU ROC Out2: 0.806
AU PR Out2: 0.330
AU ROC Out12: 0.706
AU PR Out12: 0.389
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.718
AU PR Out1: 0.742
AU ROC Out2: 0.835
AU PR Out2: 0.402
AU ROC Out12: 0.730
AU PR Out12: 0.774
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.736
AU PR Out1: 0.750
AU ROC Out2: 0.848
AU PR Out2: 0.426
AU ROC Out12: 0.748
AU PR Out12: 0.783
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.712
AU PR Out1: 0.733
AU ROC Out2: 0.859
AU PR Out2: 0.456
AU ROC Out12: 0.727
AU PR Out12: 0.772
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.704
AU PR Out1: 0.726
AU ROC Out2: 0.841
AU PR Out2: 0.406
AU ROC Out12: 0.718
AU PR Out12: 0.761
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.722
AU PR Out1: 0.756
AU ROC Out2: 0.832
AU PR Out2: 0.438
AU ROC Out12: 0.733
AU PR Out12: 0.788
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)
  % (self.max_samples, n_samples))


AU ROC Out1: 0.448
AU PR Out1: 0.913
AU ROC Out2: 0.735
AU PR Out2: 0.566
AU ROC Out12: 0.458
AU PR Out12: 0.919
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)
  % (self.max_samples, n_samples))


AU ROC Out1: 0.472
AU PR Out1: 0.916
AU ROC Out2: 0.762
AU PR Out2: 0.575
AU ROC Out12: 0.482
AU PR Out12: 0.922
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)
  % (self.max_samples, n_samples))


AU ROC Out1: 0.490
AU PR Out1: 0.919
AU ROC Out2: 0.731
AU PR Out2: 0.552
AU ROC Out12: 0.498
AU PR Out12: 0.924
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)
  % (self.max_samples, n_samples))


AU ROC Out1: 0.478
AU PR Out1: 0.916
AU ROC Out2: 0.750
AU PR Out2: 0.572
AU ROC Out12: 0.487
AU PR Out12: 0.922
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)
  % (self.max_samples, n_samples))


AU ROC Out1: 0.497
AU PR Out1: 0.920
AU ROC Out2: 0.699
AU PR Out2: 0.552
AU ROC Out12: 0.504
AU PR Out12: 0.925
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.541
AU PR Out1: 0.278
AU ROC Out2: 0.827
AU PR Out2: 0.376
AU ROC Out12: 0.629
AU PR Out12: 0.448
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.545
AU PR Out1: 0.271
AU ROC Out2: 0.807
AU PR Out2: 0.335
AU ROC Out12: 0.626
AU PR Out12: 0.428
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.536
AU PR Out1: 0.266
AU ROC Out2: 0.807
AU PR Out2: 0.359
AU ROC Out12: 0.615
AU PR Out12: 0.430
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.540
AU PR Out1: 0.271
AU ROC Out2: 0.798
AU PR Out2: 0.300
AU ROC Out12: 0.620
AU PR Out12: 0.416
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.529
AU PR Out1: 0.265
AU ROC Out2: 0.804
AU PR Out2: 0.335
AU ROC Out12: 0.614
AU PR Out12: 0.422
##########################################


## Stochastic Experiments

In [None]:
hierClass = 'Stochastic'
outliers = ['AGN',
            'Blazar',
            'CV/Nova',
            'QSO',
            'YSO']

for outlier in outliers:
    for run in range(5):
        directory = 'results/{}_{}_run{}'.format(hierClass, outlier, run)
        if not os.path.exists(directory):
            os.makedirs(directory)
        plots_dir = '{}/plots'.format(directory)
        if not os.path.exists(plots_dir):
            os.makedirs(plots_dir)
        
        train_features, test_features, test_labels = get_data(hierClass, outlier)
        model = train(hierClass, outlier, train_features, directory)
        test(model, test_features, test_labels, directory)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.589
AU PR Out1: 0.437
AU ROC Out2: 0.860
AU PR Out2: 0.023
AU ROC Out12: 0.591
AU PR Out12: 0.442
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.615
AU PR Out1: 0.453
AU ROC Out2: 0.858
AU PR Out2: 0.022
AU ROC Out12: 0.617
AU PR Out12: 0.458
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.589
AU PR Out1: 0.438
AU ROC Out2: 0.866
AU PR Out2: 0.023
AU ROC Out12: 0.591
AU PR Out12: 0.442
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.600
AU PR Out1: 0.444
AU ROC Out2: 0.878
AU PR Out2: 0.028
AU ROC Out12: 0.602
AU PR Out12: 0.449
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


AU ROC Out1: 0.597
AU PR Out1: 0.445
AU ROC Out2: 0.879
AU PR Out2: 0.026
AU ROC Out12: 0.600
AU PR Out12: 0.450
##########################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._where(-key, value, inplace=True)


## Periodic Experiments

In [None]:
hierClass = 'Periodic'
outliers = ['CEP',
            'DSCT',
            'E',
            'RRL',
            'LPV']


for outlier in outliers:
    for run in range(5):
        directory = 'results/{}_{}_run{}'.format(hierClass, outlier, run)
        if not os.path.exists(directory):
            os.makedirs(directory)
        plots_dir = '{}/plots'.format(directory)
        if not os.path.exists(plots_dir):
            os.makedirs(plots_dir)
        
        train_features, test_features, test_labels = get_data(hierClass, outlier)
        model = train(hierClass, outlier, train_features, directory)
        test(model, test_features, test_labels, directory)