In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
from EMD_lib import cubic_spline_3pts, EMD
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
#from pyts.decomposition import SingularSpectrumAnalysis
from SSA_lib import SingularSpectrumAnalysis
from pykalman import KalmanFilter

SyntaxError: invalid syntax (SSA_lib.py, line 37)

In [32]:
! pip install pykalman

Collecting pykalman
  Downloading pykalman-0.9.5.tar.gz (228 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Using legacy 'setup.py install' for pykalman, since package 'wheel' is not installed.
Installing collected packages: pykalman
    Running setup.py install for pykalman: started
    Running setup.py install for pykalman: finished with status 'done'
Successfully installed pykalman-0.9.5


In [20]:
def get_data(df, mode='original', emd=False, ssa=False, kalman=False, dimension='one', window_size=0, intervals=2):
    delta_seconds = df.timestamp.diff() / np.timedelta64(1, 's')
    delta_seconds[np.where(delta_seconds == 0)[0]] = 1e-3
    delta_seconds = delta_seconds[1:]

    AP = df['meter_reading']
    
    if emd:
        emd = EMD()
        emd.emd(np.array(df['meter_reading']))
        imfs, res = emd.get_imfs_and_residue()
        try:
            df['meter_reading'] = imfs[2] + df['meter_reading'].mean()
        except IndexError: 
            df['meter_reading'] = imfs[1] + df['meter_reading'].mean()
    elif ssa:
        N = len(df)
        L = 20 
        K = N - L
        X = np.array(df['meter_reading'])
        X_stack = np.column_stack([X[i:i+L] for i in range(0,K)])
        groups = [np.arange(i, i + 5) for i in range(0, 11, 5)]

        ssa = SingularSpectrumAnalysis(window_size=L) #, groups=groups)
        X_ssa = ssa.fit_transform(X_stack)
        X = np.concatenate((X_ssa[0, 0],df['meter_reading'][K:]))
        df['meter_reading'] = X
    elif kalman:
        X = np.array(df['meter_reading'])
        kf = KalmanFilter(transition_matrices = [1],
        observation_matrices = [1],
        initial_state_mean = 0,
        initial_state_covariance = 1,
        observation_covariance=1,
        transition_covariance=.01)

        mean, cov = kf.filter(X)
        mean, std = mean.squeeze(), np.std(cov.squeeze())
        df['meter_reading'] = mean
        
    if mode == 'original':
        if dimension == 'one':
            return np.array(df['meter_reading'])
        elif dimension == 'multi':
            df['hour'] = df['timestamp'].dt.hour
            #df['day'] = df['timestamp'].dt.day
            df['weekday'] = df['timestamp'].dt.dayofweek
            #df['month'] = df['timestampevent_timestamp'].dt.month
            X = np.array(df[['meter_reading', 'hour', 'weekday']])
            return X
        elif dimension == 'multi_intervals':
            X = []
            
            for raw in range(len(df)):
                X_raw = []
                X_raw.append(df.meter_reading[raw])
                weekday = df['timestamp'][raw].dayofweek
                for i in range(7):
                    if i == weekday:
                        X_raw.append(1)
                    else:
                        X_raw.append(0)
                hour = df['timestamp'][raw].hour
                for i in range(24):
                    if i == hour:
                        X_raw.append(1)
                    else:
                        X_raw.append(0)
                minute = df['timestamp'][raw].minute
                for i in range(60 // intervals):
                    if (minute >= (i * intervals)) and (minute < ((i + 1) * intervals)):
                        X_raw.append(1)
                    else:
                        X_raw.append(0)
                X.append(X_raw)
            return np.array(X)
            
    elif mode == 'windows':
        ind = np.where(df.timestamp.apply(datetime.time) == datetime.strptime('00:00:00', '%H:%M:%S').time())[0]
        ans = np.zeros((len(ind), 24))
        prev = ind[0]
        k = 0
        #print(ind)
        #print(len(ind))
        if dimension == 'one':
            #print(ind)
            for i in ind[1:]:
                #print(prev, i, np.array(df['meter_reading'].loc[prev:i - 1])[:, np.newaxis].shape[0])
                shape_ = np.array(df['meter_reading'].loc[prev:i - 1])[:, np.newaxis].shape[0]
                #print(shape_)
                if shape_ != 24:
                    print('ATTENTION')
                    if shape_ > 720:
                        day = np.array(df['meter_reading'].loc[prev:i])\
                        [shape_ - 720:]
                    else:
                        day = np.array(df['meter_reading'].loc[prev - (720 - shape_):i])
                        
                else:
                    day = np.array(df['meter_reading'].loc[prev:i - 1])

                ans[k] = day
                prev = i
                k += 1
            return np.array(ans)[:-1]#.reshape(7, 720)
        elif dimension == 'multi':
            ans = np.zeros((len(ind), 24, 5))
            df['hour'] = df['timestamp'].dt.hour
            df['day'] = df['timestamp'].dt.day
            df['weekday'] = df['timestamp'].dt.dayofweek
            df['month'] = df['timestamp'].dt.month
            k = 0
            for i in ind[1:]:
                shape_ = np.array(df['meter_reading'].loc[prev:i - 1])[:, np.newaxis].shape[0]
                if shape_ != 24:
                    print('ATTENTIOT')
                    if shape_ > 720:
                        day = np.array(df['meter_reading'].loc[prev:i])\
                        [shape_ - 720:]
                        hour = np.array(df['hour'].loc[prev:i])\
                        [shape_ - 720:]
                        day_ = np.array(df['day'].loc[prev:i])\
                        [shape_ - 720:]
                        weekday = np.array(df['weekday'].loc[prev:i])\
                        [shape_ - 720:]
                        month = np.array(df['month'].loc[prev:i])\
                        [shape_ - 720:]
                    else:
                        day = np.array(df['meter_reading'].loc[prev - (720 - shape_):i])
                        hour = np.array(df['hour'].loc[prev - (720 - shape_):i])
                        day_ = np.array(df['day'].loc[prev - (720 - shape_):i])
                        weekday = np.array(df['weekday'].loc[prev - (720 - shape_):i])
                        month = np.array(df['month'].loc[prev - (720 - shape_):i])
                else:
                    day = np.array(df['meter_reading'].loc[prev:i - 1])
                    hour = np.array(df['hour'].loc[prev:i - 1])
                    day_ = np.array(df['day'].loc[prev:i -1 ])
                    weekday = np.array(df['weekday'].loc[prev:i-1 ])
                    month = np.array(df['month'].loc[prev:i-1 ])
                
                ans[k] = np.concatenate((day[:, np.newaxis], hour[:, np.newaxis], day_[:, np.newaxis]\
                                         , weekday[:, np.newaxis], month[:, np.newaxis]), axis=1)
                prev = i
                k += 1
            return np.array(ans)[:-1]
        elif dimension == 'multi_intervals':
            X = []
            #print(ind)
            for j in ind[1:]:
                #print(prev, j, j - prev)
                X_day = []
                df_day = df.loc[prev:j - 1].reset_index(drop=True)
                #print(len(df_day))
                '''
                if len(df_day) != 24:
                    print('!!!!!!!!')
                if len(df_day) > 720:
                    print('BIG')
                    df_day = df_day.loc[:720].reset_index(drop=True)
                    #print(len(df_day))
                elif len(df_day) < 720:
                    print('SMALL')
                    print(len(df))
                    if len(df) < 720*2:
                        print('HIII')
                        df_day = df.loc[prev - (720 - j + prev) :j - 1].reset_index(drop=True)
                    else:
                        df_day = df.loc[prev:(720+ prev)-1].reset_index(drop=True)
                print(len(df_day))
                '''
                for raw in range(len(df_day)):
                    X_raw = []
                    X_raw.append(df_day.meter_reading[raw])
                    weekday = df_day['timestamp'][raw].dayofweek
                    for i in range(7):
                        if i == weekday:
                            X_raw.append(1)
                        else:
                            X_raw.append(0)
                    hour = df_day['timestamp'][raw].hour
                    for i in range(24):
                        if i == hour:
                            X_raw.append(1)
                        else:
                            X_raw.append(0)
                    minute = df_day['timestamp'][raw].minute
                    for i in range(60 // intervals):
                        if (minute >= (i * intervals)) and (minute < ((i + 1) * intervals)):
                            X_raw.append(1)
                        else:
                            X_raw.append(0)
                    X_day += X_raw
                X.append(X_day)
                #print(prev, j)
                prev = j
            return np.array(X)
    else:
        print('INCORRECT MODE')
        return None

In [3]:
path = r'C:\Users\Lomonosova\Downloads\lead1.0-small\lead1.0-small.csv'
elec = pd.read_csv(path)

In [4]:
elec.timestamp = pd.to_datetime(elec.timestamp)

In [5]:
buildings = []
for i in elec.building_id.unique():
    df = elec[elec.building_id == i].reset_index(drop=True)
    df = df.fillna(method='ffill')
    df = df.dropna().reset_index(drop=True)
    if df.isna().sum().sum() == 0:
        buildings.append(df)

In [6]:
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import accuracy_score as accuracy
from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


# original

In [15]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    X = get_data(build, mode='original', dimension='one', ssa=False) 
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train[:, np.newaxis], y_train)
    pred = model.predict(X_test[:, np.newaxis])
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
[-1  1]
[-1  1]
1
[-1  1]
[-1  1]
2
[-1  1]
[-1  1]
3
[-1  1]
[-1  1]
4
[-1  1]
[-1  1]
5
[-1  1]
[-1  1]
6
[-1  1]
[-1  1]
7
[-1  1]
[-1  1]
8
[-1  1]
[-1  1]
9
[-1  1]
[-1  1]
10
[-1  1]
[-1  1]
11
[-1  1]
[-1  1]
12
[-1  1]
[-1  1]
13
[-1  1]
[-1  1]
14
[-1  1]
[-1  1]
15
[-1  1]
[-1  1]
16
[-1  1]
[-1  1]
17
[-1  1]
[-1  1]
18
[-1  1]
[-1  1]
19
[-1  1]
[-1  1]
20
[-1  1]
[-1  1]
21
[-1  1]
[-1  1]
22
[-1  1]
[-1  1]
23
[-1  1]
[-1  1]
24
[-1  1]
[-1  1]
25
[-1  1]
[-1  1]
26
[-1  1]
[-1  1]
27
[-1  1]
[-1  1]
28
[-1  1]
[-1  1]
29
[-1  1]
[-1  1]
30
[-1  1]
[-1  1]
31
[-1  1]
[-1  1]
32
[-1  1]
[-1  1]
33
[-1  1]
[-1  1]
34
[-1  1]
[-1  1]
35
[-1  1]
[-1  1]
36
[-1  1]
[-1  1]
37
[-1  1]
[-1  1]
38
[-1  1]
[-1  1]
39
[-1  1]
[-1  1]
40
[-1  1]
[-1  1]
41
[-1  1]
[-1  1]
42
[-1  1]
[-1  1]
43
[-1  1]
[-1  1]
44
[-1  1]
[-1  1]
45
[-1  1]
[-1  1]
46
[-1  1]
[-1  1]
47
[-1  1]
[-1  1]
48
[-1  1]
[-1  1]
49
[-1  1]
[-1  1]
50
[-1  1]
[-1  1]
51
[-1  1]
[-1  1]
52
[-1  1]
[-1  1]
53


# original emd 

In [16]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    X = get_data(build, mode='original', dimension='one', emd=True) 
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train[:, np.newaxis], y_train)
    pred = model.predict(X_test[:, np.newaxis])
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
[-1  1]
[-1  1]
1
[-1  1]
[-1  1]
2
[-1  1]
[-1  1]
3
[-1  1]
[-1  1]
4
[-1  1]
[-1  1]
5
[-1  1]
[-1  1]
6
[-1  1]
[-1  1]
7
[-1  1]
[-1  1]
8
[-1  1]
[-1  1]
9
[-1  1]
[-1  1]
10
[-1  1]
[-1  1]
11
[-1  1]
[-1  1]
12
[-1  1]
[-1  1]
13
[-1  1]
[-1  1]
14
[-1  1]
[-1  1]
15
[-1  1]
[-1  1]
16
[-1  1]
[-1  1]
17
[-1  1]
[-1  1]
18
[-1  1]
[-1  1]
19
[-1  1]
[-1  1]
20
[-1  1]
[-1  1]
21
[-1  1]
[-1  1]
22
[-1  1]
[-1  1]
23
[-1  1]
[-1  1]
24
[-1  1]
[-1  1]
25
[-1  1]
[-1  1]
26
[-1  1]
[-1  1]
27
[-1  1]
[-1  1]
28
[-1  1]
[-1  1]
29
[-1  1]
[-1  1]
30
[-1  1]
[-1  1]
31
[-1  1]
[-1  1]
32
[-1  1]
[-1  1]
33
[-1  1]
[-1  1]
34
[-1  1]
[-1  1]
35
[-1  1]
[-1  1]
36
[-1  1]
[-1  1]
37
[-1  1]
[-1  1]
38
[-1  1]
[-1  1]
39
[-1  1]
[-1  1]
40
[-1  1]
[-1  1]
41
[-1  1]
[-1  1]
42
[-1  1]
[-1  1]
43
[-1  1]
[-1  1]
44
[-1  1]
[-1  1]
45
[-1  1]
[-1  1]
46
[-1  1]
[-1  1]
47
[-1  1]
[-1  1]
48
[-1  1]
[-1  1]
49
[-1  1]
[-1  1]
50
[-1  1]
[-1  1]
51
[-1  1]
[-1  1]
52
[-1  1]
[-1  1]
53


# original multi

In [18]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    X = get_data(build, mode='original', dimension='multi', ssa=False) 
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
[-1  1]
[-1  1]
1
[-1  1]
[-1  1]
2
[-1  1]
[-1  1]
3
[-1  1]
[-1  1]
4
[-1  1]
[-1  1]
5
[-1  1]
[-1  1]
6
[-1  1]
[-1  1]
7
[-1  1]
[-1  1]
8
[-1  1]
[-1  1]
9
[-1  1]
[-1  1]
10
[-1  1]
[-1  1]
11
[-1  1]
[-1  1]
12
[-1  1]
[-1  1]
13
[-1  1]
[-1  1]
14
[-1  1]
[-1  1]
15
[-1  1]
[-1  1]
16
[-1  1]
[-1  1]
17
[-1  1]
[-1  1]
18
[-1  1]
[-1  1]
19
[-1  1]
[-1  1]
20
[-1  1]
[-1  1]
21
[-1  1]
[-1  1]
22
[-1  1]
[-1  1]
23
[-1  1]
[-1  1]
24
[-1  1]
[-1  1]
25
[-1  1]
[-1  1]
26
[-1  1]
[-1  1]
27
[-1  1]
[-1  1]
28
[-1  1]
[-1  1]
29
[-1  1]
[-1  1]
30
[-1  1]
[-1  1]
31
[-1  1]
[-1  1]
32
[-1  1]
[-1  1]
33
[-1  1]
[-1  1]
34
[-1  1]
[-1  1]
35
[-1  1]
[-1  1]
36
[-1  1]
[-1  1]
37
[-1  1]
[-1  1]
38
[-1  1]
[-1  1]
39
[-1  1]
[-1  1]
40
[-1  1]
[-1  1]
41
[-1  1]
[-1  1]
42
[-1  1]
[-1  1]
43
[-1  1]
[-1  1]
44
[-1  1]
[-1  1]
45
[-1  1]
[-1  1]
46
[-1  1]
[-1  1]
47
[-1  1]
[-1  1]
48
[-1  1]
[-1  1]
49
[-1  1]
[-1  1]
50
[-1  1]
[-1  1]
51
[-1  1]
[-1  1]
52
[-1  1]
[-1  1]
53


# original multi emd


In [23]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi', emd=True) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
1
2
[-1  1]
[-1  1]
3
4
[-1  1]
[-1  1]
5
[-1  1]
[-1  1]
6
7
[-1  1]
[-1  1]
8
[-1  1]
[-1  1]
9
[-1  1]
[-1  1]
10
[-1  1]
[-1  1]
11
[-1  1]
[-1  1]
12
13
[-1  1]
[-1  1]
14
[-1  1]
[-1  1]
15
[-1  1]
[-1  1]
16
[-1  1]
[-1  1]
17
[-1  1]
[-1  1]
18
[-1  1]
[-1  1]
19
20
[-1  1]
[-1  1]
21
[-1  1]
[-1  1]
22
23
[-1  1]
[-1  1]
24
[-1  1]
[-1  1]
25
[-1  1]
[-1  1]
26
[-1  1]
[-1  1]
27
[-1  1]
[-1  1]
28
[-1  1]
[-1  1]
29
[-1  1]
[-1  1]
30
[-1  1]
[-1  1]
31
[-1  1]
[-1  1]
32
[-1  1]
[-1  1]
33
[-1  1]
[-1  1]
34
[-1  1]
[-1  1]
35
[-1  1]
[-1  1]
36
[-1  1]
[-1  1]
37
[-1  1]
[-1  1]
38
[-1  1]
[-1  1]
39
[-1  1]
[-1  1]
40
[-1  1]
[-1  1]
41
[-1  1]
[-1  1]
42
[-1  1]
[-1  1]
43
[-1  1]
[-1  1]
44
[-1  1]
[-1  1]
45
[-1  1]
[-1  1]
46
[-1  1]
[-1  1]
47
[-1  1]
[-1  1]
48
[-1  1]
[-1  1]
49
[-1  1]
[-1  1]
50
51
[-1  1]
[-1  1]
52
[-1  1]
[-1  1]
53
[-1  1]
[-1  1]
54
55
[-1  1]
[-1  1]
56
57
[-1  1]
[-1  1]
58
[-1  1]
[-1  1]
59
[-1  1]
[-1  1]
60
61
[-1  1]
[-1  1]
62
[-1  

# original multi_int 2


In [24]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', emd=False) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
[-1  1]
[1]
1
[-1  1]
[1]
2
[-1  1]
[1]
3
[-1  1]
[1]
4
[-1  1]
[1]
5
[-1  1]
[1]
6
[-1  1]
[1]
7
[-1  1]
[1]
8
[-1  1]
[1]
9
[-1  1]
[1]
10
[-1  1]
[1]
11
[-1  1]
[1]
12
[-1  1]
[1]
13
[-1  1]
[1]
14
[-1  1]
[1]
15
[-1  1]
[1]
16
[-1  1]
[1]
17
[-1  1]
[1]
18
[-1  1]
[1]
19
[-1  1]
[1]
20
[-1  1]
[1]
21
[-1  1]
[1]
22
[-1  1]
[1]
23
[-1  1]
[1]
24
[-1  1]
[1]
25
[-1  1]
[1]
26
[-1  1]
[1]
27
[-1  1]
[1]
28
[-1  1]
[1]
29
[-1  1]
[1]
30
[-1  1]
[1]
31
[-1  1]
[1]
32
[-1  1]
[1]
33
[-1  1]
[1]
34
[-1  1]
[1]
35
[-1  1]
[1]
36
[-1  1]
[1]
37
[-1  1]
[1]
38
[-1  1]
[1]
39
[-1  1]
[1]
40
[-1  1]
[1]
41
[-1  1]
[1]
42
[-1  1]
[1]
43
[-1  1]
[1]
44
[-1  1]
[1]
45
[-1  1]
[1]
46
[-1  1]
[1]
47
[-1  1]
[1]
48
[-1  1]
[1]
49
[-1  1]
[1]
50
[-1  1]
[1]
51
[-1  1]
[1]
52
[-1  1]
[1]
53
[-1  1]
[1]
54
[-1  1]
[1]
55
[-1  1]
[1]
56
[-1  1]
[1]
57
[-1  1]
[1]
58
[-1  1]
[1]
59
[-1  1]
[1]
60
[-1  1]
[1]
61
[-1  1]
[1]
62
[-1  1]
[1]
63
[-1  1]
[1]
64
[-1  1]
[1]
65
[-1  1]
[1]
66
[-1  1]
[1]
67
[-

# original multi_int 5


In [25]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', emd=False, intervals = 5) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
[-1  1]
[1]
1
[-1  1]
[1]
2
[-1  1]
[1]
3
[-1  1]
[1]
4
[-1  1]
[1]
5
[-1  1]
[1]
6
[-1  1]
[1]
7
[-1  1]
[1]
8
[-1  1]
[1]
9
[-1  1]
[1]
10
[-1  1]
[1]
11
[-1  1]
[1]
12
[-1  1]
[1]
13
[-1  1]
[1]
14
[-1  1]
[1]
15
[-1  1]
[1]
16
[-1  1]
[1]
17
[-1  1]
[1]
18
[-1  1]
[1]
19
[-1  1]
[1]
20
[-1  1]
[1]
21
[-1  1]
[1]
22
[-1  1]
[1]
23
[-1  1]
[1]
24
[-1  1]
[1]
25
[-1  1]
[1]
26
[-1  1]
[1]
27
[-1  1]
[1]
28
[-1  1]
[1]
29
[-1  1]
[1]
30
[-1  1]
[1]
31
[-1  1]
[1]
32
[-1  1]
[1]
33
[-1  1]
[1]
34
[-1  1]
[1]
35
[-1  1]
[1]
36
[-1  1]
[1]
37
[-1  1]
[1]
38
[-1  1]
[1]
39
[-1  1]
[1]
40
[-1  1]
[1]
41
[-1  1]
[1]
42
[-1  1]
[1]
43
[-1  1]
[1]
44
[-1  1]
[1]
45
[-1  1]
[1]
46
[-1  1]
[1]
47
[-1  1]
[1]
48
[-1  1]
[1]
49
[-1  1]
[1]
50
[-1  1]
[1]
51
[-1  1]
[1]
52
[-1  1]
[1]
53
[-1  1]
[1]
54
[-1  1]
[1]
55
[-1  1]
[1]
56
[-1  1]
[1]
57
[-1  1]
[1]
58
[-1  1]
[1]
59
[-1  1]
[1]
60
[-1  1]
[1]
61
[-1  1]
[1]
62
[-1  1]
[1]
63
[-1  1]
[1]
64
[-1  1]
[1]
65
[-1  1]
[1]
66
[-1  1]
[1]
67
[-

# original multi_int 15


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', emd=False, intervals = 15) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int emd 2


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', emd=True, intervals = 2) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int emd 5


In [27]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', emd=True, intervals = 5) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
1
2
3
4
5
6
7
8
9
10
11
[-1  1]
[1]
12
13
14
[-1  1]
[1]
15
16
17
18
19
20
21
[-1  1]
[1]
22
23
24
25
[-1  1]
[1]
26
27
[-1  1]
[1]
28
29
[-1  1]
[1]
30
31
32
[-1  1]
[1]
33
34
[-1  1]
[1]
35
[-1  1]
[1]
36
[-1  1]
[1]
37
38
[-1  1]
[1]
39
40
[-1  1]
[1]
41
[-1  1]
[1]
42
[-1  1]
[1]
43
44
45
[-1  1]
[1]
46
47
[-1  1]
[1]
48
[-1  1]
[1]
49
50
51
52
[-1  1]
[1]
53
[-1  1]
[1]
54
55
[-1  1]
[1]
56
57
[-1  1]
[1]
58
59
[-1  1]
[1]
60
61
[-1  1]
[1]
62
63
64
65
66
67
68
69
70
71
72
73
[-1  1]
[1]
74
75
76
[-1  1]
[1]
77
78
79
80
81
82
[-1  1]
[1]
83
84
85
86
87
[-1  1]
[1]
88
[-1  1]
[1]
89
90
91
92
93
94
95
[-1  1]
[1]
96
[-1  1]
[1]
97
98
99
[-1  1]
[1]
100
[-1  1]
[1]
101
102
[-1  1]
[1]
103
104
105
[-1  1]
[1]
106
[-1  1]
[1]
107
108
109
[-1  1]
[1]
110
111
112
113
114
[-1  1]
[1]
115
116
[-1  1]
[1]
117
118
[-1  1]
[1]
119
[-1  1]
[1]
120
[-1  1]
[1]
121
[-1  1]
[1]
122
123
124
125
126
[-1  1]
[1]
127
[-1  1]
[1]
128
[-1  1]
[1]
129
130
131
[-1  1]
[1]
132
[-1  1]
[1]
133
134
[-1  1

# original multi_int emd 15


In [28]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', emd=True, intervals = 15) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
[-1  1]
[1]
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
[-1  1]
[1]
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
[-1  1]
[1]
89
90
91
92
93
94
95
96
97
98
99
100
[-1  1]
[1]
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
[-1  1]
[1]
151
152
[-1  1]
[1]
153
154
155
156
157
158
159
[-1  1]
[1]
160
161
162
163
164
165
166
167
168
169
170
171
172
173
[-1  1]
[1]
174
175
176
177
178
[-1  1]
[1]
179
180
181
182
183
184
185
186
187
[-1  1]
[1]
188
189
190
191
192
193
194
195
196
197
198
199
ACCURACY:  0.8835390634461724
F1 SCORE:  0.9371886291911187


# original SSA 


In [26]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='one', ssa=True, intervals = 2) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train[:, np.newaxis], y_train)
    pred = model.predict(X_test[:, np.newaxis])
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0


NameError: name 'SingularSpectrumAnalysis' is not defined

# original multi SSA 


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi', ssa=True, intervals = 15) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int SSA 2



In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', ssa=True, intervals = 2) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int SSA 5


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', ssa=True, intervals = 5) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int SSA 15


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', ssa=True, intervals = 15) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original Kalman


In [30]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='one', kalman=True, intervals = 5) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train[:, np.newaxis], y_train)
    pred = model.predict(X_test[:, np.newaxis])
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

0


NameError: name 'KalmanFilter' is not defined

# original multi Kalman 


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi', kalman=True, intervals = 15) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int Kalman 2


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', kalman=True, intervals = 2) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int Kalman 5


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', kalman=True, intervals = 5) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))

# original multi_int Kalman 15


In [None]:
accs = []
f1 = []
k = 0
for build in buildings:
    print(k)
    k += 1
    try:
        X = get_data(build, mode='original', dimension='multi_intervals', kalman=True, intervals = 15) 
    except IndexError:
        continue
    X = (X - X.mean()) / X.std()
    #print(X)
    y = np.array(build.anomaly)
    #print(y)
    
    y[np.where(y == 1)[0]] = -1
    y[np.where(y == 0)[0]] = 1
    print(np.unique(y))
    X_norm = X[np.where(y == 1)[0]]
    y_norm = y[np.where(y == 1)[0]]
    X_anom = X[np.where(y != 1)[0]]
    y_anom = y[np.where(y != 1)[0]]
    
    X_norm_train, X_norm_test, y_norm_train,y_norm_test = train_test_split(X_norm, y_norm, test_size=0.2)
    X_anom_train, X_anom_test, y_anom_train, y_anom_test = train_test_split(X_anom, y_anom, test_size=0.9)
    
    #print(X_norm_train.shape)
    X_train = np.concatenate((X_norm_train, X_anom_train), axis=0)
    y_train = np.concatenate((y_norm_train, y_anom_train), axis=0)
    X_test = np.concatenate((X_norm_test, X_anom_test), axis=0)
    y_test = np.concatenate((y_norm_test, y_anom_test), axis=0)
   
    #print(X_train.shape)
    #print(y_train.shape)
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    model = IsolationForest().fit(X_train, y_train)
    pred = model.predict(X_test)
    accs.append(accuracy(y_test, pred))

    print(np.unique(pred))
    f1.append(f1_score(y_test, pred))
print('ACCURACY: ', np.mean(accs))
print('F1 SCORE: ', np.mean(f1))