## This notebook demonstrates the usage of the Fuzzy Inference System (FIS) pipeline. It trains a FIS on the gas consumption data and shows the anomaly classification performance on a small test batch.

In [1]:
import numpy as np
import csv
import os
import sys
import matplotlib.pyplot as plt

import time

# Make the project's Code/ directory importable.
# parent_directory is resolved relative to the current working directory, so
# its value depends on where the kernel was started. Later cells reuse it to
# locate the data files.
parent_directory = os.path.abspath('..')
code_directory = parent_directory+'/Code/'
# Guard the append so re-running this cell does not pile up duplicate entries.
if code_directory not in sys.path:
    sys.path.append(code_directory)

from Fuzzification import cluster, scale
import Reader
import FIS

%matplotlib inline

In [2]:
# For this data set the target column is the first column
# ('gas' is the first name in the header shown in this cell's output).
target_col = 0

# Training set: file 'train1' inside <parent_directory>/Data/gas/.
file = 'train1'
path = parent_directory + '/Data/gas/'
data = Reader.read_gas_data(path, file)

The header: 
['gas', 'before1', 'before2', 'peak5', 'sum5', 'peak24', 'sum24', 'mean15', 'hour', 'FH', 'T', 'Q', 'U', 'peak5T', 'diffT', 'std_day', 'std_year', 'kwh', 'kwhpeak5', 'day_year', 'day_week', 'next_day_week']


In [3]:
# Name for the FIS file (it is read back later as FIS_name + '.FIS').
FIS_name = 'Demo_WM_gas'

# Membership function type: Gaussian, triangle or trapezoid.
mf = 'Gaussian'

# Overlap of the fuzzy sets, or the variance when the sets are Gaussian.
overlap = 0.07

# Number of clusters per feature: 22 entries, matching the 22 columns of the
# training header (presumably in header order -- confirm against FIS.train).
number_of_centroids = [
    11, 7, 7, 7, 9, 9, 11, 9, 11, 11, 11,
    11, 11, 9, 11, 9, 9, 9, 9, 11, 7, 7,
]

## Training

In [4]:
# Train the FIS on the training data and report how long it took.
# The result is presumably stored under FIS_name + '.FIS', which is the file
# the next cell reads -- confirm in FIS.train.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
FIS.train(FIS_name, data, target_col, mf, number_of_centroids, overlap)
print('The training took: ', time.perf_counter() - start, 'seconds')

The training took:  43.500887870788574 seconds


In [5]:
# Load the trained FIS back from FIS_name + '.FIS'.
# Note: mf and overlap are rebound here to the values returned by FIS.read,
# replacing the ones set in the configuration cell.
# RB is presumably the rule base -- confirm in FIS.read.
(method, mf, overlap,
 target_centroids, feature_centroids, RB) = FIS.read(FIS_name + '.FIS')

## Compute the threshold

In [6]:
# Outlier data set: file 'NTH_outliers_2008-2012' inside <parent_directory>/Data/gas/.
# NOTE(review): the header printed for this file has no 'gas' column, yet the
# following cells pass this data to FIS.test together with target_col = 0 --
# confirm that Reader/FIS handle the missing target column as intended.
file = 'NTH_outliers_2008-2012'
path = parent_directory + '/Data/gas/'
anomalies = Reader.read_gas_data(path, file)

The header: 
['before1', 'before2', 'peak5', 'sum5', 'peak24', 'sum24', 'mean15', 'hour', 'FH', 'T', 'Q', 'U', 'peak5T', 'diffT', 'std_day', 'std_year', 'kwh', 'kwhpeak5', 'day_year', 'day_week', 'next_day_week']


In [7]:
# Compute the threshold (NB: this is slow, so only the first DATA_SIZE samples
# of the outlier data are used).
# If no threshold is given, FIS.test returns the mean firing strength of the
# given data as its third value; that mean is used as the threshold afterwards.
DATA_SIZE = 100
# perf_counter() is a monotonic clock meant for measuring durations;
# time.time() can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
_, _, threshold = FIS.test(anomalies[0:DATA_SIZE], mf, overlap, target_centroids, feature_centroids, RB, target_col)
print('The testing took: ', time.perf_counter() - start, 'seconds')

The testing took:  22.51996898651123 seconds


In [8]:
# Show the mean firing strength computed on the outlier batch; it is the
# threshold passed to FIS.test in the classification cells.
print(
    'The mean fring strength of the outliers batch is : ',
    threshold,
)

The mean fring strength of the outliers batch is :  1.79935065838e-15


## Anomaly Classification

### True Positives

In [9]:
# Anomaly classification (NB: this is slow, so the batch is limited to
# DATA_SIZE samples).
# Uses the second DATA_SIZE-sized batch of anomalies, i.e. the samples right
# after the ones the threshold was computed from. Expressing the slice with
# DATA_SIZE (instead of a hard-coded 100:200) keeps it disjoint from the
# threshold batch and consistent with the "out of DATA_SIZE" report when
# DATA_SIZE is changed.
_, _, true_positives = FIS.test(anomalies[DATA_SIZE:2*DATA_SIZE], mf, overlap, target_centroids, feature_centroids, RB,
                                target_col, threshold = threshold )

In [10]:
# Report the count returned by FIS.test for the anomaly batch.
print(
    'The number of true positives is: ',
    true_positives,
    ' out of ',
    DATA_SIZE,
)

The number of true positives is:  91  out of  100


### False negatives

In [11]:
# Test set: file 'test1' inside <parent_directory>/Data/gas/.
file = 'test1'
path = parent_directory + '/Data/gas/'
data_test = Reader.read_gas_data(path, file)

The header: 
['gas', 'before1', 'before2', 'peak5', 'sum5', 'peak24', 'sum24', 'mean15', 'hour', 'FH', 'T', 'Q', 'U', 'peak5T', 'diffT', 'std_day', 'std_year', 'kwh', 'kwhpeak5', 'day_year', 'day_week', 'next_day_week']


In [12]:
# Classification on a batch of the test data (NB: this is slow, so only
# DATA_SIZE samples are used). data_test is read from 'test1', not from the
# outlier file, and the threshold from the outlier batch is reused.
# NOTE(review): if FIS.test counts the samples it flags as anomalous, hits on
# this batch would be false positives rather than false negatives -- confirm
# the intended meaning before relying on the name.
_, _, false_negatives = FIS.test(
    data_test[DATA_SIZE:2*DATA_SIZE],
    mf,
    overlap,
    target_centroids,
    feature_centroids,
    RB,
    target_col,
    threshold=threshold,
)

In [13]:
# Report the count returned by FIS.test for the test-data batch.
print(
    'The number of false negatives is: ',
    false_negatives,
    ' out of ',
    DATA_SIZE,
)

The number of false negatives is:  1  out of  100
