## This notebook demonstrates the usage of the Fuzzy Inference System pipeline. It trains a FIS on the gas consumption data and shows the performance on a small test batch.

In [1]:
import numpy as np
import csv
import os
import sys
import matplotlib.pyplot as plt

# add path of needed code
sys.path.append(os.getcwd()+'/Code/')

from Fuzzification import cluster, scale
import FIS

%matplotlib inline

In [3]:
def read_data(path, file):
    '''
    Reads in the gas data and returns it as a float type numpy array.

    The file ``path + file + '.csv'`` is parsed as comma-separated values.
    The first two columns of every row (row ID and date) are removed and
    the remaining fields are converted to float.

    Rows whose remaining fields cannot be converted to float are treated
    as a header: they are printed (without the first two columns) and
    left out of the result. Completely blank lines are skipped.

    Parameters
    ----------
    path : str
        Directory prefix; it is concatenated as-is, so it has to end with
        a path separator.
    file : str
        File name without the '.csv' extension.

    Returns
    -------
    numpy.ndarray
        One row per numeric data row of the file.
    '''
    data_train = []
    with open(path + file + '.csv') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; appending it would
                # make the rows ragged and break the array conversion.
                continue
            try:
                data_train.append([float(x) for x in row[2:]])
            except ValueError:
                # Only a failed float() conversion marks a header row; any
                # other exception is a real error and must propagate.
                print('The header: ')
                print(row[2:])
    return np.array(data_train)

In [8]:
# For this data set, the target column is the first column.
target_col = 0

# Training set: read from path + file + '.csv'
file = 'train1'
path = 'Data/gas/'
data = read_data(path, file)

['gas', 'before1', 'before2', 'peak5', 'sum5', 'peak24', 'sum24', 'mean15', 'hour', 'FH', 'T', 'Q', 'U', 'peak5T', 'diffT', 'std_day', 'std_year', 'kwh', 'kwhpeak5', 'day_year', 'day_week', 'next_day_week']


In [6]:
# Name used for the FIS file
FIS_name = 'Demo_WM_gas'

# Membership function: Gaussian, triangle or trapezoid
mf = 'Gaussian'

# Overlap of the fuzzy sets, or the variance of the gaussian
overlap = 0.07

# Number of clusters per feature (22 entries)
Ncentroids = [
    11, 7, 7, 7, 9, 9, 11, 9, 11, 11, 11,
    11, 11, 9, 11, 9, 9, 9, 9, 11, 7, 7,
]


## Training

In [9]:
# Train the FIS on the loaded training data with the settings chosen above.
# NOTE(review): the next cell reads FIS_name + '.FIS', so this call presumably
# writes that file — confirm against FIS.train.
FIS.train(FIS_name, data, target_col, mf, Ncentroids, overlap)

In [10]:
# read in the trained FIS
# NOTE: this rebinds `mf` and `overlap` from the configuration cell to the
# values returned by FIS.read; later cells use these returned values.
method, mf, overlap, target_centroids, feature_centroids, RB = FIS.read(FIS_name +'.FIS')

## Testing

In [12]:
# Test set: read from path + file + '.csv'.
# `data` is rebound here and now holds the test rows instead of the training rows.
file = 'test1'
path = 'Data/gas/'
data = read_data(path, file)

['gas', 'before1', 'before2', 'peak5', 'sum5', 'peak24', 'sum24', 'mean15', 'hour', 'FH', 'T', 'Q', 'U', 'peak5T', 'diffT', 'std_day', 'std_year', 'kwh', 'kwhpeak5', 'day_year', 'day_week', 'next_day_week']


In [16]:
# testing of the FIS (NB takes a lot of time so reduce data size)
# Only the first 100 rows of the test data are passed in; the third return
# value of FIS.test is discarded.
RMSE, MAE, _ = FIS.test(data[0:100], mf, overlap, target_centroids, feature_centroids, RB, target_col)

In [18]:
# Report the error metrics of the test batch and the size of the rule base.
# NOTE: each print() receives two arguments. The recorded output of this cell
# shows them as tuples, i.e. it was produced where print is a statement
# (Python 2); where print is a function the label and value are printed
# separated by a space instead.
print('The RMSE is: ', RMSE)
print('The MAE is: ' , MAE)
print('The number of rules is ', len(RB))

('The RMSE is: ', 15.219587596551055)
('The MAE is: ', 11.67148790726425)
('The number of rules is ', 40839)
