In [None]:
manifest = {
    # Resources and buildpack for the generated app (units are not stated in this notebook).
    'memory': 1024,
    'disk_quota': 1024,
    'buildpack': 'python_buildpack',
    # pip-style requirement specifiers for the packages this notebook imports.
    'requirements': [
        'numpy==1.12.1',
        'pandas',
        'influxdb',
        'scipy',
        # The PyPI name "sklearn" is a deprecated alias whose installation now
        # fails; "scikit-learn" is the real distribution providing `sklearn`.
        'scikit-learn',
        'git+https://github.com/benchuang11046/afs.git'
    ],
    'type': 'APP'
}

#### ***IMPORTANT***
The cell above transforms the program in this notebook into an "APP" for AFS.    
Please make sure that "manifest" is the first word of this notebook.

# Demo    
## Training Support Vector Machine (svm) and Decision Tree (dt) model      
---
This demo consists of the following five parts: 
1. InfluxDB Configurations  
2. Data preprocessing  
3. Training SVM model
4. Training Decision Tree model  
5. AFS SDK


## 1. InfluxDB Configurations    

In [None]:
# input the information of influxDB
from influxdb import DataFrameClient
import os

# InfluxDB connection settings. Every value can be overridden through an
# environment variable; the literals are only the defaults of this demo.
# NOTE(security): the default user/password/database below are credentials
# stored in the notebook itself. Rotate them and provide the real values via
# INFLUXDB_USER / INFLUXDB_PASSWORD / INFLUXDB_DATABASE instead of editing them here.

# serviceHost
host = os.environ.get('INFLUXDB_HOST', '124.9.14.29')
# The port must be an integer, while environment values are always strings.
port = int(os.environ.get('INFLUXDB_PORT', 8086))
# username
user = os.environ.get('INFLUXDB_USER', '7298659d-0a91-4c2c-b39d-846053654366')
# password
password = os.environ.get('INFLUXDB_PASSWORD', 's20W5LGBknHTxFNhfgM6vWYAz')
# database
dbname = os.environ.get('INFLUXDB_DATABASE', '80e06c52-e79f-4978-92da-7eff7359b445')

# Temporarily avoid line protocol time conversion issues #412, #426, #431.
protocol = 'json'

### Read data from InfluxDB

In [None]:
# DataFrameClient transforms query results from the DB into pandas DataFrames
client = DataFrameClient(host, port, user, password, dbname)

# Measurements available in the database; not used below, inspect `result`
# when the measurement name needs to be looked up.
result = client.query('show measurements')

# measurements
measurements = 'fh_test2'

# Read sample_data: the query result is indexed by measurement name
sample_data = client.query('select * from ' + measurements)

# Fail early with a clear message when the query returned nothing for this
# measurement, instead of failing later during preprocessing.
if measurements not in sample_data:
    raise ValueError(
        "InfluxDB query returned no data for measurement '{0}' in database '{1}'".format(
            measurements, dbname))

data = sample_data[measurements]

## 2. Data preprocessing

In [None]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
try:
    # Newer scikit-learn releases no longer ship sklearn.externals.joblib;
    # the standalone joblib package is installed as their dependency.
    import joblib
except ImportError:
    # Older scikit-learn releases only provide the bundled copy.
    from sklearn.externals import joblib

# Sensor columns used as model inputs.
col = [ 'STATUS_FAN',
        'VOLTAGE_INPUT',
        'PRESSURE_OUTPUT',
        'KW_FAN',
        'KW_EQUIPMENT',
        'KW_SUMMARY'
    ]
# Same columns followed by the label column.
col_2 = col + ['EVENT']

WINDOW_SIZE = 10        # number of consecutive samples placed side by side in one row
HORIZON = 12            # additional lag (in rows) of every input relative to its label
EVENT_TEMPERATURE = 98  # TEMPERATURE_OUTPUT value that is labelled as an event

# Start from the frame returned by the InfluxDB query rather than from `data`:
# `data` is rebound to the windowed frame at the end of this cell, so reading
# it here would make the cell fail when it is run a second time.
raw = sample_data[measurements]

# Label is '1' where TEMPERATURE_OUTPUT equals EVENT_TEMPERATURE, otherwise '0'.
# assign() returns a new frame, so the queried frame is left unmodified.
labeled = raw.assign(
    EVENT=np.where(raw['TEMPERATURE_OUTPUT'] == EVENT_TEMPERATURE, '1', '0'))

# Work on positional row labels so that shifting and concatenating do not
# depend on the original index of the queried frame.
inputs = labeled[col].reset_index(drop=True)
labels = labeled[['EVENT']].reset_index(drop=True)

# shift to make time window:
# block k holds the inputs lagged by (WINDOW_SIZE - 1 - k) + HORIZON rows, so
# the oldest sample comes first and even the newest sample of a window is
# HORIZON rows older than the label stored in the same row.
lagged_blocks = [inputs.shift(periods=WINDOW_SIZE - 1 - k + HORIZON)
                 for k in range(WINDOW_SIZE)]

# Columns are renumbered 0..n_inputs; the last column is the label.
data_10 = pd.concat(lagged_blocks + [labels], axis=1, ignore_index=True)

# Rows whose window is incomplete contain NaN from shifting and are dropped.
data = data_10.dropna(axis=0)

# split data into train and test sets
n_inputs = WINDOW_SIZE * len(col)
X = data.iloc[:, 0:n_inputs]
Y = data.iloc[:, n_inputs]
seed = 100
test_size = 0.3
# NOTE(review): consecutive rows share most of their window and the split is
# shuffled, so near-identical rows can land in both sets and the reported
# accuracy may be optimistic -- confirm whether a chronological split is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

## 3. Training SVM model    
#### Save the model as "svm_model_csc.pkl" file after training

In [None]:
# Fit a support vector classifier (RBF kernel, C=1.0) on the training split;
# fit() returns the fitted estimator itself.
svm_clf = svm.SVC(C=1.0, kernel='rbf').fit(X_train, y_train)

# Accuracy on the held-out test split
y_pre = svm_clf.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pre)
print("SVM Accuracy: %.2f%%" % (100.0 * svm_accuracy))

# Persist the fitted model to disk
joblib.dump(svm_clf, 'svm_model_csc.pkl')

## 4. Training Decision Tree model    
#### Save the model as "dt_model_csc.pkl" file after training

In [None]:
# Fit a decision tree classifier with default settings on the training split;
# fit() returns the fitted estimator itself.
dt_clf = tree.DecisionTreeClassifier().fit(X_train, y_train)

# Accuracy on the held-out test split
y_pre = dt_clf.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pre)
print("DT Accuracy: %.2f%%" % (100.0 * dt_accuracy))

# Persist the fitted model to a pkl file
joblib.dump(dt_clf, 'dt_model_csc.pkl')

## 5. AFS SDK

In [None]:
# AFS SDK for SVM
from afs.client import afs

# Upload the saved SVM model file together with its test accuracy and a machine tag.
# Note: `client` now refers to the AFS client, no longer to the InfluxDB client.
client = afs()
client.models.upload_model(
    'svm_model_csc.pkl',
    accuracy=svm_accuracy,
    loss=None,
    tags={'machine': 'machine01'})

In [None]:
# AFS SDK for Decision Tree
from afs.client import afs

# Upload the saved decision tree model file together with its test accuracy and a machine tag.
client = afs()
client.models.upload_model(
    'dt_model_csc.pkl',
    accuracy=dt_accuracy,
    loss=None,
    tags={'machine': 'machine01'})
