In [1]:
import os
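# Uncomment the two lines below to clear the HTTP(S) proxy environment
# variables for this session, if needed.
#os.environ['http_proxy'] = ''
#os.environ['https_proxy'] = ''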

#### In this tutorial we demonstrate how to load a neuron array directly into a pipeline, so that pipelines can be built on top of pre-trained models.

In [2]:
import pandas as pd
from kbclient.kb_dsk_basic.kb import KB

# Names of the project and of the pipeline (sandbox) used throughout this notebook.
PROJECT_NAME = 'Activity Case Study'
PIPELINE_NAME = 'Activity_Pipeline'

# Create the KB client, then select the project first and the pipeline second.
# If the named sandbox does not exist yet, the client reports that it creates one.
dsk = KB()
dsk.project = PROJECT_NAME
dsk.pipeline = PIPELINE_NAME

Sandbox Activity_Pipeline does not exist, creating a new sandbox.


In [3]:
# Columns reported by the selected project (presumably its sensor column
# names -- confirm against the KB client docs). Not used further in this cell.
sensor_columns = dsk.project.columns()

# Load the labelled accelerometer recordings (Subject, Activity, AccelerometerX/Y/Z).
df = pd.read_csv('Support/activities_combinedSignalsWithLabel_medium.csv')

# Subject labels carry a one-character prefix; drop it and keep the numeric id.
# The vectorized .str accessor replaces the original per-row Python lambda and
# the explicit 'int64' keeps the resulting dtype the same on every platform.
df['Subject'] = df['Subject'].str[1:].astype('int64')
df.head()

Unnamed: 0,Subject,Activity,AccelerometerX,AccelerometerY,AccelerometerZ
0,1,0,-317,-3000,925
1,1,0,-284,-2968,903
2,1,0,-243,-2987,933
3,1,0,-193,-3051,936
4,1,0,-150,-3059,915


This is the format of a neuron array for a trained model.

In [3]:
# Pre-trained neurons in compact form, one row per neuron:
#   (AIF, Category, Identifier, Vector)
# Every neuron of this model uses Context 1, and each Vector holds 8 values.
_NEURON_ROWS = [
    (1,   2, 1, [0, 0, 0, 0, 0, 124, 0, 0]),
    (1,   2, 2, [166, 31, 172, 138, 93, 31, 254, 35]),
    (143, 3, 3, [246, 232, 243, 68, 241, 156, 72, 16]),
    (1,   3, 4, [134, 171, 163, 42, 167, 114, 19, 254]),
    (349, 4, 5, [118, 165, 147, 184, 156, 191, 2, 0]),
    (449, 1, 6, [97, 78, 107, 43, 92, 106, 56, 4]),
    (354, 4, 7, [131, 184, 165, 53, 174, 128, 2, 3]),
]

# Expand each row into the dictionary layout of a neuron array entry:
# the keys are AIF, Category, Context, Identifier and Vector.
neuron_array = [
    {u'AIF': aif,
     u'Category': category,
     u'Context': 1,
     u'Identifier': identifier,
     u'Vector': vector}
    for aif, category, identifier, vector in _NEURON_ROWS
]

A simple pipeline that segments, filters, and generates features.

In [4]:
# Reset the pipeline before adding steps, then declare which columns hold the
# signal, the grouping keys and the label.
dsk.pipeline.reset()
dsk.pipeline.set_columns(data_columns=['AccelerometerY'],
                         group_columns=['Activity', 'Subject'],
                         label_column='Activity')

# Step 0: the DataFrame loaded earlier is the pipeline input
# (the client uploads it as "activity_data.csv").
dsk.pipeline.set_input_data('activity_data', df, data_columns=['AccelerometerY'])

# Step 1: scale the raw signal (transform).
scale_params = {'scale_factor': 4096.,
                'input_columns': ['AccelerometerY']}
dsk.pipeline.add_transform('Scale Factor', params=scale_params)

# Step 2: segment the signal with the default windowing settings (segmenter).
dsk.pipeline.add_transform('Windowing')

# Step 3: MSE filter on the signal column (transform).
mse_filter_params = {'input_column': 'AccelerometerY'}
dsk.pipeline.add_transform('MSE Filter', params=mse_filter_params)

# Feature generation: eight features, all computed on the same signal column.
feature_generators = [
    "Mean",
    'Standard Deviation',
    'Skewness',
    'Kurtosis',
    '25th Percentile',
    '75th Percentile',
    '100th Percentile',
    'Zero Crossing Rate',
]
generator_defaults = {"columns": [u'AccelerometerY']}
dsk.pipeline.add_feature_generator(feature_generators,
                                   function_defaults=generator_defaults)

# Feature selection: recursive feature elimination, keeping 8 features --
# the same length as each neuron Vector loaded later.
feature_selectors = [{"name": "Recursive Feature Elimination",
                      "params": {"method": "Log R"}}]
selector_params = {"number_of_features": 8}
dsk.pipeline.add_feature_selector(feature_selectors, params=selector_params)

# Final transform: min-max scaling of the selected features.
dsk.pipeline.add_transform('Min Max Scale')

Uploading file "activity_data.csv" to KB Cloud.
Using featurefile previously loaded to KB cloud, use force=True to overwrite.


Here we will use the training algorithm "Load Neuron Array", which loads the neurons directly into the hardware simulator and then proceeds to run the cross-fold validation on the model. Note that no training occurs; we are simply using the provided neurons.

In [5]:
# "Training" step: hand the pre-trained neurons to the pipeline as-is.
training_params = {'neuron_array': neuron_array}
dsk.pipeline.set_training_algorithm('Load Neuron Array', params=training_params)

# Validation: stratified 5-fold cross-validation.
validation_params = {'number_of_folds': 5}
dsk.pipeline.set_validation_method('Stratified K-Fold Cross-Validation',
                                   params=validation_params)

# Classifier: PVP in RBF mode with the L1 distance.
classifier_params = {"classification_mode": 'RBF', 'distance_mode': 'L1'}
dsk.pipeline.set_classifier('PVP', params=classifier_params)

# Add the train/validate/optimize step built from the three settings above
# (presumably what "tvo" stands for -- confirm against the KB client docs).
dsk.pipeline.set_tvo()

In [6]:
# Run the configured pipeline. execute() returns a pair, unpacked here as
# (results, stats); the client prints the list of steps while it runs.
results, stats = dsk.pipeline.execute()

Executing Pipeline with Steps:

------------------------------------------------------------------------
 0.     Name: activity_data.csv         		Type: featurefile              
------------------------------------------------------------------------
------------------------------------------------------------------------
 1.     Name: Scale Factor              		Type: transform                
------------------------------------------------------------------------
------------------------------------------------------------------------
 2.     Name: Windowing                 		Type: segmenter                
------------------------------------------------------------------------
------------------------------------------------------------------------
 3.     Name: MSE Filter                		Type: transform                
------------------------------------------------------------------------
------------------------------------------------------------------------
 4.     Name: g

In [7]:
# Print a summary of the run: the algorithm/validation/classifier used, the
# average metrics, and the accuracy and neuron count of each fold's model.
results.summarize()

TRAINING ALGORITHM: Load Neuron Array
VALIDATION METHOD:  Stratified K-Fold Cross-Validation
CLASSIFIER:         PVP

AVERAGE METRICS:
F1-SCORE:    9.6   sigma 1.17
SENSITIVITY: 27.2   sigma 0.13
PRECISION:   5.8   sigma 0.86

--------------------------------------

STRATIFIED K-FOLD CROSS-VALIDATION MODEL RESULTS

MODEL INDEX: Fold 3
ACCURACY: 6.83
NEURONS: 7

MODEL INDEX: Fold 4
ACCURACY: 6.83
NEURONS: 7

MODEL INDEX: Fold 1
ACCURACY: 5.56
NEURONS: 7

MODEL INDEX: Fold 2
ACCURACY: 4.94
NEURONS: 7

MODEL INDEX: Fold 0
ACCURACY: 4.91
NEURONS: 7

