## Load Data

In [1]:
# Load Libraries

import numpy as np
import pandas as pd
import glob, os
from driftDataPreprocessing import sequenceNormalize, dataSplitShuffle

In [2]:
def readDriftData(dir, pattern, drift=True):
    """
    Read Data Function 
    Input: 
        1. dir: data directory
        2. pattern: files pattern
        3. drift: boolean. True if drift, False if no drift. This is under the assumption of only 100 drift files
    """
    if drift:
        driftData = dict()
        for pathAndFilename in glob.iglob(os.path.join(dir, pattern)):
            title, ext = os.path.splitext(os.path.basename(pathAndFilename))
            driftData[title] = np.loadtxt(pathAndFilename, delimiter=',', skiprows=1)[:50, 1:]
            driftData[title] = np.insert(driftData[title], 3, int(title)+100, axis=1)
            
    else:
        driftData = dict()
        for pathAndFilename in glob.iglob(os.path.join(dir, pattern)):
            title, ext = os.path.splitext(os.path.basename(pathAndFilename))
            driftData[title] = np.loadtxt(pathAndFilename, delimiter=',', skiprows=1)[:50, 1:]
            driftData[title] = np.insert(driftData[title], 3, int(title), axis=1)
            
    return np.stack(driftData.values())

In [3]:
# Load the data

driftData1 = readDriftData('./Drift_Data/Drift/NoDrift', '*.*', False)
driftData2 = readDriftData('./Drift_Data/Drift/Drift', '*.*', True)

In [4]:
# combine both drift and no drift data
driftData = np.vstack([driftData1, driftData2])

# Define Data Dimensions
driftDataAxis0 = driftData.shape[0]
driftDataAxis1 = driftData.shape[1]
driftDataAxis2 = driftData.shape[2]

# Flattening the data
flattenData = driftData.reshape(driftDataAxis0 * driftDataAxis1, driftDataAxis2)

In [5]:
# Adding exp_id data
exp_id = flattenData[:, -1]
data_to_normalize = flattenData[:, :-1]

# Normalizing the data
scaler = sequenceNormalize(data_to_normalize)
normalizedData = scaler.transform(data_to_normalize)

# Reshape the data
driftDataReshape = np.insert(normalizedData, 3, exp_id, axis=1)

In [6]:
# Converting the data into data frame prior to converting it to SFrame
driftFrame = pd.DataFrame(driftDataReshape, columns=['acc_x', 'acc_y', 'acc_z', 'exp_id'])
driftFrame['exp_id'] = driftFrame['exp_id'].astype('int')
driftFrame['exp_id'] = driftFrame['exp_id'].astype('int')

In [7]:
# Converting the data into SFrame
import turicreate as tc

driftSFrame = tc.SFrame(data=driftFrame)

target_map = {
    1.: 'drift',
    2.: 'no_drift',
}

driftSFrame['activity_id'] = driftSFrame['exp_id'].apply(lambda x: 1. if x>=100 else 2.)
driftSFrame['activity'] = driftSFrame['activity_id'].apply(lambda x: target_map[x])
driftSFrame = driftSFrame.remove_column('activity_id')

In [8]:
# Visualize the data
driftSFrame.head()

acc_x,acc_y,acc_z,exp_id,activity
0.8689566699493465,0.0301944728761514,0.8398534896490352,97,no_drift
0.8648677051106162,0.0324027334204369,0.8413133324638574,97,no_drift
0.8648677051106162,0.0324027334204369,0.8413133324638574,97,no_drift
0.8605266709015283,0.0335655602731389,0.8519728153594756,97,no_drift
0.8605266709015283,0.0335655602731389,0.8519728153594756,97,no_drift
0.8580228299734316,0.0430617151711941,0.855810930627875,97,no_drift
0.8614092748286825,0.0503258136139262,0.8555818712072308,97,no_drift
0.8723431148045975,0.0538380900830699,0.8442823371075167,97,no_drift
0.8723431148045975,0.0538380900830699,0.8442823371075167,97,no_drift
0.8772966270253274,0.053885681905145,0.8420344860465551,97,no_drift


## Train the Model with Turicreate

In [9]:
# Load sessions from preprocessed data
data = driftSFrame

# Train/test split by recording sessions
train, test = tc.activity_classifier.util.random_split_by_session(data, session_id='exp_id', fraction=0.8)

# Create an activity classifier
model = tc.activity_classifier.create(train, session_id='exp_id', target='activity',
                                      prediction_window=50, max_iterations=10, batch_size=32)

# Evaluate the model and save the results into a dictionary
metrics = model.evaluate(test)
print(metrics['accuracy'])

# Save the model for later use in Turi Create
model.save('mymodel.model')

# Export for use in Core ML
model.export_coreml('DriftClassifier.mlmodel')

The dataset has less than the minimum of 100 sessions required for train-validation split. Continuing without validation set


Using GPU to create model (AMD Radeon Pro 560)
+----------------+----------------+----------------+----------------+
| Iteration      | Train Accuracy | Train Loss     | Elapsed Time   |
+----------------+----------------+----------------+----------------+
| 1              | 0.375          | 0.083          | 0.1            |
| 2              | 0.805          | 0.023          | 0.3            |
| 3              | 0.805          | 0.018          | 0.4            |
| 4              | 0.922          | 0.011          | 0.5            |
| 5              | 0.836          | 0.012          | 0.6            |
| 6              | 0.984          | 0.004          | 0.7            |
| 7              | 0.992          | 0.003          | 0.8            |
| 8              | 0.992          | 0.001          | 0.9            |
| 9              | 0.969          | 0.002          | 1.1            |
| 10             | 0.992          | 0.001          | 1.2            |
+----------------+----------------+--------