# Quadcopter Activity Recognition - TRAINING YOUR MODEL

Welcome to this training module that teaches you how to perform activity recognition with the help of a hosted Machine Learning instance. This notebook will guide you through the process of extracting information from your flight record!

## Getting started

Let's start by setting your group name. It is used to pull your group's raw accelerometer data from the cloud.

In [None]:
# import necessary libraries
import json
import boto3
import boto3.session
import string
import os
import csv
import numpy as np

# S3 bucket that every download cell in this notebook reads from.
s3_workspace_bucket = 'mldelarosa-thesis'

# S3 key prefixes; the download cells append GROUP_NAME and a file name to them.
# NOTE(review): both prefixes are the same string, so the flight-log cell reads
# from the dataset folder — confirm this is intended and not a copy-paste slip.
s3_subdir_group_training_flight_log = 'mar-lab-workspace/exercise-training/group-training-dataset/'
s3_subdir_group_training_dataset = 'mar-lab-workspace/exercise-training/group-training-dataset/'

# Local (notebook-side) directories. Only jupyter_subdir_group_workspace is
# referenced by the cells visible in this notebook.
jupyter_subdir_group_training_dataset = './data/training-group-dataset/'
jupyter_subdir_group_training_flight_log = './data/training-group-logs/'
jupyter_subdir_group_workspace = './data/group-workspace/'

def make_path(file_path):
    """Ensure the parent directory of ``file_path`` exists.

    Args:
        file_path: Path of a file that is about to be written. Only its
            directory component is created; the file itself is not touched.

    Returns:
        None.
    """
    directory = os.path.dirname(file_path)
    # A bare file name has an empty directory component and os.makedirs('')
    # raises, so there is nothing to create in that case.
    if directory:
        # exist_ok avoids the check-then-create race and makes re-runs no-ops.
        os.makedirs(directory, exist_ok=True)

def get_s3_client():
    """Return the low-level S3 client behind a freshly created boto3 session.

    A new ``boto3.session.Session`` is built on every call, an ``s3`` resource
    is requested from it with ``verify=True``, and that resource's underlying
    client (``resource.meta.client``) is handed back.
    """
    s3_resource = boto3.session.Session().resource(service_name='s3', verify=True)
    return s3_resource.meta.client

# Group identifier; later cells splice it into S3 keys and local paths.
GROUP_NAME='default'

# Marks the end of the setup cell.
print('[DONE] Runtime Initialized.')

# [INACTIVE] Let's pull your group's latest training flight log

In [None]:
# Group whose data is pulled; it is spliced into both the S3 key and the local path.
GROUP_NAME = 'default'

print('Pulling latest training data from S3 for [ MAR DATABASE ]')

client = get_s3_client()

# S3 key of the group's latest IMU data log and the local file it is saved to.
s3_filepath_group_training_dataset_latest = ''.join(
    [s3_subdir_group_training_dataset, GROUP_NAME, '/imu-data-log-latest'])
jupyter_filepath_group_training_dataset_latest = ''.join(
    [jupyter_subdir_group_workspace, GROUP_NAME, '/imu-latest-dataset.csv'])

print('Downloading from: {}/{}'.format(s3_workspace_bucket,
                                       s3_filepath_group_training_dataset_latest))
# The local folder has to exist before the download can write into it.
make_path(jupyter_filepath_group_training_dataset_latest)
print('Downloading to: {}'.format(jupyter_filepath_group_training_dataset_latest))

group_flight_record = client.download_file(
    Bucket=s3_workspace_bucket,
    Key=s3_filepath_group_training_dataset_latest,
    Filename=jupyter_filepath_group_training_dataset_latest,
)

# Both names refer to the file that was just downloaded.
filepath_group_flight_dataset_latest = jupyter_filepath_group_training_dataset_latest
filepath_group_consolidated_flight_dataset = filepath_group_flight_dataset_latest
print('[DONE]')

# [INACTIVE] Let's pull your group's training session...

In [None]:
# Group whose flight log is pulled; it is spliced into the S3 key and the local path.
GROUP_NAME = 'default'

print('Pulling latest flight data from S3 for group [ {} ]'.format(GROUP_NAME))

client = get_s3_client()

# S3 key of the group's latest flight log and the local text file it is saved to.
s3_filepath_group_training_flight_log_latest = ''.join(
    [s3_subdir_group_training_flight_log, GROUP_NAME, '/flight-log-latest'])
jupyter_filepath_group_training_flight_log_latest = ''.join(
    [jupyter_subdir_group_workspace, GROUP_NAME, '/flight-log-latest.txt'])

print('Downloading from: {}/{}'.format(s3_workspace_bucket,
                                       s3_filepath_group_training_flight_log_latest))
# The local folder has to exist before the download can write into it.
make_path(jupyter_filepath_group_training_flight_log_latest)
print('Downloading to: {}'.format(jupyter_filepath_group_training_flight_log_latest))

group_flight_log_record = client.download_file(
    Bucket=s3_workspace_bucket,
    Key=s3_filepath_group_training_flight_log_latest,
    Filename=jupyter_filepath_group_training_flight_log_latest,
)
print('[DONE]')

# Echo the downloaded log so it can be inspected in the notebook output.
print('========= {} FLIGHT LOG ========='.format(GROUP_NAME))
with open(jupyter_filepath_group_training_flight_log_latest, 'r') as flightLog:
    print(flightLog.read())

# Downloading MAR Dataset for training the model

In [None]:
# Download the prepped MAR training database into this group's session folder.
s3_filepath_mar_training_database = 'mar-lab-workspace/exercise-training/mar-training-database/imu-database-log-entry-latest'
jupyter_filepath_mar_training_database = './data/exercise-training-session/' + GROUP_NAME + '/imu-db/imu-database-training-set.csv'

client = get_s3_client()
print('Getting from: ' + s3_filepath_mar_training_database)
print('Downloading to: ' + jupyter_filepath_mar_training_database)
# The local folder has to exist before the download can write into it.
make_path(jupyter_filepath_mar_training_database)
# Use the shared bucket constant rather than repeating the bucket name, so the
# workspace bucket only has to be changed in one place.
# NOTE(review): the binding below is kept for compatibility with other cells;
# boto3 documents download_file as returning None — confirm nothing reads it.
group_flight_record = client.download_file(Bucket=s3_workspace_bucket,
                                           Key=s3_filepath_mar_training_database,
                                           Filename=jupyter_filepath_mar_training_database)
print('DONE')

In [None]:
def extract_label_sets_from_file(data_filepath, destination_dir):
    """Split a labelled IMU CSV into one CSV file per contiguous run of a label.

    Rows are read in order. Every time the value of the ``label`` column
    changes, a new file
    ``<destination_dir>/<label>/<label>-sample-<k>.csv`` is started, where
    ``k`` counts (from 0) how many runs of that label have been seen so far.
    Each sample file begins with a header line and holds the six IMU columns
    of the rows belonging to that run.

    Args:
        data_filepath: Path of the labelled CSV. It must have a header row
            containing ``label`` and the six IMU column names.
        destination_dir: Directory under which the per-label folders are
            created.

    Raises:
        KeyError: If the CSV header lacks ``label`` or one of the IMU columns.
    """
    csv_columns = ['accelerometer_x','accelerometer_y','accelerometer_z','gyrometer_x','gyrometer_y','gyrometer_z']
    header_line = ','.join(csv_columns) + '\n'

    sample_counts = {}     # label -> index of the most recent sample file for that label
    current_label = None
    sample_file = None     # file receiving the rows of the current run
    try:
        with open(data_filepath, 'r') as labelled_file:
            for labelled_row in csv.DictReader(labelled_file):
                row_label = labelled_row['label']
                # Testing sample_file as well guarantees the very first row
                # always opens a file, even when its label is an empty string.
                if sample_file is None or row_label != current_label:
                    if sample_file is not None:
                        sample_file.close()
                        sample_file = None
                    current_label = row_label
                    sample_counts[row_label] = sample_counts.get(row_label, -1) + 1
                    sample_filename = (destination_dir + '/' + row_label + '/' + row_label
                                       + '-sample-' + str(sample_counts[row_label]) + '.csv')
                    os.makedirs(os.path.dirname(sample_filename), exist_ok=True)
                    sample_file = open(sample_filename, 'w')
                    sample_file.write(header_line)
                sample_file.write(','.join(labelled_row[column] for column in csv_columns) + '\n')
    finally:
        # Close the last sample file so its rows are flushed to disk before
        # any later step reads the sample folders.
        if sample_file is not None:
            sample_file.close()

# Despite the `s3_` prefix this is a local directory: the group's training-session
# folder under ./data. Later cells build their paths from it.
s3_subdir_group_training_session = './data/exercise-training-session/' + GROUP_NAME + '/'
# Split the downloaded database into per-label sample files under <session>/imu-db/.
extract_label_sets_from_file(jupyter_filepath_mar_training_database, s3_subdir_group_training_session + 'imu-db/')
print('DONE')

In [None]:
from itertools import islice

# Names of the per-window features.
feature_csv_columns = [
    'average',
    'median',
]
# Raw IMU channels: the three accelerometer axes followed by the three gyrometer axes.
imu_data_columns = [
    sensor + '_' + axis
    for sensor in ('accelerometer', 'gyrometer')
    for axis in ('x', 'y', 'z')
]

def feature_average(data_sample):
    """Return the arithmetic mean of ``data_sample`` as a ``float``.

    Args:
        data_sample: Iterable of numbers; it is consumed once.

    Raises:
        ZeroDivisionError: If ``data_sample`` yields no items.
    """
    values = list(data_sample)
    return float(sum(values) / len(values))

def feature_variance(data_sample):
    """Return ``np.var`` of ``data_sample`` (NumPy's default arguments)."""
    variance = np.var(data_sample)
    return variance

def feature_median(data_sample):
    """Return the median of ``data_sample`` taken along axis 0 via ``np.median``."""
    median = np.median(data_sample, axis=0)
    return median

# Feature name -> function computing it from one window of values.
# Insertion order matters: the extractor iterates this mapping to lay out the
# feature columns, so 'average' values are written before 'median' values.
feature_calculations = dict([
    ('average', feature_average),
    ('median', feature_median),
])

# Read a *.csv file and extract the sliding window
import collections
def extract_features_from_imu_data_samples_for_label(data_sample_filepath, features_filepath, data_label,
                                                     window_length=4, window_step_forward=1,
                                                     feature_funcs=None, data_columns=None):
    """Append sliding-window features of one sample CSV to a feature file.

    The sample file is read row by row. A window holds the ``window_length``
    most recent rows; one is emitted at every row whose zero-based index is a
    multiple of ``window_step_forward``, once enough rows have been read. For
    each window one line is appended to ``features_filepath``::

        <data_label>,<f1(col1)>,<f1(col2)>,...,<f2(col1)>,...

    i.e. features in the iteration order of ``feature_funcs`` and, within each
    feature, columns in the order of ``data_columns``. No header row is
    written. A sample with fewer than ``window_length`` rows adds no lines;
    the feature file is still opened in append mode, so it is created if
    missing.

    Args:
        data_sample_filepath: CSV file with a header row containing every
            name in ``data_columns``; the cells are parsed with ``float``.
        features_filepath: File the feature lines are appended to.
        data_label: String written as the first field of every line.
        window_length: Number of consecutive rows per window (default 4).
        window_step_forward: Row stride between emitted windows (default 1).
        feature_funcs: Mapping of feature name to a function taking a list of
            floats. Defaults to the notebook-level ``feature_calculations``.
        data_columns: Column names to extract. Defaults to the notebook-level
            ``imu_data_columns``.

    Raises:
        ValueError: If ``window_length`` or ``window_step_forward`` is less
            than 1, or a cell cannot be parsed as a float.
    """
    if window_length < 1 or window_step_forward < 1:
        raise ValueError('window_length and window_step_forward must be >= 1')
    if feature_funcs is None:
        feature_funcs = feature_calculations
    if data_columns is None:
        data_columns = imu_data_columns

    # Collect the row windows; the bounded deque drops the oldest row itself.
    sliding_windows = []
    with open(data_sample_filepath, 'r') as csv_file:
        recent_rows = collections.deque(maxlen=window_length)
        for row_index, row in enumerate(csv.DictReader(csv_file)):
            recent_rows.append(row)
            if row_index % window_step_forward == 0 and len(recent_rows) == window_length:
                sliding_windows.append(list(recent_rows))

    # Build one comma-separated feature line per window.
    feature_lines = []
    if feature_funcs and data_columns:
        for window_rows in sliding_windows:
            # Parse each column once per window and reuse it for every feature.
            values_by_column = {
                column: [float(window_row[column]) for window_row in window_rows]
                for column in data_columns
            }
            feature_lines.append(','.join(
                # Each function gets its own copy so it cannot affect the others.
                str(feature_func(list(values_by_column[column])))
                for feature_func in feature_funcs.values()
                for column in data_columns
            ))

    with open(features_filepath, 'a') as features_file:
        for feature_line in feature_lines:
            features_file.write(data_label + ',' + feature_line + '\n')
            

# Iterate through the raw IMU data directories and get their labels
import glob
import os

# Folder holding one sub-directory of sample CSVs per label, and the feature
# file that the training cell reads.
imu_samples_dir = s3_subdir_group_training_session + 'imu-db'
training_features_filepath = s3_subdir_group_training_session + 'training-feature-data-latest.csv'

labels = set()
print('Extracting labels for samples in raw training data directory:')
for raw_data_dir in glob.glob(imu_samples_dir + '/*', recursive=True):
    # Only sub-directories are labels: the downloaded database CSV is stored
    # in this same folder and must not be picked up as a label.
    if os.path.isdir(raw_data_dir):
        labels.add(os.path.basename(raw_data_dir))
print(labels)

# Start from an empty feature file. The extractor opens it in append mode, so
# without this reset every re-run of the cell would duplicate the training rows.
with open(training_features_filepath, 'w') as features_file:
    features_file.write('')

# No header row is written on purpose: the training input parses every line
# of this file as a data record.

# Iterate through each raw IMU data sample and extract their features.
# Labels and files are visited in sorted order so the output file is the same
# from run to run.
for labelled_features in sorted(labels):
    for raw_data_dir in sorted(glob.glob(imu_samples_dir + '/' + labelled_features + '/*.csv', recursive=False)):
        print("Extracting features from " + raw_data_dir)
        extract_features_from_imu_data_samples_for_label(raw_data_dir, training_features_filepath, labelled_features)

# for labelled_features in labels:
#     for raw_data_dir in glob.glob('./data/evaluation/raw/samples/' + labelled_features + '/*.csv', recursive=False):
# #         print("Extracting features from " + raw_data_dir)
#         extract_features_from_imu_data_samples_for_label(raw_data_dir, './data/evaluation-feature-data.csv', labelled_features)


## Train a linear classifier

In [None]:
import tensorflow as tf
import os
import shutil
import sys

# THIS MODEL DIR POINTS TO THE CLASSIFICATION EXERCISE WORKSPACE!
model_dir = '../mar-classification-exercise/tmp/model/' + GROUP_NAME + '/'
train_data = './data/exercise-training-session/' + GROUP_NAME + '/training-feature-data-latest.csv'
eval_data = './data/exercise-training-session/' + GROUP_NAME + '/training-feature-data-latest.csv'  # CHANGE ME

# Remove any previous model directory so training starts fresh; a missing
# directory is not an error.
shutil.rmtree(model_dir, ignore_errors=True)


def _float64_feature(name):
    """Return a numeric float64 feature column keyed by ``name``."""
    return tf.feature_column.numeric_column(key=name, dtype=tf.float64)


# Feature columns, keyed by the CSV column names used by input_fn.
median_gyro_roll = _float64_feature('median_gyro_roll')
median_gyro_pitch = _float64_feature('median_gyro_pitch')
median_gyro_yaw = _float64_feature('median_gyro_yaw')

median_acc_x = _float64_feature('median_acc_x')
median_acc_y = _float64_feature('median_acc_y')
median_acc_z = _float64_feature('median_acc_z')

mean_gyro_roll = _float64_feature('mean_gyro_roll')
mean_gyro_pitch = _float64_feature('mean_gyro_pitch')
mean_gyro_yaw = _float64_feature('mean_gyro_yaw')

# Feature columns handed to the classifier.
# NOTE(review): input_fn also parses mean_acc_x/y/z, but they are not listed
# here — confirm that leaving them out is intentional.
imu_window_feature_columns = [
    median_gyro_roll, median_gyro_pitch, median_gyro_yaw,
    median_acc_x, median_acc_y, median_acc_z,
    mean_gyro_roll, mean_gyro_pitch, mean_gyro_yaw,
]

# Session configuration with the GPU device count set to 0.
cpu_only_session_config = tf.ConfigProto(device_count={'GPU': 0})
run_config = tf.estimator.RunConfig().replace(session_config=cpu_only_session_config)

def input_fn(data_file):
    """Build the ``tf.data`` input pipeline for a header-less feature CSV.

    Every line of ``data_file`` is decoded into 13 fields: the string label
    (named ``rotor``) followed by 12 float features, in the order given by
    ``csv_columns`` below. Lines are shuffled with a buffer of 200, parsed,
    and grouped into batches of 200.

    Args:
        data_file: Path of the CSV file to read.

    Returns:
        A dataset yielding ``(features, labels)`` pairs, where ``features``
        maps column names to tensors and ``labels`` is the ``rotor`` column.

    Raises:
        AssertionError: If ``data_file`` does not exist.
    """
    # Format the path into the message; the bare '%s not found' template told
    # the user nothing about which file was missing.
    assert tf.gfile.Exists(data_file), '%s not found' % data_file
    # One default per CSV field; the defaults also fix each field's type
    # (string label, then 12 floats).
    records_default = [['neutral'],
                       [0.0], [0.0], [0.0],
                       [0.0], [0.0], [0.0],
                       [0.0], [0.0], [0.0],
                       [0.0], [0.0], [0.0]]
    csv_columns = [
                    'rotor',
                    'mean_acc_x','mean_acc_y','mean_acc_z',
                    'mean_gyro_roll','mean_gyro_pitch','mean_gyro_yaw',
                    'median_acc_x','median_acc_y','median_acc_z',
                    'median_gyro_roll','median_gyro_pitch','median_gyro_yaw'
    ]

    def parse_csv(value):
        """Decode one CSV line into a feature dict and its label tensor."""
        print('PARSING:', data_file)
        columns = tf.decode_csv(value, records_default)
        features = dict(zip(csv_columns, columns))
        labels = features.pop('rotor')
        print('LABELS:', labels)
        return features, labels

    dataset = tf.data.TextLineDataset(data_file)
    dataset = dataset.shuffle(200)
    dataset = dataset.map(parse_csv, num_parallel_calls=4)
    dataset = dataset.batch(200)
    return dataset

# Linear classifier over the IMU window features; the label vocabulary lists
# the five label strings the model can predict.
model = tf.estimator.LinearClassifier(
    model_dir=model_dir,
    feature_columns=imu_window_feature_columns,
    config=run_config,
    n_classes=5,
    label_vocabulary=['backward', 'forward', 'left', 'neutral', 'right']
)
# Alternative model, kept for reference:
# model = tf.estimator.DNNClassifier(
#     model_dir=model_dir,
#     feature_columns=imu_window_feature_columns,
#     config=run_config,
#     hidden_units=[100, 75, 50, 25],
#     n_classes=4,
#     label_vocabulary=['1', '2', '3', '4']
# )

# Train, then evaluate, 40 // 2 = 20 times.
# NOTE(review): input_fn does not repeat the dataset, so each train() call
# presumably makes a single pass over the file; the "(n + 1) * 2" epoch label
# below may overstate the number of passes — confirm.
for n in range(40 // 2):
    model.train(input_fn=lambda: input_fn(train_data))

    results = model.evaluate(input_fn=lambda: input_fn(eval_data))

    # Display evaluation metrics.
    # See https://docs.aws.amazon.com/sagemaker/latest/dg/tf-training-inference-code-template.html
    print('Results at epoch', (n + 1) * 2)
    print('-' * 60)
    # The metrics were previously computed but never shown.
    for metric_name in sorted(results):
        print('%s: %s' % (metric_name, results[metric_name]))

print('[DONE]')