# Project 22: Activity Recognition
## Authors: Alessandro Pomes, Simon Schmoll
## Objectives: Classification of 7 activities which are tracked with a Single Chest-Mounted Accelerometer
## What is done in the Notebook: The data is imported, processed and classified
## We follow a modular approach: the functions are defined first and are called later for execution


## Importing the libraries


In [7]:
import pandas as pd #import panda for importing the dataset
import numpy as np

#Feature selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
#chi2 as we are dealing with a classification problem

#imports for the classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict



# Importing of the dataset
## Method definition for reading one of the available datasets

In [8]:
def read(data_num):
    """Load one of the numbered CSV recordings into a DataFrame.

    The file ``data/<data_num>.csv`` is parsed as comma-separated values
    without a header row. The column names are taken from
    ``names_attributes``, which must be defined outside this function.

    @params: data_num -- number of the dataset, used to build the file name
    @output: dataset (DataFrame)
    """
    csv_path = 'data/%d.csv' % data_num
    # The python parsing engine is requested explicitly instead of the
    # default C engine.
    return pd.read_csv(
        csv_path,
        sep=',',
        header=None,
        engine='python',
        names=names_attributes,
    )

# Checking for missing data
## In the following lines, we check for missing values as these can falsify our data extraction

In [None]:
# Function for deleting the rows that carry a given label (e.g. '0')
# @input: Pandas DataFrame, value of the label to delete
# @output: integer array without the rows of that label
def zeroDet(dataset, value):
    """Remove all rows with a given label and return the rest as integers.

    @input: dataset -- Pandas DataFrame with a column named 'lable'
            value   -- label value whose rows are removed
    @output: 2-D numpy array of int holding the remaining rows, in their
             original order; the input DataFrame is not modified
    """
    # A boolean mask selects rows by position, so the result stays correct
    # even when the DataFrame index contains duplicate entries.
    kept_rows = dataset.loc[dataset['lable'] != value]
    # DataFrame.as_matrix() and the np.int alias no longer exist in current
    # pandas / NumPy releases; .values and the builtin int are equivalent.
    return kept_rows.values.astype(int)

# Scan the datasets 1..15 for missing values.
# For every column listed in `header` (defined outside this cell), the rows
# flagged as null are printed. If no data is missing, every printed
# DataFrame is empty.
for num in range(1, 16):
    # pandas is only imported as `pd`, so read_csv has to be called through it
    dataset = pd.read_csv('data/%d.csv' % num, names=header)
    boolData = dataset.isnull()
    for name in header:
        print(boolData.loc[boolData[name]])

# Feature Engineering
## The goal is to extract features from the preprocessed numpy array
## But before that, a preprocessing step is needed
## Step 1 is to sequence the data into windows of 52 instances
## Side note: it is very important not to mix two labels in the same window

In [None]:
# For feature extraction we use a technique called window overlapping (Pierluigi Casale, Oriol Pujol, and Petia Radeva. Human activity recognition from accelerometer
# data using a wearable device. Pattern Recognition and Image Analysis, pages 289–296, 2011). It has an overlap of 50% between the different
# time series. As a time window 1 second is used --> corresponds to 52 samples (52 Hz frequency)
# Then we start with the sequencing
# Slicing needs to be done as follows:
# - two activities must not be grouped into one sequence (this would falsify the outcome of the mean value)
# - therefore only rows with the same label are grouped into one sequence
# @params: array_data is the array that contains the data rows to be grouped
# @output: data_list which contains numpy arrays with the respective windows

def grouping(array_data, window_size=52, label_column=4):
    """Cut the recording into half-overlapping windows.

    Windows normally hold ``window_size`` rows and advance by half a window
    (50% overlap). When the first and the last row of a candidate window carry
    different labels, the window is shortened from the end until its last row
    has the same label as its first row; the next window then starts directly
    after the shortened one, without overlap.

    NOTE: only the first and last row of a window are compared, rows in
    between are not inspected.

    A window is only started while more than ``window_size`` rows remain from
    its start position, so a short tail of the recording is not emitted.

    @params: array_data   -- 2-D array (rows = samples) containing the label
                             in column ``label_column``
             window_size  -- number of rows of a full window (default 52)
             label_column -- index of the label column (default 4)
    @output: data_list which contains the window arrays (slices of array_data)
    @raises: ValueError if window_size is smaller than 2
    """
    if window_size < 2:
        # a half-window step of 0 would never advance
        raise ValueError("window_size must be at least 2")

    step = window_size // 2  # 50% overlap between consecutive windows
    length = np.size(array_data, 0)
    data_list = []
    start = 0
    while start < length - window_size:
        end = start + window_size
        first_label = array_data[start][label_column]
        if array_data[end - 1][label_column] != first_label:
            # Label changes inside the window: shrink it so that two
            # different labels do not end up in the same window.
            while array_data[end - 1][label_column] != first_label:
                end -= 1
            data_list.append(array_data[start:end])
            start = end
        else:
            data_list.append(array_data[start:end])
            start += step
    return data_list

# This is an additional function which can be called to write a data list to a text file (e.g. to examine it)
# Uncomment it to print the data to a text file
# def sysout_to_text(dataList):
#     file = open("tempFile", "w")
#     for item in dataList:
#         file.write("%s\n" % item)
#     file.close()

# Feature Extraction
# Step 2: we extract two feature types for each window, one value per axis (6 different features for each window - x-, y-, z- axis)

In [11]:
def extract_features(data_list):
    """Compute mean and standard deviation features for every window.

    For each window the column-wise mean and standard deviation are computed
    and the values of columns 1, 2 and 3 (the x, y and z axis) are combined
    into one feature row ``[mean_x, mean_y, mean_z, std_x, std_y, std_z]``.
    The target of a window is the value in column 4 of its first row.

    NaN entries are ignored by both statistics, so a single missing sample
    does not turn a feature into NaN.

    @Params: grouped data_list containing the window arrays
    @Output: feature -- array with one row of 6 features per window
             target  -- array with one row holding the label per window
             For an empty data_list, arrays of shape (0, 6) and (0, 1) are
             returned.
    """
    if len(data_list) == 0:
        # np.vstack cannot stack an empty sequence
        return np.empty((0, 6)), np.empty((0, 1))

    feature_rows = []
    label_rows = []
    for window in data_list:
        means = np.nanmean(window, 0)
        deviations = np.nanstd(window, 0)
        feature_rows.append([
            means[1], means[2], means[3],
            deviations[1], deviations[2], deviations[3],
        ])
        label_rows.append([window[0][4]])

    feature = np.vstack(feature_rows)
    target = np.vstack(label_rows)
    return feature, target

# Classification