In [5]:
# what is in this file:
# Cell 2: the imports that would need to be included
# Cell 3: an existing code block provided for you to create training/test dataset (feature dataset)
# Cell 4: Challenge 1
# Cell 5: Challenge 2
# Cell 6: an existing code block provided for you to  train the machine learning model, test the machine learning model, and see the results
# Cell 7: Challenge 3
# Cell 8: Challenge 4
# Cell 9: Submission preparation






In [6]:
import numpy as np 
import pandas as pd 
from scipy import signal
import matplotlib.pyplot as plt 
import math
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [9]:


def create_feature_data_from_files(list_of_filenames, output_filename, window_size=1000):
    """Build a min/max/mean feature table from raw sensor CSV files and save it.

    Every input file is read as a header-less, comma-separated CSV in which
    column 24 holds the activity number and columns 0-23 hold the readings.
    For each activity number 1..13 in each file:

    1. the rows of that activity are selected (in file order),
    2. columns 0-23 are smoothed with a 4th-order low-pass Butterworth filter
       (normalised cutoff 0.04),
    3. the rows are cut into consecutive, non-overlapping windows of
       ``window_size`` rows (trailing rows that do not fill a window are
       dropped),
    4. each window yields one output row of 10 values: min, max and mean of
       column 0, then of column 1, then of column 2, followed by the activity
       number taken from the last column of the window's first row.

    All output rows are written to ``output_filename`` as a CSV without header
    or index.  Progress messages are printed along the way.

    Parameters
    ----------
    list_of_filenames : iterable of str
        Paths of the raw data CSV files to process, in order.
    output_filename : str
        Path of the feature CSV to create.
    window_size : int, optional
        Number of rows per window (default 1000, the original fixed value).

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    label_column = 24            # column holding the activity number; 0..23 get filtered
    feature_columns = range(3)   # columns summarised into min/max/mean features
    n_output_columns = 3 * len(feature_columns) + 1

    # The filter coefficients do not depend on the data, so design them once
    # instead of once per activity per file.
    b, a = signal.butter(4, 0.04, 'low', analog=False)

    # Collect rows in a plain list and build the array once at the end;
    # concatenating onto an ndarray inside the loop copies it every time.
    feature_rows = []

    for dataset_file in list_of_filenames:

        # import the file contents into a pandas data frame
        imported_data = pd.read_csv(dataset_file, sep=',', header=None)

        # generate "features" for each activity
        for activityNumber in range(1, 14):

            # Rows of this activity as a float ndarray.  The explicit float
            # cast matters: an all-integer CSV would otherwise give an integer
            # array and the filtered values written back below would be
            # silently truncated to whole numbers.
            activity_data = imported_data[
                imported_data[label_column] == activityNumber
            ].values.astype(float)

            # how many full windows are there for this activity?
            number_of_samples = len(activity_data) // window_size
            print("File " + str(dataset_file) +
                  " has " + str(number_of_samples) +
                  " samples of " + str(window_size) + " rows" +
                  " for activity: " + str(activityNumber))

            if number_of_samples == 0:
                # Nothing to extract, so skip filtering (also avoids handing
                # an empty array to the filter).
                continue

            # Smooth the sensor columns (not the label column) in one call;
            # axis=0 filters each column independently along the rows.
            activity_data[:, :label_column] = signal.lfilter(
                b, a, activity_data[:, :label_column], axis=0
            )

            for sample_number in range(number_of_samples):
                start = window_size * sample_number
                sample_data = activity_data[start:start + window_size, :]

                feature_sample = []
                for i in feature_columns:
                    column = sample_data[:, i]
                    feature_sample.append(np.min(column))
                    feature_sample.append(np.max(column))
                    feature_sample.append(np.mean(column))
                # add the activity number (last column of the window's first row)
                feature_sample.append(int(sample_data[0, -1]))
                feature_rows.append(feature_sample)

    # reshape keeps the (0, 10) shape when no window was found at all
    feature_set = np.array(feature_rows, dtype=float).reshape(-1, n_output_columns)

    # now save all this feature data into a file to be used at a later date
    df_feature = pd.DataFrame(feature_set)
    df_feature.to_csv(output_filename, index=False, header=False)
    print('attempted to create ' + str(output_filename) + ' ... check if the file was created!')
    print(str(len(feature_set)) + " data rows should be in the output file")





In [22]:
#Challenge 1:
# Create a testing data set from dataset_11, 12, 13, 14, 15, 16, 17, 18 and 19.
# The testing data set file is named week12_testing_data_9Participants.csv

#
# Start: create testing data
#


#
# End: create testing data
#



In [23]:
#Challenge 2:
# Create a training data set from dataset_1, 2, 3, 4, 5, 6, 7, 8, 9 and 10.
# The training data set file is named week12_training_data_10Participants.csv

#
# Start: create training data
#

#
# End: create training data
#

In [15]:

def model_training_and_evaluation(traing_file_name, testing_file_name):
    """Fit a KNN classifier on one feature CSV and report its score on another.

    Both files are read as header-less CSVs in which column 9 is the activity
    label and the remaining columns are the features.  Features are
    standardised with a scaler fitted on the training features only, a
    3-nearest-neighbour classifier is trained, and the accuracy and confusion
    matrix on the testing file are printed.  Nothing is returned.

    Parameters
    ----------
    traing_file_name : str
        Path of the training feature CSV.
    testing_file_name : str
        Path of the testing feature CSV.
    """
    label_column = 9

    def _features_and_labels(csv_name):
        # Split one feature CSV into (feature matrix, zero-based labels).
        frame = pd.read_csv(csv_name, header=None)
        # Labels are shifted down by one so that they start at 0
        # (the activity numbers listed in this notebook start at 1).
        labels = frame[label_column].values - 1
        features = frame.drop([label_column], axis=1).values
        return features, labels

    training_features, training_labels = _features_and_labels(traing_file_name)
    testing_features, testing_labels = _features_and_labels(testing_file_name)

    # Normalise the features: StandardScaler centres and scales each feature.
    # The scaler is fitted on the training data only and then re-used for the
    # testing data.  Other normalisation methods could be tried here.
    scaler = preprocessing.StandardScaler()
    training_features = scaler.fit_transform(training_features)
    testing_features = scaler.transform(testing_features)

    # K-nearest-neighbours classifier with k = 3.
    # NOTE(review): an earlier note here quoted an accuracy of 0.757 for
    # n_neighbors=4; the code uses 3 -- confirm which setting that figure
    # belongs to.
    classifier = KNeighborsClassifier(n_neighbors=3).fit(training_features, training_labels)

    # Evaluate on the held-out testing set: overall accuracy first, then the
    # confusion matrix to see how each activity was classified.
    predicted_labels = classifier.predict(testing_features)
    print('Accuracy: ', accuracy_score(testing_labels, predicted_labels))
    print(confusion_matrix(testing_labels, predicted_labels))


In [24]:
#Challenge 3
#use created 10 participants training data and 9 participants testing data to train and test the KNN model
#And interpret the results

# Begin your code here


# End
    
    
# Activity values: 1 – Sitting, 2 – Lying down, 3 – Standing, 4 – Washing Dishes, 5 – Vacuuming, 
# 6 – Sweeping, 7 – Walking outside, 8 – Ascending stairs, 9 – Descending stairs, 10 – Treadmill running, 11 – Bicycling, 12 – Bicycling (more intense), 13 – Rope Jumping.


# Can you interpret the confusion matrix below?
# what are the rows and what are the columns?
# Which activity does the machine learning model predict best?

#write your answers below for tutors to check
#Rows are for true labels
#Columns are for predicted labels 





In [25]:
# Challenge 4
# Let's try another way for training and testing 
# Hint: training data and testing data shall not overlap 
# Solution: use odd number for training, use even number for testing
# odd number means dataset_1, 3,5.....19, even number means dataset_2,4,6....18


#begin your code here

#end code

# Activity values: 1 – Sitting, 2 – Lying down, 3 – Standing, 4 – Washing Dishes, 5 – Vacuuming, 
# 6 – Sweeping, 7 – Walking outside, 8 – Ascending stairs, 9 – Descending stairs, 10 – Treadmill running, 11 – Bicycling, 12 – Bicycling (more intense), 13 – Rope Jumping.


# Can you interpret the confusion matrix above?
# what are the rows and what are the columns?
# Which activity does the machine learning model predict best?

# Has the result been improved?  why?

# How to improve?

#write your answers below for tutors to check
#Rows are for true labels
#Columns are for predicted labels 







In [None]:
# prepare your submission

# 1. prepare your code submission. if one file, name it historyCode(.ipynb), if multiple file, zip them and named it 
# historyCode.zip 

# 2. rename this python code file as main(.ipynb)

# 3. Take a screenshot of your github commit history, name it githubCommit(.png/jpg/gif)

# 4. Take a screenshot of your Agile task assignment (in Trello or whatever tools you used), name it agileTask.(.png/jpg/gif)

# 5. Submit your workbook as myWorkBook.pdf (if you don't record your logbook electronically, you need to take photos of your handwritten ones and create a PDF from these photos). 

# 6. call the tutor to check whether your submission files are correct and if correct, submit now (all the files mentioned above).

# 7. Ask the tutor what is your group's order in week 13's demo session
