In [15]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# 1-based activity identifiers (1 through 5). extract_activity() maps each
# identifier to its own group of four consecutive CSV columns.
ACTIVITIES = np.arange(1,6)

def plot_xyz(time, x,y,z, activity):
    """Draw the three axis traces of one activity in a new figure.

    A fresh matplotlib figure is opened and `x`, `y` and `z` are each
    plotted against `time`, labelled 'X', 'Y' and 'Z'.  The figure is
    titled with `activity`; it is not shown here, so the caller decides
    when to call ``plt.show``.
    """
    plt.figure()

    # One line per axis, drawn in X, Y, Z order so the legend keeps that order.
    for axis_label, samples in (('X', x), ('Y', y), ('Z', z)):
        plt.plot(time, samples, label=axis_label)

    plt.title("activity = {}".format(activity))
    plt.xlabel('Time (Seconds)')
    plt.grid(True)
    plt.axis('tight')
    plt.legend(loc='upper left')
    
def graph_all_features(time: np.ndarray, csv: np.ndarray):
    """Plot the x/y/z traces of every activity in ACTIVITIES, one figure each.

    Parameters
    ----------
    time : np.ndarray
        Values for the horizontal axis, shared by all figures.
    csv : np.ndarray
        The loaded data table; extract_activity() picks the four columns
        belonging to each activity out of it.
    """
    for activity in ACTIVITIES:
        # The fourth column of each group is not plotted.
        x, y, z, _ = extract_activity(csv, activity)
        plot_xyz(time, x, y, z, activity)
    # `block` must be passed by keyword: current Matplotlib declares it
    # keyword-only, so the former positional `plt.show(False)` raises TypeError.
    plt.show(block=False)

def extract_activity(csv: "csv format", activity:int):
    """Return the four rows of ``csv.T`` that belong to `activity`.

    Column 0 of `csv` is skipped; after it, every activity owns four
    consecutive columns, so the 1-based `activity` starts at column
    ``1 + 4 * (activity - 1)``.  The result is a slice of the transposed
    table, i.e. one row per selected column.
    """
    first_column = 4 * (activity - 1) + 1
    return csv.T[first_column:first_column + 4]

def compute_window(X, window_size = 10, func = np.mean):
    """Apply `func` over a sliding window of rows centred near each row of `X`.

    For row ``i`` the window is the half-open row range
    ``[i - window_size, i + window_size)``, clipped to the bounds of `X`.

    Parameters
    ----------
    X : np.ndarray
        Input samples; windows slide along axis 0.
    window_size : int
        Number of rows taken on each side of ``i`` (the upper end is
        exclusive, so an unclipped window holds ``2 * window_size`` rows).
    func : callable
        Reduction called as ``func(rows, axis=0)``, e.g. ``np.mean``.

    Returns
    -------
    np.ndarray
        Float array with the same shape as `X`; row ``i`` holds the reduced
        window for row ``i``.
    """
    n_rows = X.shape[0]
    window = np.zeros(X.shape)
    for i in range(n_rows):
        start = max(0, i - window_size)
        # Slice ends are exclusive, so the clip must be n_rows, not n_rows - 1.
        # The old clip dropped the last row from every window and left a
        # one-row input with an empty window.
        stop = min(n_rows, i + window_size)
        window[i] = func(X[start:stop], axis = 0)
    return window

def split_data(data, seed = 0, train_fraction = 0.8):
    """Shuffle the rows of `data` and split them into training and validation sets.

    Inspired by the ml library from cs 273a.

    Parameters
    ----------
    data : np.ndarray
        Samples to split; rows (axis 0) are shuffled.
    seed : int
        Seed for NumPy's global random generator, so the split is
        reproducible.  Note that this reseeds the global generator.
    train_fraction : float
        Share of the rows that goes to the training set (default 0.8).

    Returns
    -------
    (np.ndarray, np.ndarray)
        ``(training_data, validation_data)``: the first
        ``int(n_rows * train_fraction)`` shuffled rows and the remainder.

    Raises
    ------
    ValueError
        If `train_fraction` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError("train_fraction must be between 0 and 1")

    np.random.seed(seed)
    order = np.random.permutation(data.shape[0])
    # Shuffle once and slice the result; the fancy-index copy used to be
    # made three times, one of which was never read.
    shuffled_data = data[order]
    split = int(data.shape[0] * train_fraction)

    return shuffled_data[:split], shuffled_data[split:]

def _windowed_features(csv, activity, window_size):
    """Build the feature rows for one activity: raw values, rolling means, label."""
    samples = extract_activity(csv, activity).T
    # The last extracted column is used as the label; the rest are inputs.
    inputs, labels = samples[:, :-1], samples[:, -1, np.newaxis]
    rolling_mean = compute_window(inputs, window_size, func = np.mean)
    return np.concatenate((inputs, rolling_mean, labels), axis = 1)

def calculate_mean(csv, window_size):
    """Build the feature matrix for every activity in ACTIVITIES.

    For each activity the raw input columns, their rolling means and the
    label column are placed side by side; the per-activity matrices are
    then stacked vertically in ACTIVITIES order.

    Parameters
    ----------
    csv : np.ndarray
        The loaded data table handed to extract_activity().
    window_size : int
        Window size forwarded to compute_window().  This argument is now
        honoured; it used to be ignored in favour of a hard-coded 1000.

    Returns
    -------
    np.ndarray
        One row per sample with columns ``[inputs..., rolling means..., label]``.
    """
    # Collect the pieces and concatenate once, not by regrowing the
    # result array on every iteration.
    per_activity = [_windowed_features(csv, activity, window_size)
                    for activity in ACTIVITIES]
    return np.concatenate(per_activity, axis = 0)

def calculate_meanV(csv, window_size):
    """Build the feature matrix for activity 1 only.

    Same column layout as calculate_mean(), but only the first activity's
    column group of `csv` is read.

    Parameters
    ----------
    csv : np.ndarray
        The loaded data table handed to extract_activity().
    window_size : int
        Window size forwarded to compute_window().  This argument is now
        honoured; it used to be ignored in favour of a hard-coded 1000.

    Returns
    -------
    np.ndarray
        One row per sample with columns ``[inputs..., rolling means..., label]``.
    """
    return _windowed_features(csv, 1, window_size)

if __name__ == '__main__':
    # Rolling-mean window handed to the feature builders.
    window_size = 1000

    csv = np.genfromtxt("takashin_[Students]assignment5_sampleXYZ.csv",
                         delimiter=',', skip_header=1)
    csvV = np.genfromtxt("sensor.csv",
                         delimiter=',', skip_header=1)
    time = csv.T[0]
    #graph_all_features(time, csv)
    features = calculate_mean(csv, window_size)
    featuresV = calculate_meanV(csvV, window_size)
    print(featuresV)

    # NOTE: validation_data from this split is not used below; the model is
    # evaluated on the features built from sensor.csv.
    training_data, validation_data = split_data(features, seed=0)
    # Train on partial data: keep every second training row.
    training_data = training_data[::2]
    train_X, train_y = training_data[:,:-1], training_data[:,-1]
    valid_X, valid_y = featuresV[:,:-1], featuresV[:,-1]

    clf = svm.SVC()
    clf.fit(train_X, train_y)

    # Predict once per data set and reuse the result for both metrics.
    train_pred = clf.predict(train_X)
    valid_pred = clf.predict(valid_X)

    training_MSE = np.mean((train_pred - train_y) ** 2)
    validation_MSE = np.mean((valid_pred - valid_y) ** 2)
    # NOTE: despite the "MAE" label these are misclassification rates: the
    # fraction of samples whose predicted label differs from the true one.
    training_MAE = np.count_nonzero(train_pred != train_y) / training_data.shape[0]
    validation_MAE = np.count_nonzero(valid_pred != valid_y) / featuresV.shape[0]
    print("MSE", training_MSE, validation_MSE)
    print("MAE", training_MAE, validation_MAE)



[[-0.087071   -0.047038    0.473385   ..., -0.09034541  0.96139011  2.        ]
 [-0.126102   -0.075061    0.862701   ..., -0.09034514  0.9613905   2.        ]
 [-0.156127   -0.090073    0.932758   ..., -0.09034287  0.96138889  2.        ]
 ..., 
 [-0.240195   -0.019015    0.964784   ..., -0.0508257   0.96455613  2.        ]
 [-0.245199   -0.022018    0.944768   ..., -0.05081749  0.9645789   2.        ]
 [-0.245199   -0.030024    0.951773   ..., -0.05081326  0.9645817   2.        ]]
MSE 0.0 9.0
MAE 0.0 1.0
