In [33]:
# load data from csv file and save data into separate lists
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.metrics.cluster import normalized_mutual_info_score
from scipy.fftpack import fft, ifft
from sklearn.decomposition import PCA
import warnings
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import pickle

In [34]:
# when read the data from CSV, the time stamp and associated GMC value should be reversed
def read_raw_data():
    x_1 =[]
    x_2 =[]

    for i in range (5):
        x_1=[]
        x_2=[]
        with open(r'.\mealData'+str(i+1)+'.csv','rt')as f:
            data = csv.reader(f)
            rows_x=[row for idx, row in enumerate(data) if idx<50]# only use first 20 rows of the data
            for row in rows_x:
                x_1.append(row)
        with open(r'.\MealAmountData'+str(i+1)+'.csv','rt')as ff:
            data = csv.reader(ff)
            rows_x=[row for idx, row in enumerate(data) if idx<50]# only use first 20 rows of the data
            for row in rows_x:
                x_2.append(row)
        if i==0:
            x1 = x_1
            x2 = x_2
        elif i!=0:
            x1 = x1+x_1
            x2 = x2+x_2
    return x1,x2

# this func is used to remove the data which contains 'NaN' and only use the first 30 data
def smooth_data(y,x):
    idx = []
    size_y = len(y)
    for i in range (size_y):
        y[i] = y[i][:30]
        y[i] = y[i][::-1]
        if (len(y[i])!= 30):
            idx.append(i)
        elif 'NaN' in y[i]:
            idx.append(i)      
    for j in range (len(idx),0,-1):
        del y[idx[j-1]]
        del x[idx[j-1]]
    return y, x

In [35]:
x1,x2 = read_raw_data()
print('Number of rows from meal data:',len(x1))
print('Number of meal amount data:',len(x2))

x1, x2 = smooth_data(x1, x2)
print("Number of rows from the processed meal data: ",len(x1) )
print("Number of rows from the processed meal amount data: ",len(x2))

Number of rows from meal data: 250
Number of meal amount data: 250
Number of rows from the processed meal data:  211
Number of rows from the processed meal amount data:  211


In [36]:
def extract_ground_truth(x2):
    bin_truth = []
    for i in range (len(x2)):
        if int(x2[i][0]) == 0:
            bin_truth.append(1)
        elif (int(x2[i][0])>0) and (int(x2[i][0])<=20):
            bin_truth.append(2)
        elif (int(x2[i][0])>20) and (int(x2[i][0])<=40):
            bin_truth.append(3)
        elif (int(x2[i][0])>40) and (int(x2[i][0])<=60):
            bin_truth.append(4)
        elif (int(x2[i][0])>60) and (int(x2[i][0])<=80):
            bin_truth.append(5)
        elif (int(x2[i][0])>80) and (int(x2[i][0])<=100):
            bin_truth.append(6)
    return bin_truth  

In [37]:
bin_truth = extract_ground_truth(x2)
print(bin_truth)

[4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 6, 3, 5, 3, 5, 4, 6, 3, 6, 1, 5, 2, 6, 1, 5, 2, 6, 1, 5, 2, 6, 1, 5, 3, 6, 3, 1, 3, 1, 5, 1, 2, 3, 3, 1, 5, 1, 2, 3, 3, 2, 1, 5, 1, 2, 2, 3, 2, 1, 5, 3, 2, 3, 1, 2, 3, 2, 3, 2, 1, 5, 1, 1, 1, 1, 4, 1, 4, 1, 4, 1, 3, 4, 1, 4, 4, 1, 1, 3, 4, 1, 4, 1, 3, 4, 1, 1, 1, 4, 4, 2, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 4, 2, 2, 2, 1, 2, 2, 1, 1, 3, 4, 2, 2, 2, 4, 2, 3, 1, 3, 4, 2, 2, 2, 4, 2, 3, 1, 3, 4, 2, 2, 2, 4, 2, 3, 1, 3, 4, 2, 2, 2, 4, 2, 3, 1, 3, 4, 2, 2, 4, 4, 4, 4, 4, 1, 4, 4, 1, 4, 4, 4, 1, 4, 4, 2, 2, 1, 4, 1, 4, 4, 2, 2, 1, 4, 4, 2, 2, 1, 4, 4, 2, 2, 1, 4, 3, 1, 1, 4, 1, 4, 3, 1]


In [84]:
# function for calculating the avg of changing velocity with window size 3, result in 10 features
def avg_vel(y):
    average = sum(y)/len(y)
    vel_y = []
    avg_vel = []
    window_size = 3
    for i in range (len(y)-1):
        vel = y[i+1]-y[i]
        vel_y.append(vel)
    np.asarray(vel_y)

    for i in range (int(len(y)/window_size)):
        if i != (int((len(y)/window_size)-1)):
            avg = np.average(vel_y[(i*3):(i*3)+3])
        avg_vel.append(avg)
    array_vel = np.asarray(avg_vel)
    array_vel = normalize(array_vel[:,np.newaxis], axis=0).ravel()
#     array_vel = (array_vel - min(array_vel))/(max(array_vel)-min(array_vel))
    return array_vel

def max_increase(y):
    change = []
    y = list(map(int, y))
    y_0 = y[5]
    y_max = max(y[5:])
    y_end = y[29]
    max_increase = (y_max - y_0)/y_0
    max_decrease = (y_max - y_end)/y_end
    before_change = max(y[:5])-min(y[:5])
    change.append(max_increase)
    change.append(max_decrease)
    change.append(before_change)
    change = np.asarray(change,dtype=np.float32)
    changed = normalize(change[:,np.newaxis], axis=0).ravel()
#     changed = (change-min(change))/(max(change)-min(change))
    return changed

In [85]:
array_x = np.asarray(x1[2],dtype=int)
cc = avg_vel(array_x)
print(cc)
dd = max_increase(array_x)
print(dd)

[-0.06823118 -0.04093871  0.02729247  0.40938706  0.66866553  0.57314188
  0.04093871 -0.05458494  0.15010859  0.15010859]
[0.23715839 0.         0.971471  ]


In [86]:
# extract feature and save it into feature metricx
for i in range(len(x1)):
    yy = np.asarray(x1[i],dtype=np.float32)
    f1 = avg_vel(yy)
    f2 = max_increase(yy)
    f12 = np.concatenate((f1, f2), axis=None)
    f1 = f12
    if i == 0:
        feature_m1 = f1
    else:
        feature_m1 = np.vstack((feature_m1,f1))

In [87]:
print(feature_m1)

[[ 0.22011779  0.64342123  0.33864275 ...  0.01708368  0.02572302
   0.99952304]
 [-0.01279635  0.2687233   0.21753791 ...  0.13975823  0.9273988
   0.34698594]
 [-0.06823117 -0.04093871  0.02729247 ...  0.23715839  0.
   0.971471  ]
 ...
 [ 0.14610927 -0.5977197  -0.5445891  ...  0.          0.07047059
   0.9975139 ]
 [-0.06292918 -0.23913091 -0.28947425 ...  0.          0.08656408
   0.9962463 ]
 [-0.07015745  0.08185035  0.19877942 ...  0.1998337   0.
   0.9798298 ]]


[1, 2, 3]
