In [1]:
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.initializers import he_normal

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

import seaborn as sns
sns.set(font='Yu Gothic')
import matplotlib.pyplot as plt
%matplotlib inline

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
def load_npy(hold_position, split="train", base_dir="../Output"):
    """Load the pre-computed feature files for one hold position into a single 2-D array.

    Parameters
    ----------
    hold_position : str
        Name used in the directory and file names (the notebook passes
        "Bag", "Hips", "Torso" and "Hand").
    split : str, optional
        Dataset split used in the directory and file names. Defaults to
        "train", which reproduces the original hard-coded paths.
    base_dir : str, optional
        Directory that contains the per-split folders. Defaults to "../Output".

    Returns
    -------
    numpy.ndarray
        The six scalar statistics (each reshaped to a single column) followed by
        the two "amplitude_frequency_range5Hz" arrays, joined along axis 1.
    """
    prefix = "{0}/{1}/{1}_{2}/{1}_{2}".format(base_dir, split, hold_position)

    # Each of these files is reshaped to one column per sample.
    scalar_suffixes = [
        "_glo_laccel_xy_mean.npy",
        "_glo_laccel_xy_var.npy",
        "_glo_laccel_z_mean.npy",
        "_glo_laccel_z_var.npy",
        "_glo_laccel_z_skew.npy",
        "_glo_laccel_z_kurtosis.npy",
    ]
    # These files are concatenated as loaded (they must already be 2-D).
    spectrum_suffixes = [
        "_glo_laccel_z_amplitude_frequency_range5Hz.npy",
        "_glo_gyro_z_ver2_amplitude_frequency_range5Hz.npy",
    ]

    columns = [np.load(prefix + suffix).reshape([-1, 1]) for suffix in scalar_suffixes]
    columns += [np.load(prefix + suffix) for suffix in spectrum_suffixes]
    # No explicit `del` is needed: the intermediates are released when the function returns.
    return np.concatenate(columns, axis=1)

In [3]:
# Load the training feature matrix of every hold position (Bag, Hips, Torso, Hand order).
train_Bag, train_Hips, train_Torso, train_Hand = [
    load_npy(position) for position in ("Bag", "Hips", "Torso", "Hand")
]
# Row 120845 is dropped from the Hips features only; the label cell removes the
# same index from the Hips copy of the labels.
# NOTE(review): presumably an invalid sample - confirm why only Hips is affected.
train_Hips = np.delete(train_Hips, 120845, axis=0)

train_Bag.shape, train_Hips.shape

((195491, 50), (195490, 50))

In [4]:
# Stack the four hold positions row-wise: Bag, Hips, Torso, Hand.
X_train = np.vstack((train_Bag, train_Hips, train_Torso, train_Hand))
X_train.shape

(781963, 50)

In [5]:
# Take the first label column and keep it as an (N, 1) column vector, the same way
# Y_val is built. Without the reshape the result is 1-D, which contradicts the
# recorded shape (781963, 1) and breaks the later `(Y_train==k)[:, 0]` indexing.
Y_train = np.load("../Data/センサ別npyファイル/train/train_Bag/train_Bag_Label.npy")[:, 0].reshape([-1, 1])
# Same labels for every hold position, in the order Bag, Hips, Torso, Hand.
# The Hips copy drops row 120845 to mirror the row removed from train_Hips.
Y_train = np.concatenate([Y_train, np.delete(Y_train, 120845, 0), Y_train, Y_train], axis=0)
Y_train.shape

(781963, 1)

In [6]:
def load_npy(hold_position, split="validation", base_dir="../Output"):
    """Load the pre-computed feature files for one hold position into a single 2-D array.

    Parameters
    ----------
    hold_position : str
        Name used in the directory and file names (the notebook passes
        "Bag", "Hips", "Torso" and "Hand").
    split : str, optional
        Dataset split used in the directory and file names. Defaults to
        "validation", which reproduces the original hard-coded paths of this cell.
    base_dir : str, optional
        Directory that contains the per-split folders. Defaults to "../Output".

    Returns
    -------
    numpy.ndarray
        The six scalar statistics (each reshaped to a single column) followed by
        the two "amplitude_frequency_range5Hz" arrays, joined along axis 1.
    """
    prefix = "{0}/{1}/{1}_{2}/{1}_{2}".format(base_dir, split, hold_position)

    # Each of these files is reshaped to one column per sample.
    scalar_suffixes = [
        "_glo_laccel_xy_mean.npy",
        "_glo_laccel_xy_var.npy",
        "_glo_laccel_z_mean.npy",
        "_glo_laccel_z_var.npy",
        "_glo_laccel_z_skew.npy",
        "_glo_laccel_z_kurtosis.npy",
    ]
    # These files are concatenated as loaded (they must already be 2-D).
    spectrum_suffixes = [
        "_glo_laccel_z_amplitude_frequency_range5Hz.npy",
        "_glo_gyro_z_ver2_amplitude_frequency_range5Hz.npy",
    ]

    columns = [np.load(prefix + suffix).reshape([-1, 1]) for suffix in scalar_suffixes]
    columns += [np.load(prefix + suffix) for suffix in spectrum_suffixes]
    # No explicit `del` is needed: the intermediates are released when the function returns.
    return np.concatenate(columns, axis=1)

In [7]:
# Load the validation feature matrix of every hold position (Bag, Hips, Torso, Hand order).
validation_Bag, validation_Hips, validation_Torso, validation_Hand = [
    load_npy(position) for position in ("Bag", "Hips", "Torso", "Hand")
]

validation_Bag.shape

(28685, 50)

In [8]:
# Stack the four hold positions row-wise: Bag, Hips, Torso, Hand.
X_val = np.vstack((validation_Bag, validation_Hips, validation_Torso, validation_Hand))
X_val.shape

(114740, 50)

In [9]:
# First label column as an (N, 1) column vector, repeated once per hold position
# so that it lines up with the row order of X_val.
Y_val = np.tile(
    np.load("../Data/センサ別npyファイル/validation/validation_Bag/validation_Bag_Label.npy")[:, 0].reshape([-1, 1]),
    (4, 1),
)
Y_val.shape

(114740, 1)

In [10]:
# Hold-position index per validation row: four consecutive runs of 28685 rows
# valued 0, 1, 2, 3, stored as a float column vector.
Y_val_hold_position = np.repeat(np.arange(4, dtype=np.float64), 28685).reshape([-1, 1])
Y_val_hold_position.shape

(114740, 1)

In [11]:
# Column 0: activity label, column 1: hold-position index.
Y_val = np.hstack((Y_val, Y_val_hold_position))
Y_val.shape

(114740, 2)

In [12]:
# Overwrite every value >= 5 with 1, in place, in both label arrays.
# For Y_val the mask spans both columns (label and hold-position index).
np.putmask(Y_train, Y_train >= 5, 1)
np.putmask(Y_val, Y_val >= 5, 1)

## 標準化

In [13]:
std = StandardScaler()
hz_std = StandardScaler()

# Columns 0-5 plus every even column from 6 to 48 (identical to the original explicit list).
stat_cols = [0, 1, 2, 3, 4, 5] + list(range(6, 50, 2))
# Columns 11-49.
# NOTE(review): this range overlaps the even columns 12..48 already scaled by `std`
# and leaves columns 7 and 9 unscaled - confirm the intended column split.
hz_cols = list(range(11, 50))

# Fit on the training data only; validation data must be transformed with the
# training statistics. The original called fit_transform on X_val here, which
# scaled validation with its own mean/variance and re-fitted `std`.
X_train[:, stat_cols] = std.fit_transform(X_train[:, stat_cols])
X_val[:, stat_cols] = std.transform(X_val[:, stat_cols])

X_train[:, hz_cols] = hz_std.fit_transform(X_train[:, hz_cols])
X_val[:, hz_cols] = hz_std.transform(X_val[:, hz_cols])

X_train.shape, X_val.shape

((781963, 50), (114740, 50))

## 丸め処理(小数点以下3桁よりも5桁に丸めた方が精度が高かった)

In [14]:
# Keep five decimal places in both feature matrices.
X_train = np.around(X_train, decimals=5)
X_val = np.around(X_val, decimals=5)
X_train

array([[-0.48098, -0.20679,  0.47418, ...,  0.27292,  0.02477, -1.81045],
       [ 0.25378, -0.01536,  0.19819, ...,  0.93347,  0.19536,  0.37646],
       [-0.21472, -0.21009,  0.12586, ...,  0.40503, -0.24621, -0.02116],
       ...,
       [-0.35028, -0.2537 ,  0.20789, ..., -1.24637, -0.28993,  0.64154],
       [-0.35096, -0.2542 ,  0.09994, ...,  0.73531, -0.27106, -1.34656],
       [-0.35675, -0.25241,  0.13752, ...,  1.39587, -0.26832,  0.31019]])

In [15]:
# NaN counts in the training features/labels, then the distinct label values of each split.
(
    np.isnan(X_train).sum(),
    np.isnan(Y_train).sum(),
    np.unique(Y_train),
    np.unique(Y_val[:, 0]),
)

(0, 0, array([1., 2., 3., 4.]), array([1., 2., 3., 4.]))

In [16]:
# Shift the activity labels down by one and store everything as int32.
# Only column 0 of Y_val is shifted; column 1 (hold-position index) is left as is.
Y_train = (Y_train - 1).astype(np.int32)
Y_val[:, 0] -= 1
Y_val = Y_val.astype(np.int32)

In [17]:
# Shapes of the training and validation label arrays.
tuple(labels.shape for labels in (Y_train, Y_val))

((781963, 1), (114740, 2))

In [18]:
from tensorflow.keras import backend as K
def f1(y_true, y_pred):
    """Batch-wise macro F1 for sparse integer labels and per-class scores.

    The models in this notebook are compiled with
    ``sparse_categorical_crossentropy``, so ``y_true`` holds class indices while
    ``y_pred`` holds one score per class. The previous implementation multiplied
    the two directly, which only makes sense when both are 0/1 indicator tensors
    of the same shape; with integer labels the reported value was meaningless.

    Parameters
    ----------
    y_true : tensor
        Integer class indices, any shape that flattens to one entry per sample.
    y_pred : tensor
        Scores of shape (batch, num_classes); the predicted class is the argmax.

    Returns
    -------
    tensor
        Scalar: mean of the per-class F1 scores over the classes that occur in
        ``y_true`` or in the predictions of this batch. Like any batch-wise
        metric, it only approximates the epoch-level score.
    """
    num_classes = K.int_shape(y_pred)[-1]
    true_ids = K.cast(K.flatten(y_true), 'int32')
    pred_ids = K.cast(K.argmax(y_pred, axis=-1), 'int32')

    # One-hot both sides so the element-wise product marks correct predictions per class.
    true_onehot = K.one_hot(true_ids, num_classes)
    pred_onehot = K.one_hot(pred_ids, num_classes)

    true_positives = K.sum(true_onehot * pred_onehot, axis=0)
    possible_positives = K.sum(true_onehot, axis=0)
    predicted_positives = K.sum(pred_onehot, axis=0)

    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    per_class_f1 = 2 * ((precision * recall) / (precision + recall + K.epsilon()))

    # Classes absent from both labels and predictions must not pull the average to zero.
    present = K.cast((possible_positives + predicted_positives) > 0, 'float32')
    return K.sum(per_class_f1 * present) / (K.sum(present) + K.epsilon())

In [19]:
# Fully connected classifier: 64 -> 32 -> 16 hidden units, 4-way softmax output.
model = tf.keras.Sequential()
# The input shape belongs on the first layer. It was previously passed to the
# first Dense layer (where Keras ignores it) and with a spurious trailing
# dimension, although X_train is 2-D (samples, features).
model.add(layers.Flatten(input_shape=(X_train.shape[1],)))
model.add(layers.Dense(64, activation='relu', kernel_initializer=he_normal()))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(4, activation='softmax'))
# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy', f1])

In [None]:
# Train on the full training set with per-class loss weights;
# column 0 of Y_val holds the activity labels used for validation.
history = model.fit(
    X_train,
    Y_train,
    epochs=128,
    batch_size=2048,
    validation_data=(X_val, Y_val[:, 0]),
    class_weight={0: 1, 1: 5.69, 2: 16.48, 3: 5.93},
)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train on 781963 samples, validate on 114740 samples
Epoch 1/128
Epoch 2/128
Epoch 3/128
Epoch 4/128
Epoch 5/128
Epoch 6/128
Epoch 7/128
Epoch 8/128
Epoch 9/128
Epoch 10/128
Epoch 11/128
Epoch 12/128
Epoch 13/128
Epoch 14/128
Epoch 15/128
Epoch 16/128
Epoch 17/128
Epoch 18/128
Epoch 19/128
Epoch 20/128
Epoch 21/128
Epoch 22/128
Epoch 23/128
Epoch 24/128
Epoch 25/128
Epoch 26/128
Epoch 27/128
Epoch 28/128
Epoch 29/128
Epoch 30/128
Epoch 31/128
Epoch 32/128
Epoch 33/128
Epoch 34/128
Epoch 35/128
Epoch 36/128
Epoch 37/128
Epoch 38/128
Epoch 39/128
Epoch 40/128
Epoch 41/128
Epoch 42/128
Epoch 43/128
Epoch 44/128
Epoch 45/128
Epoch 46/128
Epoch 47/128
Epoch 48/128
Epoch 49/128
Epoch 50/128
Epoch 51/128
Epoch 52/128
Epoch 53/128
Epoch 54/128
Epoch 55/128
Epoch 56/128
Epoch 57/128
Epoch 58/128
Epoch 59/128
Epoch 60/128
Epoch 61/128
Epoch 62/128
Epoch 63/128
Epoch 64/128
Epoch 65/128
Epoch 66/128
Epoch 67/128
Epoch 68/128
Epoch 69/128
Epoch 70/1

In [None]:
# Print Keras' layer-by-layer summary of `model`.
model.summary()

In [None]:
# One figure per metric: (train key, validation key, title, y-label, optional y-limits).
curve_specs = [
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy', None),
    ('loss', 'val_loss', 'Model loss', 'Loss', (0, 3.5)),
]
for train_key, val_key, title, y_label, y_limits in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    # Only the loss figure gets a fixed y-range.
    if y_limits is not None:
        plt.ylim(y_limits)
    plt.show()

In [None]:
def plot_confusion_matrix(test_y,pred_y,class_names,normalize=False):
    """Draw a confusion matrix as an annotated heat map.

    Parameters
    ----------
    test_y : array-like
        True labels.
    pred_y : array-like
        Predicted labels.
    class_names : sequence of str
        Display names. When the labels are non-negative integers smaller than
        ``len(class_names)`` they are used as indices into this sequence;
        otherwise the first ``n`` names are used, ``n`` being the matrix size.
        Surplus names are therefore ignored instead of being handed to
        Matplotlib, which rejects a label count that differs from the tick count.
    normalize : bool, optional
        If True, every row is divided by its sum (fraction of each true class).
        Rows without any true sample stay at zero instead of becoming NaN.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the matrix was drawn on.
    """
    cm = confusion_matrix(test_y, pred_y)

    # confusion_matrix orders rows/columns by the sorted labels that occur in
    # either input, so the same ordering is rebuilt here to pick matching names.
    labels = np.unique(np.concatenate([np.ravel(test_y), np.ravel(pred_y)]))
    names = list(class_names)
    labels_index_names = (
        labels.size > 0
        and np.issubdtype(labels.dtype, np.integer)
        and labels.min() >= 0
        and labels.max() < len(names)
    )
    if labels_index_names:
        tick_labels = [names[k] for k in labels]
    else:
        tick_labels = names[:cm.shape[0]]

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # `where` skips empty rows so they do not produce 0/0 = NaN cells.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.figure.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=tick_labels,
           yticklabels=tick_labels,
           ylabel='True label\n',
           xlabel='\nPredicted label')

    # Write the value of every cell on top of the heat map.
    fmt = '.2f' if normalize else 'd'
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j,
                    i,
                    format(cm[i, j], fmt),
                    ha="center",
                    va="center",
                    color="red", fontsize=16)
    fig.tight_layout()
    return ax

In [None]:
# Only four classes remain after the label merge (values >= 5 were mapped to 1
# before the shift to zero-based ids), so only four tick labels are needed.
# NOTE(review): class 0 ('Still') therefore also contains the merged labels
# (presumably Car/Bus/Train/Subway) - confirm the display name.
class_names = ['Still', 'Walking', 'Run', 'Bike']
# Sequential.predict_classes is no longer available in current Keras; the argmax
# over the softmax output is the equivalent for a multi-class model.
predict = np.argmax(model.predict(X_val), axis=-1)
plot_confusion_matrix(Y_val[:, 0], predict, class_names, True)
plt.grid(False)
f1_macro = f1_score(Y_val[:, 0], predict, average='macro')
round(f1_macro, 3)

In [None]:
# Confusion matrix and macro F1 for each hold position separately;
# column 1 of Y_val stores the hold-position index of every validation row.
hold_positions = ["Bag", "Hips", "Torso", "Hand"]
for position_id, position_name in enumerate(hold_positions):
    in_position = Y_val[:, 1] == position_id
    position_true = Y_val[in_position][:, 0]
    position_pred = predict[in_position]
    plot_confusion_matrix(position_true, position_pred, class_names, True)
    print(round(f1_score(position_true, position_pred, average='macro'), 3))
    plt.title(position_name, fontsize=24)
    plt.grid(False)

# データ数をRunのサンプル数に合わせる

In [None]:
# Split the training features by class id (0..3) using the single label column.
train_Still, train_Walking, train_Run, train_Bike = [
    X_train[Y_train[:, 0] == class_id] for class_id in range(4)
]

train_Still.shape, train_Walking.shape, train_Run.shape, train_Bike.shape

In [None]:
# Undersample Still, Walking and Bike down to the size of the Run class.
# The target size is read from train_Run instead of the hard-coded 33699, and
# the draw is seeded so that a re-run selects the same rows.
rng = np.random.RandomState(42)
n_run = train_Run.shape[0]

train_Still = train_Still[rng.choice(train_Still.shape[0], n_run, replace=False)]
train_Walking = train_Walking[rng.choice(train_Walking.shape[0], n_run, replace=False)]
train_Bike = train_Bike[rng.choice(train_Bike.shape[0], n_run, replace=False)]

train_Still.shape, train_Walking.shape, train_Bike.shape

In [None]:
# Rebuild the training set from the per-class subsets (class ids 0..3 in this order).
balanced_parts = [train_Still, train_Walking, train_Run, train_Bike]
X_train = np.concatenate(balanced_parts, axis=0)
# Derive the label counts from the subsets themselves so that features and labels
# cannot drift apart if a subset size differs from the previously hard-coded 33699.
Y_train = np.repeat(np.arange(len(balanced_parts)), [len(part) for part in balanced_parts]).reshape([-1, 1])
X_train.shape, Y_train.shape

In [None]:
# Fresh classifier for the balanced training set: 64 -> 32 -> 16 hidden units, 4-way softmax output.
model = tf.keras.Sequential()
# The input shape belongs on the first layer. It was previously passed to the
# first Dense layer (where Keras ignores it) and with a spurious trailing
# dimension, although X_train is 2-D (samples, features).
model.add(layers.Flatten(input_shape=(X_train.shape[1],)))
model.add(layers.Dense(64, activation='relu', kernel_initializer=he_normal()))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(4, activation='softmax'))
# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(optimizer=tf.keras.optimizers.Adam(0.01),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy', f1])

In [None]:
# Train on the balanced set (no class weights); column 0 of Y_val holds the
# activity labels used for validation.
history = model.fit(
    X_train,
    Y_train,
    epochs=64,
    batch_size=512,
    validation_data=(X_val, Y_val[:, 0]),
)

In [None]:
# One figure per metric: (train key, validation key, title, y-label, optional y-limits).
curve_specs = [
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy', None),
    ('loss', 'val_loss', 'Model loss', 'Loss', (0, 3.5)),
]
for train_key, val_key, title, y_label, y_limits in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    # Only the loss figure gets a fixed y-range.
    if y_limits is not None:
        plt.ylim(y_limits)
    plt.show()

In [None]:
# Only four classes remain after the label merge (values >= 5 were mapped to 1
# before the shift to zero-based ids), so only four tick labels are needed.
# NOTE(review): class 0 ('Still') therefore also contains the merged labels
# (presumably Car/Bus/Train/Subway) - confirm the display name.
class_names = ['Still', 'Walking', 'Run', 'Bike']
# Sequential.predict_classes is no longer available in current Keras; the argmax
# over the softmax output is the equivalent for a multi-class model.
predict = np.argmax(model.predict(X_val), axis=-1)
plot_confusion_matrix(Y_val[:, 0], predict, class_names, True)
plt.grid(False)
f1_macro = f1_score(Y_val[:, 0], predict, average='macro')
round(f1_macro, 3)

In [None]:
# Confusion matrix and macro F1 for each hold position separately;
# column 1 of Y_val stores the hold-position index of every validation row.
hold_positions = ["Bag", "Hips", "Torso", "Hand"]
for position_id, position_name in enumerate(hold_positions):
    in_position = Y_val[:, 1] == position_id
    position_true = Y_val[in_position][:, 0]
    position_pred = predict[in_position]
    plot_confusion_matrix(position_true, position_pred, class_names, True)
    print(round(f1_score(position_true, position_pred, average='macro'), 3))
    plt.title(position_name, fontsize=24)
    plt.grid(False)