In [1]:
import sys
assert sys.version_info >= (3, 5)

\
# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt


In [2]:
import pandas as pd
df = pd.read_csv('../../DL/data/train.csv')
df.head()

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


In [3]:
from sklearn.utils import shuffle
df = shuffle(df)

In [4]:
from sklearn.preprocessing import LabelEncoder

df_cat = df['Activity']
encoder = LabelEncoder()
encoder.fit(df_cat)
df_encoded = encoder.transform(df_cat)

In [5]:
df['Activity'] = df_encoded
df['Activity']

4525    4
1446    4
5995    3
4222    1
6754    4
       ..
5191    3
5226    4
5390    3
860     2
7270    3
Name: Activity, Length: 7352, dtype: int32

In [6]:
X = df.drop(['subject', 'Activity'], axis=1).copy() # subject와 Activity 열 삭제
X

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-meanFreq(),fBodyBodyGyroJerkMag-skewness(),fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)"
4525,0.283203,-0.047024,-0.168986,0.384949,0.176898,-0.310332,0.381757,0.122611,-0.332984,0.465563,...,0.454181,-0.730593,-0.932568,-0.034924,0.558036,0.258975,-0.854858,-0.784330,0.222960,-0.066506
1446,0.256904,-0.036623,-0.133856,0.201409,-0.154142,0.344183,0.092771,-0.182114,0.291899,0.586004,...,0.190745,-0.032958,-0.338095,0.017986,-0.475545,0.942947,-0.567147,-0.665156,0.178975,0.243362
5995,0.291316,-0.001065,-0.072461,-0.336609,-0.279162,-0.303323,-0.381421,-0.253026,-0.337381,0.074430,...,0.299767,-0.351997,-0.698254,-0.044663,-0.551806,-0.680774,0.161405,-0.857202,0.192929,0.046716
4222,0.276116,-0.010909,-0.102886,-0.992196,-0.982169,-0.981127,-0.992838,-0.981366,-0.979864,-0.931924,...,0.335848,-0.623358,-0.906098,-0.177275,-0.309304,0.176515,-0.143818,-0.625215,-0.106678,-0.147469
6754,0.256382,0.000428,-0.113664,0.075014,0.046502,-0.369482,-0.024794,-0.085337,-0.385018,0.562230,...,-0.125979,0.020630,-0.316113,0.347143,0.590475,0.831084,0.465453,-0.860617,0.150598,-0.067109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5191,0.278897,-0.030306,-0.096043,-0.555352,-0.104055,-0.438064,-0.572530,-0.112149,-0.429688,-0.479744,...,0.022624,0.417731,0.200267,-0.155279,-0.137981,0.929236,-0.583898,-0.703989,0.159112,-0.185944
5226,0.289183,-0.049248,-0.125083,-0.290043,-0.212102,-0.469731,-0.307317,-0.209558,-0.528635,-0.283244,...,-0.039773,-0.021970,-0.360982,-0.019533,-0.896005,-0.039591,0.494178,-0.670139,0.106156,-0.220746
5390,0.293946,-0.018341,-0.119916,-0.627198,-0.216566,-0.424764,-0.648666,-0.253814,-0.417569,-0.547082,...,0.094031,0.527152,0.348736,-0.414621,-0.488542,0.095895,-0.297162,-0.856512,0.183547,-0.034939
860,0.280475,-0.018976,-0.113756,-0.994825,-0.985314,-0.965857,-0.995170,-0.984285,-0.963293,-0.937503,...,0.539885,-0.889671,-0.984051,-0.079515,0.014291,-0.290347,0.602344,-0.839693,0.081388,-0.098270


In [8]:
y = df['Activity'].copy() # 데이터셋에서 Activity 분리
y

4525    4
1446    4
5995    3
4222    1
6754    4
       ..
5191    3
5226    4
5390    3
860     2
7270    3
Name: Activity, Length: 7352, dtype: int32

In [10]:
print(X.shape, y.shape)

(7352, 561) (7352,)


In [13]:
from functools import partial

SeluDense = partial(keras.layers.Dense,
                   activation = "selu",
                   kernel_initializer = "lecun_normal")


def build_model():
    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=(X.shape[1],)),
        keras.layers.Dropout(0.3),
        keras.layers.BatchNormalization(),
        SeluDense(300),
        keras.layers.Dropout(0.3),
        keras.layers.BatchNormalization(),
        SeluDense(300),
        keras.layers.Dropout(0.3),
        keras.layers.BatchNormalization(),
        SeluDense(200),
        keras.layers.BatchNormalization(),
        SeluDense(200),
        keras.layers.BatchNormalization(),
        SeluDense(100),
        keras.layers.BatchNormalization(),
        SeluDense(100),
        keras.layers.BatchNormalization(), 
        keras.layers.Dense(6, activation="softmax")
    ])

    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
    return model

In [18]:
import numpy as np

k = 10
num_val_samples = len(X) // k
num_epochs = 300
all_scores = []
for i in range(k):
    print('processing fold #', i)

    X_val = X[i * num_val_samples: (i + 1) * num_val_samples]
    y_val = y[i * num_val_samples: (i + 1) * num_val_samples]

    partial_train_data = np.concatenate(
        [X[:i * num_val_samples],
         X[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [y[:i * num_val_samples],
         y[(i + 1) * num_val_samples:]],
        axis=0)

    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, verbose=0)

    loss, acc = model.evaluate(X_val, y_val, verbose=0)
    all_scores.append(acc)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [19]:
all_scores

[0.9931972622871399,
 0.9850339889526367,
 0.9918367266654968,
 0.9863945841789246,
 0.9931972622871399,
 0.9959183931350708,
 0.9931972622871399,
 0.9904761910438538,
 0.9891156554222107,
 0.9931972622871399]

In [20]:
np.mean(all_scores)

0.9911564588546753

In [19]:
all_scores

[0.9931972622871399,
 0.9904761910438538,
 0.9918367266654968,
 0.9795918464660645,
 0.9904761910438538,
 0.994557797908783,
 0.9904761910438538,
 0.9931972622871399,
 0.9918367266654968,
 0.9972789287567139]

In [20]:
np.mean(all_scores)

0.9912925124168396

In [21]:
import numpy as np

k = 10
num_val_samples = len(X) // k
num_epochs = 300
all_scores = []
for i in range(k):
    print('processing fold #', i)

    X_val = X[i * num_val_samples: (i + 1) * num_val_samples]
    y_val = y[i * num_val_samples: (i + 1) * num_val_samples]

    partial_train_data = np.concatenate(
        [X[:i * num_val_samples],
         X[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [y[:i * num_val_samples],
         y[(i + 1) * num_val_samples:]],
        axis=0)

    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, verbose=0)

    loss, acc = model.evaluate(X_val, y_val, verbose=0)
    all_scores.append(acc)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [22]:
all_scores

[0.9959183931350708,
 0.9863945841789246,
 0.9891156554222107,
 0.9836734533309937,
 0.9877551198005676,
 0.994557797908783,
 0.9918367266654968,
 0.9904761910438538,
 0.9931972622871399,
 0.9959183931350708]

In [24]:
np.mean(all_scores)

0.9908843576908112