In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
import pandas as pd

df_data = pd.read_csv("/content/drive/MyDrive/2024-deeplearning/6주차/DSA_features.csv")

In [11]:
df_data.shape

(9120, 272)

In [12]:
grouped_data = df_data.groupby(['people', 'activity'])

time_series_data = {}

for (person, activity), group in grouped_data:
    features = group.drop(columns=['people', 'activity'])
    time_series_data[(person, activity)] = features.reset_index(drop=True)


In [13]:
from sklearn.preprocessing import StandardScaler

scalers = {}
for key, df in time_series_data.items():

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df)

    time_series_data[key] = pd.DataFrame(scaled_data, columns=df.columns)

    scalers[key] = scaler



In [14]:
import numpy as np

# 시퀀스 길이 및 배치 크기 설정
sequence_length = 50  # 시계열 데이터의 각 시퀀스 길이
batch_size = 32       # 배치 크기 설정

# 시계열 데이터를 트랜스포머 입력 형식으로 변환
def create_batches(data, sequence_length):
    """
    주어진 시계열 데이터를 시퀀스 길이에 맞춰 배치로 나눔.
    """
    num_sequences = len(data) // sequence_length
    data = data[:num_sequences * sequence_length]  # 남은 데이터를 잘라냄
    sequences = np.array(np.split(data, num_sequences))
    return sequences

# 모든 그룹에 대해 시퀀스 생성
batches = []
for key, df in time_series_data.items():
    sequences = create_batches(df.values, sequence_length)
    batches.append(sequences)

# 각 그룹별로 배치된 데이터를 NumPy 배열로 변환
batches = np.array(batches)

# 데이터의 형상 확인 (batch_size, sequence_length, num_features)
print(f"Batch shape: {batches.shape}")

# 예시로 첫 번째 배치 데이터를 출력해보기
print(f"First batch example:\n{batches[0, 0]}")


Batch shape: (152, 1, 50, 270)
First batch example:
[[-0.11134433 -0.18117774  0.         ...  1.43956848  1.36136308
   0.97373768]
 [-0.53132922  0.29716236  0.         ...  0.77366509  0.83059095
   1.03404759]
 [-0.09445489  2.15523075  0.         ... -0.00313359  0.12356143
   0.88986324]
 ...
 [-1.11505237  0.88865818  0.         ... -0.99038345 -1.01641109
  -0.69578333]
 [ 0.02319269 -1.32302185  0.         ... -0.96589909 -0.98271771
  -1.43637396]
 [ 1.6618204  -0.58493793  0.         ...  0.78297581  0.83842348
  -1.84116343]]


In [16]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
import keras
from keras import layers

# x_train 만들기 (모든 시계열 데이터를 사용)
x_train = np.concatenate([df.values for df in time_series_data.values()])
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))

# y_train 만들기 ('activity'를 라벨로 변환)
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(df_data['activity'])

# 데이터 셔플
idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]

# 클래스 개수 정의
n_classes = len(np.unique(y_train))

# y_train 값 조정 (-1이 있는 경우 0으로 변환, 없다면 이 부분은 생략 가능)
y_train[y_train == -1] = 0


In [21]:
from sklearn.model_selection import train_test_split

# 데이터 셔플 후 train과 test로 나누기 (80% train, 20% test)
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.2, random_state=42)


In [17]:
print(y_train)

[ 2  7 12 ... 12 12  4]


In [18]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res

In [22]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_last")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

In [24]:
input_shape = x_train.shape[1:]

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

callbacks = [keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]

model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=callbacks,
)

model.evaluate(x_test, y_test, verbose=1)

Epoch 1/50
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 148ms/step - loss: 2.9432 - sparse_categorical_accuracy: 0.0536 - val_loss: 2.9442 - val_sparse_categorical_accuracy: 0.0623
Epoch 2/50
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 63ms/step - loss: 2.9439 - sparse_categorical_accuracy: 0.0530 - val_loss: 2.9443 - val_sparse_categorical_accuracy: 0.0575
Epoch 3/50
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 64ms/step - loss: 2.9437 - sparse_categorical_accuracy: 0.0571 - val_loss: 2.9443 - val_sparse_categorical_accuracy: 0.0568
Epoch 4/50
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 65ms/step - loss: 2.9429 - sparse_categorical_accuracy: 0.0576 - val_loss: 2.9443 - val_sparse_categorical_accuracy: 0.0521
Epoch 5/50
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 64ms/step - loss: 2.9438 - sparse_categorical_accuracy: 0.0598 - val_loss: 2.9444 - val_sparse_categorical_accura

[2.945152521133423, 0.05921052768826485]

In [25]:
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_accuracy}")

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 2.9453 - sparse_categorical_accuracy: 0.0640
Test accuracy: 0.05921052768826485
