In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score , recall_score , confusion_matrix
tf.random.set_seed(2)

In [4]:
#원본 로딩
sequence_df = pd.read_excel('../../sample_data/symbol_sequence.xlsx')
sequence_df

Unnamed: 0,T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,...,T51,T52,T53,T54,T55,T56,T57,T58,T59,T60
0,A,B,C,A,B,A,B,C,A,B,...,B,C,A,B,C,A,B,C,A,B


In [5]:
#형태 변환 검사
sequence_matrix_df = pd.DataFrame(sequence_df.to_numpy().reshape(12,5))
sequence_matrix_df

Unnamed: 0,0,1,2,3,4
0,A,B,C,A,B
1,A,B,C,A,B
2,A,B,C,A,B
3,A,B,C,A,B
4,A,B,C,A,B
5,A,B,C,A,B
6,C,A,B,C,A
7,B,C,A,B,C
8,A,B,C,A,B
9,C,A,B,C,A


In [6]:
#numpy로 전환
sequence_np = sequence_df.to_numpy().reshape(-1)
sequence_np

array(['A', 'B', 'C', 'A', 'B', 'A', 'B', 'C', 'A', 'B', 'A', 'B', 'C',
       'A', 'B', 'A', 'B', 'C', 'A', 'B', 'A', 'B', 'C', 'A', 'B', 'A',
       'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B',
       'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C',
       'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B'], dtype=object)

In [7]:
# 라벨 인코더 생성
encoder = LabelEncoder()

In [8]:
encoder.fit(sequence_np)

In [9]:
sequence_encoded_np = encoder.transform(sequence_np)
sequence_encoded_np

array([0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 0, 1,
       2, 0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
       1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1])

In [10]:
#2차원 행렬로 변환
sequence_tbl = sequence_encoded_np.reshape(12,5)   
sequence_tbl

array([[0, 1, 2, 0, 1],
       [0, 1, 2, 0, 1],
       [0, 1, 2, 0, 1],
       [0, 1, 2, 0, 1],
       [0, 1, 2, 0, 1],
       [0, 1, 2, 0, 1],
       [2, 0, 1, 2, 0],
       [1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1],
       [2, 0, 1, 2, 0],
       [1, 2, 0, 1, 2],
       [0, 1, 2, 0, 1]])

In [11]:
X = sequence_tbl[:,0:4]
display(X)

array([[0, 1, 2, 0],
       [0, 1, 2, 0],
       [0, 1, 2, 0],
       [0, 1, 2, 0],
       [0, 1, 2, 0],
       [0, 1, 2, 0],
       [2, 0, 1, 2],
       [1, 2, 0, 1],
       [0, 1, 2, 0],
       [2, 0, 1, 2],
       [1, 2, 0, 1],
       [0, 1, 2, 0]])

In [12]:
y = sequence_tbl[:,4:5]
display(y)

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [2],
       [1],
       [0],
       [2],
       [1]])

In [13]:
x_train, x_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=True, stratify=y, random_state=34)

In [14]:
print(x_train)
print(x_valid)

[[0 1 2 0]
 [1 2 0 1]
 [0 1 2 0]
 [0 1 2 0]
 [0 1 2 0]
 [2 0 1 2]
 [2 0 1 2]
 [0 1 2 0]
 [0 1 2 0]]
[[1 2 0 1]
 [0 1 2 0]
 [0 1 2 0]]


In [15]:
print(y_train)
print(y_valid)

[[1]
 [2]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]]
[[2]
 [1]
 [1]]


In [16]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(units=4, activation='relu', input_shape=(4,1))) # (seq_len, input length)
model.add(tf.keras.layers.Dense(4))
model.add(tf.keras.layers.Dense(1))
model.summary()
model.compile(optimizer='adam', loss='mse')
model.fit(x_train, y_train, epochs=500, batch_size=3)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 4)                 96        
                                                                 
 dense (Dense)               (None, 4)                 20        
                                                                 
 dense_1 (Dense)             (None, 1)                 5         
                                                                 
Total params: 121
Trainable params: 121
Non-trainable params: 0
_________________________________________________________________
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 

<keras.callbacks.History at 0x206dd7fa910>

In [17]:
x_input =x_valid.reshape((3,4,1))
print(x_input)

[[[1]
  [2]
  [0]
  [1]]

 [[0]
  [1]
  [2]
  [0]]

 [[0]
  [1]
  [2]
  [0]]]


In [18]:
y_predict = model.predict(x_input)



In [19]:
y_predict = y_predict.reshape(-1)
y_predict = np.round(y_predict).astype(int)
print(y_predict)

[2 1 1]


In [20]:
y_valid = y_valid.reshape(-1).tolist()
print(y_valid)

[2, 1, 1]


In [21]:
cm = confusion_matrix(y_valid, y_predict)
cm

array([[2, 0],
       [0, 1]], dtype=int64)

In [22]:
precision = precision_score(y_valid, y_predict,average= "macro")
recall = recall_score(y_valid, y_predict,average= "macro")
print('정밀도: {0:.4f}, 재현율: {1:.4f}'.format(precision, recall))

정밀도: 1.0000, 재현율: 1.0000
