In [None]:
import random
import json
import numpy as np
from google.colab import files

num_samples = 5000  # number of samples generated PER LABEL (total = 2 * num_samples)
SEED = 42  # fixed seed so that Restart & Run All regenerates the same dataset
POINTS_PER_STROKE = 5  # number of [x, y, t] points in every generated stroke

random.seed(SEED)


def make_stroke_sample(label, y_start_range, y_direction):
    """Build one synthetic single-stroke sample for the character "ㅣ".

    The stroke starts at a uniformly drawn point and then advances
    ``POINTS_PER_STROKE - 1`` times. Each step jitters x by a value in
    [-1, 1] and moves y by a value in [10, 19] in the requested direction.
    The third component of each point is a timestamp 0.1, 0.2, ...

    Args:
        label: Value stored under the "label" key ("correct" / "incorrect").
        y_start_range: ``(low, high)`` bounds of the starting y coordinate.
        y_direction: ``-1`` makes y decrease at every step, ``+1`` makes it
            increase.

    Returns:
        A dict with the keys "character", "strokes" and "label".
    """
    x = random.uniform(40, 80)
    y = random.uniform(*y_start_range)
    path = [[x, y, 0.1]]
    for step in range(1, POINTS_PER_STROKE):
        # Draw order (x jitter first, then y step) is kept deliberately so a
        # given seed always yields the same sequence of points.
        x = x + random.uniform(-1, 1)
        y = y + y_direction * random.uniform(10, 19)
        path.append([x, y, round(0.1 * (step + 1), 1)])
    return {
        "character": "ㅣ",
        "strokes": [{"stroke_id": 1, "path": path}],
        "label": label,
    }


# "correct" strokes start with y in [80, 100] and y decreases over time;
# "incorrect" strokes start with y in [0, 20] and y increases over time.
data = []
for label, y_start_range, y_direction in (
    ("correct", (80, 100), -1),
    ("incorrect", (0, 20), +1),
):
    data.extend(
        make_stroke_sample(label, y_start_range, y_direction)
        for _ in range(num_samples)
    )

# Save as JSON (ensure_ascii=False keeps the Hangul character readable)
data_filename = "generated_ㅣ_dataset.json"
with open(data_filename, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

files.download(data_filename)
print(f"파일 {data_filename} 생성 완료!")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

파일 generated_ㅣ_dataset.json 생성 완료!


In [None]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking, Bidirectional
from sklearn.model_selection import train_test_split

# Load the generated dataset
with open("/content/generated_ㅣ_dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# 2. Convert every sample into one point sequence plus a binary target
X, y = [], []
for sample in data:
    # Concatenate the points of all strokes of the sample into a single list
    points = [point for stroke in sample["strokes"] for point in stroke["path"]]
    X.append(np.array(points))  # expected shape: (N, 3)
    y.append(int(sample["label"] == "correct"))  # correct -> 1, anything else -> 0

# 3. Zero-pad all sequences to the longest one so they share a single shape
max_length = max(map(len, X))
X_padded = np.zeros((len(X), max_length, 3))  # (num_samples, max_length, 3)
for row, sequence in enumerate(X):
    X_padded[row, :len(sequence), :] = sequence

# 📌 4️⃣ Independent Min-Max Normalization
def min_max_normalize(data):
    """Scale the first two feature channels (x and y) into [0, 1].

    Each of the two channels is scaled independently, using the minimum and
    maximum taken over the whole array (every sample and every timestep).
    Any further channels are returned unchanged.

    Args:
        data: Array-like of shape (num_samples, seq_len, num_features) with at
            least two features.

    Returns:
        A new floating-point ndarray of the same shape. The input is left
        untouched; an integer input is converted to float64 so the scaled
        values are not truncated.
    """
    source = np.asarray(data)
    out_dtype = source.dtype if np.issubdtype(source.dtype, np.floating) else np.float64
    scaled = source.astype(out_dtype, copy=True)

    for channel_idx in range(2):  # channel 0 = x, channel 1 = y
        channel = scaled[:, :, channel_idx]
        min_val = channel.min()
        max_val = channel.max()
        # The small epsilon keeps the division defined when max == min.
        scaled[:, :, channel_idx] = (channel - min_val) / (max_val - min_val + 1e-8)
    return scaled

# Record the raw x/y range BEFORE scaling. These are the same whole-array
# per-channel min/max values that min_max_normalize scales by; persisting them
# lets inference code apply exactly the scaling the model was trained on.
NORM_STATS_PATH = "/content/drive/MyDrive/norm_stats.npy"
norm_stats = np.stack([
    X_padded[:, :, :2].min(axis=(0, 1)),
    X_padded[:, :, :2].max(axis=(0, 1)),
])  # shape (2, 2): row 0 = [min_x, min_y], row 1 = [max_x, max_y]

X_padded = min_max_normalize(X_padded)

# 5. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_padded, np.array(y), test_size=0.2, random_state=42)

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable between runs.
tf.keras.utils.set_random_seed(42)

# 6. LSTM model definition
model = Sequential([
    # An explicit Input object replaces `input_shape=` on the first layer,
    # which Keras reports as a deprecated pattern for Sequential models.
    tf.keras.Input(shape=(max_length, 3)),
    Masking(mask_value=0.0),  # skip timesteps whose features are all 0 (padding)
    Bidirectional(LSTM(128, return_sequences=True)),  # bidirectional LSTM
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid")  # binary classification (correct / incorrect)
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# 7. Training
# NOTE(review): the held-out split is also used as validation data here, so
# there is no separate untouched test set.
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# 8. Persist the model together with everything needed to preprocess new input
model.save("/content/drive/MyDrive/lstm_model.h5")
np.save("/content/drive/MyDrive/max_length.npy", max_length)  # padded sequence length
np.save(NORM_STATS_PATH, norm_stats)  # raw x/y min and max used for scaling

print("✅ 모델 학습 완료 및 저장됨!")

  super().__init__(**kwargs)


Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 41ms/step - accuracy: 0.9100 - loss: 0.2372 - val_accuracy: 1.0000 - val_loss: 4.4378e-04
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - accuracy: 1.0000 - loss: 3.2814e-04 - val_accuracy: 1.0000 - val_loss: 1.5424e-04
Epoch 3/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 28ms/step - accuracy: 1.0000 - loss: 1.2935e-04 - val_accuracy: 1.0000 - val_loss: 7.9147e-05
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 34ms/step - accuracy: 1.0000 - loss: 6.9832e-05 - val_accuracy: 1.0000 - val_loss: 4.8326e-05
Epoch 5/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 32ms/step - accuracy: 1.0000 - loss: 4.3191e-05 - val_accuracy: 1.0000 - val_loss: 3.2516e-05




✅ 모델 학습 완료 및 저장됨!


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# 📌 1️⃣ 모델 및 max_length 불러오기
model_path = "/content/drive/MyDrive/lstm_model.h5"
max_length_path = "/content/drive/MyDrive/max_length.npy"
norm_stats_path = "/content/drive/MyDrive/norm_stats.npy"

model = load_model(model_path)
max_length = int(np.load(max_length_path))

# Optional training-time scaling statistics: a (2, 2) array whose first row is
# [min_x, min_y] and second row is [max_x, max_y]. When the file is absent the
# predictor falls back to scaling each input by its own min/max.
try:
    norm_stats = np.load(norm_stats_path)
except FileNotFoundError:
    norm_stats = None
    print("WARNING: norm_stats.npy not found; falling back to per-sample min-max scaling.")

print("✅ 저장된 모델 불러오기 완료!")

# 2. Prediction function
def predict_sequence(input_strokes, stats=None):
    """Classify a handwritten stroke sequence as "correct" or "incorrect".

    Args:
        input_strokes: Iterable of dicts, each with a "path" key holding a
            list of [x, y, t] points. All paths are concatenated in order.
        stats: Optional (2, 2) array-like ``[[min_x, min_y], [max_x, max_y]]``
            used to scale x and y. Defaults to the module-level ``norm_stats``.
            If that is also ``None``, x and y are scaled by the min/max of the
            padded input itself.

    Returns:
        Tuple ``(label, pred)``: ``label`` is "correct" when the model output
        exceeds 0.5 and "incorrect" otherwise; ``pred`` is the raw array
        returned by ``model.predict``.

    Raises:
        ValueError: If the input has no points, a point is not [x, y, t], or
            the sequence is longer than ``max_length``.
    """
    if stats is None:
        stats = norm_stats

    # Concatenate the points of every stroke
    points = [point for stroke in input_strokes for point in stroke["path"]]
    if not points:
        raise ValueError("input_strokes contains no points")

    input_path = np.asarray(points, dtype=np.float64)
    if input_path.ndim != 2 or input_path.shape[1] != 3:
        raise ValueError("every point must be of the form [x, y, t]")
    if len(input_path) > max_length:
        raise ValueError(
            f"sequence has {len(input_path)} points but the model accepts at most {max_length}"
        )

    # Padding
    input_padded = np.zeros((1, max_length, 3))
    input_padded[0, :len(input_path), :] = input_path

    if stats is not None:
        # Scale only the real timesteps with the supplied statistics; padding
        # rows stay all-zero, which is the value the Masking layer skips.
        lower, upper = np.asarray(stats, dtype=np.float64)
        input_padded[0, :len(input_path), :2] = (input_path[:, :2] - lower) / (upper - lower + 1e-8)
    else:
        # Fallback: independent min-max scaling of x and y over this input.
        for channel_idx in range(2):
            min_val = np.min(input_padded[:, :, channel_idx])
            max_val = np.max(input_padded[:, :, channel_idx])
            input_padded[:, :, channel_idx] = (
                (input_padded[:, :, channel_idx] - min_val) / (max_val - min_val + 1e-8)
            )

    # Prediction
    pred = model.predict(input_padded)
    return "correct" if pred[0][0] > 0.5 else "incorrect", pred

# 📌 🔥 예제 테스트
# Example input: one stroke of five [x, y, t] points whose y value grows from
# about 3.7 to about 71.5 while x stays near 79.
test_sample = {
    "strokes": [
        {
            "stroke_id": 1,
            "path": [
                [79.789472866258, 3.6633368904884267, 0.1],
                [78.8507700307559, 20.488036257179438, 0.2],
                [78.61341748679581, 37.72585989284296, 0.3],
                [78.62261716790583, 56.23775787137843, 0.4],
                [79.1530780787082, 71.5094578993419, 0.5],
            ],
        }
    ]
}

# Run the classifier on the example and show both the label and the raw score
result_label, pred_value = predict_sequence(test_sample["strokes"])
print("Predicted result:", result_label)
print("Prediction probability:", pred_value)




✅ 저장된 모델 불러오기 완료!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 796ms/step
Predicted result: incorrect
Prediction probability: [[1.6459422e-05]]
