<a href="https://colab.research.google.com/github/juhumkwon/Data/blob/main/%EC%9B%B9%EC%85%80(DeepLearning_GlobalAveragePooling1D).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ 웹셸 탐지용 딥러닝 예제 (Colab에서 실행 가능)
# -----------------------------------------
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, GlobalAveragePooling1D
import numpy as np

# 1. Sample data: a handful of hand-written PHP snippets per class
normal_php = [
    '<?php echo "Hello, world!"; ?>',
    '<?php include("menu.php"); echo $content; ?>',
    '<?php if ($_POST["id"] == "admin") { echo "hi"; } ?>',
]

webshell_php = [
    '<?php eval($_GET["cmd"]); ?>',
    '<?php system("ls"); ?>',
    '<?php echo base64_decode($_REQUEST["x"]); ?>',
    '<?php passthru($_GET["exec"]); ?>',
    '<?php eval(base64_decode($_POST["x"])); ?>',
]

# Benign snippets come first, then the web shells; labels follow that order.
texts = [*normal_php, *webshell_php]
labels = [0 for _ in normal_php] + [1 for _ in webshell_php]  # 0 = benign, 1 = web shell

# 2. Fit a word-level tokenizer on the snippets and convert them to integer
#    sequences; the vocabulary is capped at 1000 entries and '<OOV>' is the
#    configured out-of-vocabulary token.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=1000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# 3. Pad at the end ('post') so every sequence is as long as the longest one
max_len = max(map(len, sequences))
padded = pad_sequences(sequences, padding='post', maxlen=max_len)

# 4. Model definition: embedding -> average over time steps -> small dense head.
#    No recurrent (LSTM) layer is used; GlobalAveragePooling1D summarises the
#    sequence by averaging the token embeddings.
model = Sequential([
    # Declare the input shape explicitly rather than passing the deprecated
    # `input_length` argument to Embedding. This keeps the model built, so
    # summary() can report output shapes and parameter counts before fit().
    tf.keras.Input(shape=(max_len,)),
    # input_dim mirrors the Tokenizer's num_words=1000 vocabulary cap.
    # NOTE(review): padding index 0 is not masked, so padded positions are
    # included in the average — consider mask_zero=True if that matters.
    Embedding(input_dim=1000, output_dim=32),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),  # binary classification (label 1 = web shell)
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# 5. Train on the whole sample set (no validation split), 20 epochs
X_train, y_train = np.array(padded), np.array(labels)
model.fit(x=X_train, y=y_train, epochs=20, verbose=1)

# 6. Inference on new snippets, preprocessed with the same fitted tokenizer
#    and padded to the same length as the training data
test_codes = [
    '<?php echo "관리자 페이지"; ?>',
    '<?php eval(base64_decode($_GET["cmd"])); ?>',
    '<?php echo shell_exec($_POST["cmd"]); ?>',
]

test_seq = tokenizer.texts_to_sequences(test_codes)
test_pad = pad_sequences(
    test_seq,
    padding='post',
    maxlen=max_len,
)
pred = model.predict(test_pad)

# 7. Print one verdict per test snippet.
#    The final Dense(1) layer yields one score per sample, so flatten the
#    predictions and compare plain scalars against the 0.5 threshold instead of
#    relying on the truth value of a 1-element array.
for code, score in zip(test_codes, pred.ravel()):
    verdict = '웹셸' if score > 0.5 else '정상'
    print(f"[{verdict}] {code.strip()}")




Epoch 1/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 3s 3s/step - accuracy: 0.2500 - loss: 0.6967
Epoch 2/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 187ms/step - accuracy: 0.3750 - loss: 0.6942
Epoch 3/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 83ms/step - accuracy: 0.5000 - loss: 0.6921
Epoch 4/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 68ms/step - accuracy: 0.6250 - loss: 0.6903
Epoch 5/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 70ms/step - accuracy: 0.7500 - loss: 0.6887
Epoch 6/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 82ms/step - accuracy: 0.7500 - loss: 0.6871
Epoch 7/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step - accuracy: 0.7500 - loss: 0.6854
Epoch 8/20
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 48ms/step - accuracy: 0.7500 - loss: 0.6838
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1