<a href="https://colab.research.google.com/github/juhumkwon/Data/blob/main/01_%EC%9B%B9%EC%85%80(%EB%A8%B8%EC%8B%A0%EB%9F%AC%EB%8B%9D_logisticregression).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# 웹셸 탐지기 예제 (머신러닝 기반)

import os
import re
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# 1. 학습 데이터 (정상 PHP, 웹셸 PHP 코드 샘플)
normal_php = [
    '<?php echo "Hello, world!"; ?>',
    '<?php $title = "홈페이지"; include("header.php"); ?>',
    '<?php if ($_SERVER["REQUEST_METHOD"] == "POST") { echo "처리중"; } ?>'
]

webshell_php = [
    '<?php eval($_GET["cmd"]); ?>',
    '<?php system($_POST["cmd"]); ?>',
    '<?php echo base64_decode($_REQUEST["x"]); ?>',
    '<?php passthru($_GET["exec"]); ?>',
    '<?php $code = $_POST["x"]; eval(base64_decode($code)); ?>'
]

X_data = normal_php + webshell_php
y_labels = [0] * len(normal_php) + [1] * len(webshell_php)  # 0=정상, 1=웹셸

# 2. 벡터화 (단어 → 숫자)
vectorizer = TfidfVectorizer(token_pattern=r'\b\w+\b', lowercase=True)
X_vectorized = vectorizer.fit_transform(X_data)

# 3. 모델 학습 (로지스틱 회귀)
# clf = LogisticRegression(C=1.0, penalty='l2', solver='liblinear', max_iter=1000)
clf = LogisticRegression(max_iter=1000)
clf.fit(X_vectorized, y_labels)

# 4. 예측 테스트
test_code = [
    '<?php echo "관리자 페이지입니다."; ?>',                    # 정상
    '<?php eval(base64_decode($_GET["cmd"])); ?>',              # 웹셸
    '<?php include("menu.php"); echo $content; ?>',             # 정상
    '<?php system("ls"); ?>',                                   # 웹셸
]

X_test = vectorizer.transform(test_code)
predictions = clf.predict(X_test)

# 5. 결과 출력
for code, pred in zip(test_code, predictions):
    print("[WebShell Detected]" if pred == 1 else "[Normal Code]", "\n→", code[:60], "\n")


[WebShell Detected] 
→ <?php echo "관리자 페이지입니다."; ?> 

[WebShell Detected] 
→ <?php eval(base64_decode($_GET["cmd"])); ?> 

[WebShell Detected] 
→ <?php include("menu.php"); echo $content; ?> 

[WebShell Detected] 
→ <?php system("ls"); ?> 

