<a href="https://colab.research.google.com/github/juhumkwon/DeepLearning/blob/main/%EA%B0%95%ED%99%94%ED%95%99%EC%8A%B5(%EB%B3%B4%EC%95%88%EC%A0%95%EC%B1%85%ED%95%99%EC%8A%B5).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
import random

# Traffic state vector: [port, IP reputation, packet length, protocol]
def generate_traffic():
    """Sample one synthetic traffic observation.

    Returns:
        A float32 NumPy array of shape (4,) laid out as
        ``[port / 1000, reputation, length / 1500, protocol]``.
        The port is drawn from {22, 80, 443, 3389}, the reputation and
        protocol flags from {0, 1}, and the length from [50, 1500).
    """
    rng = np.random  # global NumPy RNG; draw order below is significant for seeded runs
    dst_port = rng.choice([22, 80, 443, 3389])
    ip_reputation = rng.choice([0, 1])  # 0: malicious, 1: benign
    packet_len = rng.randint(50, 1500)  # upper bound is exclusive
    protocol = rng.choice([0, 1])  # 0: TCP, 1: UDP

    features = [dst_port / 1000, ip_reputation, packet_len / 1500, protocol]
    return np.array(features, dtype=np.float32)

# Agent actions: 0 = allow, 1 = block
def get_reward(action, traffic):
    """Score an action against the traffic's reputation flag.

    Args:
        action: 0 to allow the traffic, 1 to block it.
        traffic: Indexable state vector; element 1 is the reputation flag
            used as ground truth (0 = malicious, 1 = benign).

    Returns:
        +1 for blocking malicious traffic, -2 for blocking benign traffic
        (false positive), -5 for allowing malicious traffic (false
        negative), and 0.5 in every other case (including allowing benign
        traffic).
    """
    reputation = traffic[1]
    is_malicious = reputation == 0
    is_benign = reputation == 1

    if action == 1:
        if is_malicious:
            return +1  # malicious traffic blocked
        if is_benign:
            return -2  # benign traffic blocked (false positive)
        return 0.5

    if action == 0 and is_malicious:
        return -5  # malicious traffic allowed (false negative)

    return 0.5  # benign traffic allowed (and any remaining combination)

# Simple Q-network: maps the 4-feature traffic state to one Q-value per action.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape`
    # to the first Dense layer is the deprecated form and makes Keras emit a
    # UserWarning when the layer is constructed.
    tf.keras.Input(shape=(4,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(2, activation='linear'),  # two actions: allow / block
])
optimizer = tf.keras.optimizers.Adam(0.001)  # Adam with learning rate 0.001
gamma = 0.95  # discount factor applied to the bootstrapped next-state value

# Training: 1000 single-step Q-learning updates, one freshly sampled state each.
for episode in range(1000):
    state = generate_traffic()
    state_input = tf.expand_dims(state, axis=0)  # add a batch axis -> shape (1, 4)

    # Epsilon-greedy: epsilon decays linearly from 1.0 and is floored at 0.1
    # (reached at episode 450).
    epsilon = max(0.1, 1 - episode / 500)
    if random.random() < epsilon:
        action = random.randint(0, 1)
    else:
        # Q-values are only needed when acting greedily, so the forward pass
        # is skipped on exploration steps.
        q_values = model(state_input).numpy()[0]
        action = int(np.argmax(q_values))

    reward = get_reward(action, state)

    # NOTE(review): next_state is an independent draw used only for the
    # bootstrap term; it is not carried over as the next iteration's state.
    next_state = generate_traffic()
    next_q = model(tf.expand_dims(next_state, axis=0)).numpy()[0]
    # TD target, computed outside the tape so it is treated as a constant.
    target = reward + gamma * np.max(next_q)

    with tf.GradientTape() as tape:
        q_pred = model(state_input)[0, action]
        loss = tf.square(q_pred - target)  # squared TD error for the taken action
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    if episode % 100 == 0:
        print(f"[{episode}] Loss: {loss.numpy():.4f}, Reward: {reward}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[0] Loss: 4.7207, Reward: -2
[100] Loss: 21.9890, Reward: -5
[200] Loss: 0.2188, Reward: 0.5
[300] Loss: 2.7843, Reward: 1
[400] Loss: 0.0745, Reward: 1
[500] Loss: 0.6705, Reward: 1
[600] Loss: 0.1302, Reward: 0.5
[700] Loss: 1.3433, Reward: 0.5
[800] Loss: 3.0939, Reward: 0.5
[900] Loss: 0.6720, Reward: 0.5
