In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# # 필요한 라이브러리 설치
# !pip install tensorflow==2.15.0
# !pip install tensorflow-quantum==0.7.3

# 라이브러리 임포트
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_quantum as tfq
import cirq
import sympy
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import matplotlib.pyplot as plt

In [3]:
# Define the path to the CSV file
path = "/content/drive/MyDrive/QNN/archive/"
csv_filename = f"{path}NF-UNSW-NB15.csv"

# Load the CSV file
df = pd.read_csv(csv_filename, low_memory=False)

# Display the summary of the dataframe
print("\nSummary of the CSV DataFrame:")
df


Summary of the CSV DataFrame:


Unnamed: 0,IPV4_SRC_ADDR,L4_SRC_PORT,IPV4_DST_ADDR,L4_DST_PORT,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Label,Attack
0,149.171.126.0,62073,59.166.0.5,56082,6,0.0,9672,416,11,8,25,15,0,Benign
1,149.171.126.2,32284,59.166.0.5,1526,6,0.0,1776,104,6,2,25,0,0,Benign
2,149.171.126.0,21,59.166.0.1,21971,6,1.0,1842,1236,26,22,25,1111,0,Benign
3,59.166.0.1,23800,149.171.126.0,46893,6,0.0,528,8824,10,12,27,124,0,Benign
4,59.166.0.5,63062,149.171.126.2,21,6,1.0,1786,2340,32,34,25,1459,0,Benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1623113,59.166.0.2,1640,149.171.126.8,53,17,0.0,130,162,2,2,0,0,0,Benign
1623114,59.166.0.2,3610,149.171.126.6,21,6,1.0,2044,2404,36,34,26,0,0,Benign
1623115,59.166.0.2,4667,149.171.126.6,40725,6,0.0,320,1918,6,8,27,0,0,Benign
1623116,59.166.0.2,5641,149.171.126.6,56243,6,0.0,528,8824,10,12,27,0,0,Benign


In [4]:
# 1. 특성 선택
# 필요 없는 열 제거 (소스 및 목적지 IP 주소와 포트)
selected_features = [
    'PROTOCOL', 'L7_PROTO', 'IN_BYTES', 'OUT_BYTES',
    'IN_PKTS', 'OUT_PKTS', 'TCP_FLAGS', 'FLOW_DURATION_MILLISECONDS', 'Label'
]
df_selected = df[selected_features]

# 2. 불균형 데이터 처리
# 악성 샘플과 정상 샘플을 분리
benign = df_selected[df_selected['Label'] == 0]
malicious = df_selected[df_selected['Label'] == 1]

# 정상 샘플을 악성 샘플 수에 맞게 리샘플링
benign_downsampled = resample(benign, replace=False, n_samples=len(malicious), random_state=123)

# 악성 샘플과 리샘플링된 정상 샘플을 합침
df_balanced = pd.concat([benign_downsampled, malicious])

# 3. 데이터 분할
train, test = train_test_split(df_balanced, test_size=0.15, random_state=1)

# 4. 특성 인코딩
# 각 특성 값을 양자 정보로 인코딩
def encode_features(df, features):
    df_encoded = df.copy()
    for feature in features:
        if feature in df_encoded.columns:
            max_value = df_encoded[feature].max()
            df_encoded[feature] = df_encoded[feature] / max_value * np.pi
            df_encoded[feature] = np.round(df_encoded[feature] / 0.25) * 0.25  # 양자화
    return df_encoded

features_to_encode = ['PROTOCOL', 'L7_PROTO', 'IN_BYTES', 'OUT_BYTES',
                      'IN_PKTS', 'OUT_PKTS', 'TCP_FLAGS', 'FLOW_DURATION_MILLISECONDS']

train_encoded = encode_features(train, features_to_encode)
test_encoded = encode_features(test, features_to_encode)

In [5]:
train_encoded

Unnamed: 0,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Label
888809,0.00,0.50,0.0,0.00,0.0,0.00,2.75,0.0,0
549273,3.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,1
967695,0.00,0.00,0.0,0.00,0.0,0.00,2.00,0.0,1
847258,0.00,0.50,0.0,0.25,0.0,0.25,2.75,0.0,0
584324,0.25,0.25,0.0,0.00,0.0,0.00,0.00,0.0,0
...,...,...,...,...,...,...,...,...,...
17442,0.00,0.00,0.0,0.00,0.0,0.00,2.75,0.0,1
536827,0.00,0.25,0.0,0.00,0.0,0.00,2.00,0.0,1
211970,0.00,0.00,0.0,0.00,0.0,0.00,2.75,0.0,0
90433,0.25,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0


In [6]:
test_encoded

Unnamed: 0,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Label
1587394,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,0
1305802,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,0
839286,0.25,0.25,0.0,0.0,0.0,0.00,0.00,0.0,0
689916,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,1
960348,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,0
...,...,...,...,...,...,...,...,...,...
813051,0.00,0.00,0.0,0.0,0.0,0.25,3.25,0.0,1
1429767,0.25,0.00,0.0,0.0,0.0,0.00,0.00,0.0,0
86295,0.00,0.25,0.0,0.0,0.0,0.00,2.25,0.0,1
376879,1.75,0.00,0.0,0.0,0.0,0.00,0.00,0.0,1


In [7]:
# import cirq
# import tensorflow as tf
# import tensorflow_quantum as tfq
# import sympy
# import matplotlib.pyplot as plt
# from keras.callbacks import Callback

# # 하이퍼파라미터 정의
# NUM_QUBITS = 4  # 양자 비트 수
# NUM_LAYERS = 2  # QNN 레이어 수
# LEARNING_RATE = 0.02
# BATCH_SIZE = 32
# EPOCHS = 10
# ACTIVATION_FUNCTION = 'relu'  # 활성화 함수 ('relu', 'sigmoid', 'tanh', 등)
# LOSS_FUNCTION = 'binary_crossentropy'  # 손실 함수 ('binary_crossentropy', 'categorical_crossentropy', 등)
# OPTIMIZER = 'adam'  # 옵티마이저 ('adam', 'sgd', 'rmsprop', 등)

# # 양자 회로 생성 함수
# def create_quantum_model(num_qubits, num_layers):
#     qubits = [cirq.GridQubit(i, 0) for i in range(num_qubits)]
#     circuit = cirq.Circuit()

#     # 각 특성값을 양자 회로에 인코딩
#     for i in range(num_qubits):
#         theta = sympy.Symbol(f'theta_{i}')
#         circuit.append(cirq.rx(theta).on(qubits[i]))

#     # QNN 레이어 추가
#     for _ in range(num_layers):
#         for i in range(num_qubits - 1):
#             circuit.append(cirq.XX(qubits[i], qubits[i + 1]))
#             circuit.append(cirq.YY(qubits[i], qubits[i + 1]))

#     # 측정 연산 추가
#     readout = cirq.Z(qubits[0])
#     return circuit, readout

# # QNN 모델 생성 함수
# def build_qnn_model(num_qubits, num_layers, learning_rate, activation_function, loss_function, optimizer):
#     circuit, readout_op = create_quantum_model(num_qubits, num_layers)
#     qnn_model = tf.keras.Sequential([
#         tf.keras.layers.Input(shape=(), dtype=tf.dtypes.string),
#         tfq.layers.PQC(circuit, readout_op),
#         tf.keras.layers.Dense(1, activation=activation_function)
#     ])

#     # 옵티마이저 설정
#     if optimizer == 'adam':
#         optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
#     elif optimizer == 'sgd':
#         optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
#     elif optimizer == 'rmsprop':
#         optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
#     else:
#         raise ValueError("Unknown optimizer")

#     qnn_model.compile(optimizer=optimizer,
#                       loss=loss_function, metrics=['accuracy'])
#     return qnn_model

# # 배치마다 loss를 기록하는 콜백 클래스
# class BatchLossHistory(Callback):
#     def on_train_begin(self, logs={}):
#         self.batch_losses = []  # 각 배치의 loss를 기록할 리스트
#         self.epoch_losses = []  # 각 에포크의 평균 loss를 기록할 리스트

#     def on_batch_end(self, batch, logs={}):
#         self.batch_losses.append(logs.get('loss'))  # 배치가 끝날 때마다 loss 기록

#     def on_epoch_end(self, epoch, logs={}):
#         self.epoch_losses.append(logs.get('loss'))  # 에포크가 끝날 때마다 평균 loss 기록

# # 데이터셋 준비 (이전 단계에서 인코딩된 train_encoded, test_encoded 사용)
# x_train = train_encoded.drop(columns=['Label']).values.tolist()
# y_train = train_encoded['Label'].values
# x_test = test_encoded.drop(columns=['Label']).values.tolist()
# y_test = test_encoded['Label'].values

# # 데이터를 TensorFlow Quantum 포맷으로 변환
# def convert_to_tensor(data):
#     return tfq.convert_to_tensor([
#         cirq.Circuit(cirq.rx(x)(cirq.GridQubit(i, 0)) for i, x in enumerate(sample)) for sample in data
#     ])

# x_train_tfcirc = convert_to_tensor(x_train)
# x_test_tfcirc = convert_to_tensor(x_test)

# # 모델 구성
# model = build_qnn_model(NUM_QUBITS, NUM_LAYERS, LEARNING_RATE, ACTIVATION_FUNCTION, LOSS_FUNCTION, OPTIMIZER)

# # 커스텀 콜백 생성
# batch_loss_history = BatchLossHistory()

# # 모델 학습
# history = model.fit(x_train_tfcirc, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS,
#                     validation_data=(x_test_tfcirc, y_test), callbacks=[batch_loss_history])

# # 테스트 손실 기록
# test_loss, test_acc = model.evaluate(x_test_tfcirc, y_test)

# # 테스트 손실 출력
# print(f"Test Loss: {test_loss}")
# print(f"Test Accuracy: {test_acc}")

# # # 배치별 loss 기록 그래프 출력
# # plt.figure(figsize=(12, 5))
# # plt.subplot(1, 2, 1)
# # plt.plot(batch_loss_history.batch_losses, label='Batch Loss during Training')
# # plt.xlabel('Batch')
# # plt.ylabel('Loss')
# # plt.title('Batch Loss during Training')
# # plt.legend()

# # # 에포크별 손실 변화 그래프 출력
# # plt.subplot(1, 2, 2)
# # plt.plot(batch_loss_history.epoch_losses, label='Epoch Loss')
# # plt.xlabel('Epochs')
# # plt.ylabel('Loss')
# # plt.title('Loss at the End of Each Epoch')
# # plt.legend()

# # plt.show()

# # # 전체 에포크에 대한 정확도 그래프 출력
# # plt.plot(history.history['accuracy'], label='Train Accuracy')
# # plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
# # plt.xlabel('Epochs')
# # plt.ylabel('Accuracy')
# # plt.legend()
# # plt.show()

# # 테스트 손실 수치 출력
# print("Validation Loss over Epochs:")
# for epoch, loss in enumerate(history.history['val_loss'], 1):
#     print(f"Epoch {epoch}: {loss:.4f}")

# # 테스트 손실 그래프 출력
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.title('Validation Loss over Epochs')
# plt.legend()
# plt.show()

In [None]:
import cirq
import tensorflow as tf
import tensorflow_quantum as tfq
import sympy
import matplotlib.pyplot as plt
from keras.callbacks import Callback

# 하이퍼파라미터 정의
HYPERPARAMS = {
    "NUM_QUBITS": 4,  # 양자 비트 수
    "NUM_LAYERS_LIST": [2, 3, 4],  # QNN 레이어 수 리스트
    "LEARNING_RATE_LIST": [0.01, 0.02],  # 학습률 리스트
    "BATCH_SIZE": 32,
    "EPOCHS": 10,
    "ACTIVATION_FUNCTION_LIST": ['relu', 'sigmoid'],  # 활성화 함수 리스트
    "LOSS_FUNCTION_LIST": ['binary_crossentropy', 'categorical_crossentropy'],  # 손실 함수 리스트
    "OPTIMIZER_LIST": ['adam', 'sgd']  # 옵티마이저 리스트
}

# 양자 회로 생성 함수
def create_quantum_model(num_qubits, num_layers):
    qubits = [cirq.GridQubit(i, 0) for i in range(num_qubits)]
    circuit = cirq.Circuit()

    # 각 특성값을 양자 회로에 인코딩
    for i in range(num_qubits):
        theta = sympy.Symbol(f'theta_{i}')
        circuit.append(cirq.rx(theta).on(qubits[i]))

    # QNN 레이어 추가
    for _ in range(num_layers):
        for i in range(num_qubits - 1):
            circuit.append(cirq.XX(qubits[i], qubits[i + 1]))
            circuit.append(cirq.YY(qubits[i], qubits[i + 1]))

    # 측정 연산 추가
    readout = cirq.Z(qubits[0])
    return circuit, readout

# QNN 모델 생성 함수
def build_qnn_model(num_qubits, num_layers, activation_function):
    circuit, readout_op = create_quantum_model(num_qubits, num_layers)
    qnn_model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(), dtype=tf.dtypes.string),
        tfq.layers.PQC(circuit, readout_op),
        tf.keras.layers.Dense(1, activation=activation_function)
    ])
    return qnn_model

# 배치마다 loss를 기록하는 콜백 클래스
class BatchLossHistory(Callback):
    def on_train_begin(self, logs={}):
        self.batch_losses = []  # 각 배치의 loss를 기록할 리스트
        self.epoch_losses = []  # 각 에포크의 평균 loss를 기록할 리스트

    def on_batch_end(self, batch, logs={}):
        self.batch_losses.append(logs.get('loss'))  # 배치가 끝날 때마다 loss 기록

    def on_epoch_end(self, epoch, logs={}):
        self.epoch_losses.append(logs.get('loss'))  # 에포크가 끝날 때마다 평균 loss 기록

# 데이터셋 준비 (이전 단계에서 인코딩된 train_encoded, test_encoded 사용)
x_train = train_encoded.drop(columns=['Label']).values.tolist()
y_train = train_encoded['Label'].values
x_test = test_encoded.drop(columns=['Label']).values.tolist()
y_test = test_encoded['Label'].values

# 데이터를 TensorFlow Quantum 포맷으로 변환
def convert_to_tensor(data):
    return tfq.convert_to_tensor([
        cirq.Circuit(cirq.rx(x)(cirq.GridQubit(i, 0)) for i, x in enumerate(sample)) for sample in data
    ])

x_train_tfcirc = convert_to_tensor(x_train)
x_test_tfcirc = convert_to_tensor(x_test)

# 테스트 손실을 저장할 딕셔너리
test_losses = {}

# 각 하이퍼파라미터 조합에 대해 모델 학습 및 평가
for num_layers in HYPERPARAMS['NUM_LAYERS_LIST']:
    for learning_rate in HYPERPARAMS['LEARNING_RATE_LIST']:
        for activation_function in HYPERPARAMS['ACTIVATION_FUNCTION_LIST']:
            for loss_function in HYPERPARAMS['LOSS_FUNCTION_LIST']:
                for optimizer_name in HYPERPARAMS['OPTIMIZER_LIST']:
                    print(f"Training with {num_layers} layers, {learning_rate} learning rate, {activation_function} activation, {loss_function} loss, {optimizer_name} optimizer")

                    # 모델 구성
                    model = build_qnn_model(
                        num_qubits=HYPERPARAMS['NUM_QUBITS'],
                        num_layers=num_layers,
                        activation_function=activation_function
                    )

                    # 옵티마이저 설정
                    if optimizer_name == 'adam':
                        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
                    elif optimizer_name == 'sgd':
                        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

                    # 모델 컴파일
                    model.compile(optimizer=optimizer, loss=loss_function, metrics=['accuracy'])

                    # 커스텀 콜백 생성
                    batch_loss_history = BatchLossHistory()

                    # 모델 학습
                    model.fit(x_train_tfcirc, y_train, batch_size=HYPERPARAMS['BATCH_SIZE'], epochs=HYPERPARAMS['EPOCHS'],
                              validation_data=(x_test_tfcirc, y_test), callbacks=[batch_loss_history])

                    # 테스트 셋 평가
                    test_loss, _ = model.evaluate(x_test_tfcirc, y_test)

                    # 결과 저장 (하이퍼파라미터 조합을 키로)
                    hyperparam_combo = (num_layers, learning_rate, activation_function, loss_function, optimizer_name)
                    test_losses[hyperparam_combo] = test_loss

# 테스트 손실 그래프 출력
plt.figure(figsize=(10, 6))
for hyperparam_combo, loss in test_losses.items():
    label = f"Layers: {hyperparam_combo[0]}, LR: {hyperparam_combo[1]}, Act: {hyperparam_combo[2]}, Loss: {hyperparam_combo[3]}, Opt: {hyperparam_combo[4]}"
    plt.plot([hyperparam_combo[0]], [loss], marker='o', label=label)

plt.xlabel('Number of Layers')
plt.ylabel('Test Loss')
plt.title('Test Loss for Different Hyperparameter Combinations')
plt.legend(loc='best', bbox_to_anchor=(1, 1))
plt.show()

Training with 2 layers, 0.01 learning rate, relu activation, binary_crossentropy loss, adam optimizer
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 learning rate, relu activation, binary_crossentropy loss, sgd optimizer
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 learning rate, relu activation, categorical_crossentropy loss, adam optimizer
Epoch 1/10


  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 learning rate, relu activation, categorical_crossentropy loss, sgd optimizer
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 learning rate, sigmoid activation, binary_crossentropy loss, adam optimizer
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 learning rate, sigmoid activation, binary_crossentropy loss, sgd optimizer
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 learning rate, sigmoid activation, categorical_crossentropy loss, adam optimizer
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training with 2 layers, 0.01 le