In [1]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read by this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# # TensorFlow 및 관련 라이브러리 설치
# !pip install --upgrade pip > /dev/null 2>&1
# !pip install tensorflow==2.10 > /dev/null 2>&1  # TensorFlow 버전을 2.10으로 설정
# !pip install tensorflow-quantum==0.7.3 > /dev/null 2>&1  # TensorFlow Quantum 버전 0.7.3 설치
# !pip install cirq-core==1.3.0 cirq-google==1.3.0 > /dev/null 2>&1  # Cirq 버전 1.3.0 설치
# !pip install sympy > /dev/null 2>&1  # sympy 설치
# !pip install protobuf==3.20.3

# TensorFlow 및 관련 라이브러리 설치
!pip install --upgrade pip > /dev/null 2>&1
!pip install tensorflow==2.10 > /dev/null 2>&1  # TensorFlow 2.10 설치
!pip install tensorflow-quantum==0.7.3 > /dev/null 2>&1  # TensorFlow Quantum 0.7.3 설치
!pip install cirq-core==0.13.1 cirq-google==0.13.1 > /dev/null 2>&1  # Cirq 0.13.1 설치
!pip install sympy > /dev/null 2>&1  # SymPy 설치
!pip install protobuf==3.20.3 > /dev/null 2>&1  # Protobuf 3.20.3 설치

In [3]:
# 소스에서 TensorFlow Quantum 설치
!pip uninstall tensorflow-quantum -y
!git clone https://github.com/tensorflow/quantum.git
%cd quantum
!pip install .

Found existing installation: tensorflow-quantum 0.7.3
Uninstalling tensorflow-quantum-0.7.3:
  Successfully uninstalled tensorflow-quantum-0.7.3
Cloning into 'quantum'...
remote: Enumerating objects: 6990, done.[K
remote: Counting objects: 100% (2323/2323), done.[K
remote: Compressing objects: 100% (735/735), done.[K
remote: Total 6990 (delta 1988), reused 1709 (delta 1583), pack-reused 4667 (from 1)[K
Receiving objects: 100% (6990/6990), 25.00 MiB | 16.44 MiB/s, done.
Resolving deltas: 100% (5235/5235), done.
/content/quantum
[31mERROR: Directory '.' is not installable. Neither 'setup.py' nor 'pyproject.toml' found.[0m[31m
[0m

In [4]:
import pandas as pd

# Location of the NF-UNSW-NB15 CSV on the mounted Drive
path = "/content/drive/MyDrive/QNN/archive/"
csv_filename = path + "NF-UNSW-NB15.csv"

# Read the whole CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=csv_filename, low_memory=False)

# Print a heading, then let the notebook render the frame as the cell output
print("\nSummary of the CSV DataFrame:")
df


Summary of the CSV DataFrame:


Unnamed: 0,IPV4_SRC_ADDR,L4_SRC_PORT,IPV4_DST_ADDR,L4_DST_PORT,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Label,Attack
0,149.171.126.0,62073,59.166.0.5,56082,6,0.0,9672,416,11,8,25,15,0,Benign
1,149.171.126.2,32284,59.166.0.5,1526,6,0.0,1776,104,6,2,25,0,0,Benign
2,149.171.126.0,21,59.166.0.1,21971,6,1.0,1842,1236,26,22,25,1111,0,Benign
3,59.166.0.1,23800,149.171.126.0,46893,6,0.0,528,8824,10,12,27,124,0,Benign
4,59.166.0.5,63062,149.171.126.2,21,6,1.0,1786,2340,32,34,25,1459,0,Benign
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1623113,59.166.0.2,1640,149.171.126.8,53,17,0.0,130,162,2,2,0,0,0,Benign
1623114,59.166.0.2,3610,149.171.126.6,21,6,1.0,2044,2404,36,34,26,0,0,Benign
1623115,59.166.0.2,4667,149.171.126.6,40725,6,0.0,320,1918,6,8,27,0,0,Benign
1623116,59.166.0.2,5641,149.171.126.6,56243,6,0.0,528,8824,10,12,27,0,0,Benign


In [5]:
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
import numpy as np

# 1. Feature selection
# Keep the flow statistics and the label; the source/destination IP address
# and port columns are left out.
selected_features = [
    'PROTOCOL',
    'L7_PROTO',
    'IN_BYTES',
    'OUT_BYTES',
    'IN_PKTS',
    'OUT_PKTS',
    'TCP_FLAGS',
    'FLOW_DURATION_MILLISECONDS',
    'Label',
]
df_selected = df.loc[:, selected_features]

# 2. Class balancing
# Split the rows by label: 0 = benign, 1 = malicious.
benign = df_selected.loc[df_selected['Label'].eq(0)]
malicious = df_selected.loc[df_selected['Label'].eq(1)]

# Draw, without replacement, as many benign rows as there are malicious rows.
benign_downsampled = resample(
    benign,
    replace=False,
    n_samples=malicious.shape[0],
    random_state=123,
)

# Stack the down-sampled benign rows on top of the malicious rows.
df_balanced = pd.concat([benign_downsampled, malicious])

# 3. Train/test split (15% of the balanced rows are held out for testing)
train, test = train_test_split(
    df_balanced,
    test_size=0.15,
    random_state=1,
)

# 4. Feature encoding
# Map every feature value to a rotation angle for the quantum encoding.
def encode_features(df, features, max_values=None):
    """Scale feature columns to rotation angles and quantize them.

    Each listed column is divided by a reference maximum, multiplied by pi and
    rounded to the nearest multiple of 0.25.

    Args:
        df: DataFrame holding the raw feature columns. It is not modified.
        features: Names of the columns to encode. Names that are not columns
            of ``df`` are skipped.
        max_values: Optional mapping (dict or Series) from feature name to the
            reference maximum used for scaling. When omitted, each column's own
            maximum in ``df`` is used (the original behavior). Pass the maxima
            of the training split when encoding other splits so that equal raw
            values map to equal angles in every split.

    Returns:
        A copy of ``df`` with the listed columns replaced by quantized angles.

    Raises:
        KeyError: If ``max_values`` is given but lacks an entry for a feature
            that is present in ``df``.
    """
    df_encoded = df.copy()
    for feature in features:
        if feature not in df_encoded.columns:
            continue

        if max_values is None:
            max_value = df_encoded[feature].max()
        else:
            max_value = max_values[feature]

        if max_value == 0:
            # Dividing by a zero maximum would produce NaN/inf; with nothing to
            # scale against, encode the column as angle 0.
            df_encoded[feature] = 0.0
            continue

        angles = df_encoded[feature] / max_value * np.pi
        if max_values is not None:
            # Values above the externally supplied maximum would otherwise map
            # to angles beyond pi.
            angles = angles.clip(upper=np.pi)
        df_encoded[feature] = np.round(angles / 0.25) * 0.25  # quantize to steps of 0.25
    return df_encoded

features_to_encode = ['PROTOCOL', 'L7_PROTO', 'IN_BYTES', 'OUT_BYTES',
                      'IN_PKTS', 'OUT_PKTS', 'TCP_FLAGS', 'FLOW_DURATION_MILLISECONDS']

# Derive the scaling from the training split only and reuse it for the test
# split, so both splits share one value-to-angle mapping and no test-set
# statistics enter the preprocessing.
train_max_values = train[features_to_encode].max()

train_encoded = encode_features(train, features_to_encode)
test_encoded = encode_features(test, features_to_encode, max_values=train_max_values)

In [6]:
# Preview the encoded training split.
train_encoded

Unnamed: 0,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Label
888809,0.00,0.50,0.0,0.00,0.0,0.00,2.75,0.0,0
549273,3.00,0.00,0.0,0.00,0.0,0.00,0.00,0.0,1
967695,0.00,0.00,0.0,0.00,0.0,0.00,2.00,0.0,1
847258,0.00,0.50,0.0,0.25,0.0,0.25,2.75,0.0,0
584324,0.25,0.25,0.0,0.00,0.0,0.00,0.00,0.0,0
...,...,...,...,...,...,...,...,...,...
17442,0.00,0.00,0.0,0.00,0.0,0.00,2.75,0.0,1
536827,0.00,0.25,0.0,0.00,0.0,0.00,2.00,0.0,1
211970,0.00,0.00,0.0,0.00,0.0,0.00,2.75,0.0,0
90433,0.25,0.00,0.0,0.00,0.0,0.00,0.00,0.0,0


In [7]:
# Preview the encoded test split.
test_encoded

Unnamed: 0,PROTOCOL,L7_PROTO,IN_BYTES,OUT_BYTES,IN_PKTS,OUT_PKTS,TCP_FLAGS,FLOW_DURATION_MILLISECONDS,Label
1587394,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,0
1305802,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,0
839286,0.25,0.25,0.0,0.0,0.0,0.00,0.00,0.0,0
689916,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,1
960348,0.00,0.00,0.0,0.0,0.0,0.00,3.25,0.0,0
...,...,...,...,...,...,...,...,...,...
813051,0.00,0.00,0.0,0.0,0.0,0.25,3.25,0.0,1
1429767,0.25,0.00,0.0,0.0,0.0,0.00,0.00,0.0,0
86295,0.00,0.25,0.0,0.0,0.0,0.00,2.25,0.0,1
376879,1.75,0.00,0.0,0.0,0.0,0.00,0.00,0.0,1


In [8]:
import tensorflow as tf
import tensorflow_quantum as tfq
import cirq

# 5. Convert the dataset into quantum circuits in TensorFlow Quantum format
def convert_to_tensor(data):
    """Build one circuit per sample and pack the circuits into a TFQ tensor.

    Args:
        data: Iterable of samples; each sample is an iterable of rotation angles.

    Returns:
        The result of ``tfq.convert_to_tensor`` applied to the list of circuits.
        In each circuit, the angle at position ``i`` of the sample is applied
        as an ``rx`` rotation on ``cirq.GridQubit(i, 0)``.
    """
    circuits = []
    for sample in data:
        rotations = [
            cirq.rx(angle)(cirq.GridQubit(row, 0))
            for row, angle in enumerate(sample)
        ]
        circuits.append(cirq.Circuit(rotations))
    return tfq.convert_to_tensor(circuits)

# Separate the angle features (as nested Python lists) from the labels.
x_train = train_encoded.drop(columns=['Label']).to_numpy().tolist()
y_train = train_encoded['Label'].to_numpy()

x_test = test_encoded.drop(columns=['Label']).to_numpy().tolist()
y_test = test_encoded['Label'].to_numpy()

# Encode every sample as a circuit tensor.
x_train_tfcirc = convert_to_tensor(x_train)
x_test_tfcirc = convert_to_tensor(x_test)

ImportError: cannot import name 'pauli_sum_pb2' from 'tensorflow_quantum.core.proto' (/content/quantum/tensorflow_quantum/core/proto/__init__.py)