In [19]:
# Core libraries for data processing and machine learning
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import time
from sklearn.neural_network import MLPRegressor
from sklearn.exceptions import ConvergenceWarning
# ConvergenceWarning 무시
import warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)


print("Libraries imported successfully!")

Libraries imported successfully!


In [8]:
# ==========================================
# Data Loading and Preprocessing Pipeline
# ==========================================

# Load preprocessed credit card fraud dataset
df = pd.read_csv("preprocessed-creditcard.csv")
X = df.drop("Class", axis=1).values  # Feature matrix
y = df["Class"].values                # Target labels (0: normal, 1: fraud)

print(f"Dataset loaded: {X.shape[0]} samples, {X.shape[1]} features")
print(f"Fraud rate: {np.mean(y):.4f} ({np.sum(y)} fraud cases)")

# Stratified train-test split to maintain class distribution
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Feature standardization using Z-score normalization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

# Dimensionality reduction using PCA to match quantum register size
pca = PCA(n_components=4, random_state=42)
X_train_4d = pca.fit_transform(X_train)
X_test_4d  = pca.transform(X_test)

print(f"\nTraining set: {X_train_4d.shape}")
print(f"Test set: {X_test_4d.shape}")
print(f"PCA explained variance ratio: {pca.explained_variance_ratio_}")
print(f"Total variance explained: {np.sum(pca.explained_variance_ratio_):.4f}")

Dataset loaded: 946 samples, 30 features
Fraud rate: 0.5000 (473 fraud cases)

Training set: (756, 4)
Test set: (190, 4)
PCA explained variance ratio: [0.38421646 0.10954544 0.06067923 0.05752846]
Total variance explained: 0.6120


In [9]:
# ==========================================
# Configuration for Two-Strategy Comparison
# ==========================================

# ENHANCED qVAE FEATURES
USE_DATA_REUPLOADING = True     # Embed data at each variational layer
USE_PARALLEL_EMBEDDING = 2      # Replicate data across multiple qubits (2x = 8 data qubits)
USE_ALTERNATE_EMBEDDING = True  # Alternate between RY and RX rotations
USE_SWAP_TEST = True           # Use quantum SWAP test for accurate fidelity measurement

# QUANTUM ARCHITECTURE PARAMETERS
N_REFERENCE_QUBITS = 2  # Reference qubits for SWAP test
N_TRASH_QUBITS = 2     # Trash qubits for SWAP test

# TRAINING CONFIGURATION
TRAINING_CONFIG = {
    'epochs_angle': 12,        # Standard angle embedding
    'epochs_qvae': 15,         # Enhanced qVAE (needs more epochs)
    'epochs_classical': 15,
    'batch_size_angle': 20,    # Standard strategy
    'batch_size_qvae': 8,      # Enhanced qVAE (memory intensive)
    'batch_size_classical' : 20,
    'learning_rate': 0.05      # Adam optimizer stepsize
}

print("="*80)
print("QUANTUM AUTOENCODER - ANGLE vs ENHANCED qVAE COMPARISON")
print("="*80)
print(f"Enhanced qVAE Configuration:")
print(f"  - Data Re-uploading: {USE_DATA_REUPLOADING}")
print(f"  - Parallel Embedding: {USE_PARALLEL_EMBEDDING}x (8 data qubits)")
print(f"  - Alternate RY/RX: {USE_ALTERNATE_EMBEDDING}")
print(f"  - SWAP Test: {USE_SWAP_TEST}")
print(f"  - Total qubits: 13 (8 data + 2 ref + 2 trash + 1 control)")
print(f"\nTraining Configuration: {TRAINING_CONFIG}")
print("="*80)

QUANTUM AUTOENCODER - ANGLE vs ENHANCED qVAE COMPARISON
Enhanced qVAE Configuration:
  - Data Re-uploading: True
  - Parallel Embedding: 2x (8 data qubits)
  - Alternate RY/RX: True
  - SWAP Test: True
  - Total qubits: 13 (8 data + 2 ref + 2 trash + 1 control)

Training Configuration: {'epochs_angle': 12, 'epochs_qvae': 15, 'epochs_classical': 15, 'batch_size_angle': 20, 'batch_size_qvae': 8, 'batch_size_classical': 20, 'learning_rate': 0.05}


In [15]:
def compute_classical_batch_cost(samples, model):
    """
    Classical AE용 배치 손실 계산 함수.
    
    Args:
        samples: (배치) 원본 데이터, shape = (batch_size, n_features)
        model: 학습된 MLPRegressor 오토인코더
    
    Returns:
        linear_loss: 평균 절댓값 오차 (MAE)
        squared_loss: 평균 제곱 오차 (MSE)
    """
    recon = model.predict(samples)
    errors = recon - samples
    linear_loss   = np.mean(np.abs(errors))
    squared_loss  = np.mean(errors**2)
    return linear_loss, squared_loss

In [16]:
def train_classical_ae_strategy():
    print(f"\n{'='*60}")
    print("TRAINING: CLASSICAL AUTOENCODER STRATEGY")
    print(f"{'='*60}")

    total_epochs = 50
    ae = MLPRegressor(
        hidden_layer_sizes=(2,),
        activation='relu',
        solver='adam',
        learning_rate_init=TRAINING_CONFIG['learning_rate'],
        max_iter=1,           # 한 에포크씩 학습
        warm_start=True,
        batch_size=TRAINING_CONFIG['batch_size_classical'],
        random_state=42,
        verbose=False
    )

    linear_losses  = []
    squared_losses = []

    for epoch in range(1, total_epochs+1):
        # 한 에포크 학습
        ae.fit(X_train_4d, X_train_4d)

        # 전체 학습 세트에 대한 loss 계산
        lin_loss, sq_loss = compute_classical_batch_cost(X_train_4d, ae)
        linear_losses.append(lin_loss)
        squared_losses.append(sq_loss)

        # 5 에포크마다 출력
        if epoch == 1 or epoch % 5 == 0:
            print(f"  Epoch {epoch:2d}/{total_epochs} — "
                  f"Linear Loss: {lin_loss:.6f}, "
                  f"Squared Loss: {sq_loss:.6f}")

    # 테스트 세트 평가
    recon_test  = ae.predict(X_test_4d)
    test_errors = np.mean((X_test_4d - recon_test)**2, axis=1)
    threshold   = np.percentile(
        np.mean((X_train_4d - ae.predict(X_train_4d))**2, axis=1),
        TRAINING_CONFIG.get('threshold_percentile', 95)
    )
    y_pred = (test_errors > threshold).astype(int)

    print(f"\n  - Threshold (95th percentile of train MSE): {threshold:.6f}")
    print(f"  - Test set anomaly rate: {y_pred.mean():.4f}")

    return {
        'strategy'       : 'classical_ae',
        'model'          : ae,
        'threshold'      : threshold,
        'y_pred'         : y_pred,
        'linear_losses'  : linear_losses,
        'squared_losses' : squared_losses
    }

In [20]:
# Train Angle strategy
results = {}
total_start_time = time.time()
try:
    classical_result = train_classical_ae_strategy()
    results['classical'] = classical_result
    print(f"✓ classical strategy completed successfully")
except Exception as e:
    print(f"✗ classical strategy failed: {str(e)}")
    results['classical'] = {'error': str(e)}



TRAINING: CLASSICAL AUTOENCODER STRATEGY
  Epoch  1/50 — Linear Loss: 0.772104, Squared Loss: 1.573793
  Epoch  5/50 — Linear Loss: 0.606633, Squared Loss: 1.263942
  Epoch 10/50 — Linear Loss: 0.571217, Squared Loss: 1.144388
  Epoch 15/50 — Linear Loss: 0.589840, Squared Loss: 1.064400
  Epoch 20/50 — Linear Loss: 0.610080, Squared Loss: 1.074977
  Epoch 25/50 — Linear Loss: 0.587429, Squared Loss: 1.018109
  Epoch 30/50 — Linear Loss: 0.566136, Squared Loss: 1.042183
  Epoch 35/50 — Linear Loss: 0.563553, Squared Loss: 0.992692
  Epoch 40/50 — Linear Loss: 0.589225, Squared Loss: 0.987001
  Epoch 45/50 — Linear Loss: 0.568764, Squared Loss: 0.984173
  Epoch 50/50 — Linear Loss: 0.609792, Squared Loss: 0.997257

  - Threshold (95th percentile of train MSE): 3.625495
  - Test set anomaly rate: 0.0263
✓ classical strategy completed successfully
