In [1]:
import sys, subprocess, time, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

from qiskit.circuit.library import ZZFeatureMap, TwoLocal
from qiskit_aer.primitives import Estimator
from qiskit_machine_learning.neural_networks import EstimatorQNN
from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier
from qiskit_machine_learning.optimizers import COBYLA

In [2]:
# ===== Parametry do łatwej zmiany =====
data_path   = "countsAll_fixed_07_07_23.csv"  # ścieżka do pliku z danymi
sep         = "\t"                            # separator (w Twoim pliku jest tab)
n_components_pca = 2                          # liczba komponentów PCA = liczba kubitów
test_size   = 0.20                            # ułamek danych do testu
random_state = 42                             # ziarno losowe
maxiter     = 20                             # iteracje optymalizatora
entanglement = "linear"                       # "linear" | "full" | lista par
reps_feature = 2                              # głębokość feature map
reps_ansatz  = 2  

In [3]:
def build_vqc_qnn(num_features: int):
    """
    Buduje EstimatorQNN z ZZFeatureMap i TwoLocal (VQC).
    """
    feature_map = ZZFeatureMap(feature_dimension=num_features, reps=reps_feature, entanglement=entanglement)
    ansatz = TwoLocal(
        num_qubits=num_features,
        reps=reps_ansatz,
        rotation_blocks=["ry", "rz"],
        entanglement_blocks="cz",
        entanglement=entanglement,
    )
    estimator = Estimator()  # Aer backend

    qnn = EstimatorQNN(
        circuit=feature_map.compose(ansatz),
        input_params=feature_map.parameters,
        weight_params=ansatz.parameters,
        estimator=estimator,
    )

    optimizer = COBYLA(maxiter=maxiter)   # tutaj ustawiamy maxiter
    clf = NeuralNetworkClassifier(
        neural_network=qnn,
        optimizer=optimizer,
        one_hot=False,    # bo etykiety to 0/1
    )
    return clf

In [4]:
# ===== 1) Wczytanie i przygotowanie danych =====
print("Wczytywanie danych...")
df = pd.read_csv(data_path, sep=sep).T

Wczytywanie danych...


In [5]:
metadata = pd.read_csv("SampleInfo_fixed_08_07_23.csv", delimiter=";")
metadata = metadata.set_index("id")
metadata["label"] = metadata["GroupAlternative"].apply(
    lambda x: 0 if x == "Asymptomatic controls" else 1
)
metadata = metadata[metadata["RealLocation"] != "Institute 5"]
df = df.merge(metadata, left_index=True, right_index=True)

In [6]:
X = df.drop(columns=metadata.columns)
y = df["label"]

In [7]:
print("Shape X:", X.shape)
print("Shape y:", y.shape)
print("Class balance:\n", y.value_counts())
print(f"Liczba cech (genów): {X.shape[1]}, liczba próbek: {X.shape[0]}")
print(f"Klasy: 0 (zdrowe) = {(y==0).sum()}, 1 (nowotworowe) = {(y==1).sum()}")

Shape X: (2060, 5346)
Shape y: (2060,)
Class balance:
 label
1    1706
0     354
Name: count, dtype: int64
Liczba cech (genów): 5346, liczba próbek: 2060
Klasy: 0 (zdrowe) = 354, 1 (nowotworowe) = 1706


In [8]:
# standaryzacja i PCA (redukcja do n_components_pca = liczby kubitów)
scaler = StandardScaler(with_mean=True, with_std=True)
X_scaled = scaler.fit_transform(X.values)

pca = PCA(n_components=n_components_pca, random_state=random_state)
X_pca = pca.fit_transform(X_scaled)

X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=test_size, stratify=y, random_state=random_state
)

In [9]:
# ===== 2) Szybki klasyczny baseline (LogReg + PCA) =====
t0 = time.time()
logreg = LogisticRegression(max_iter=2000, class_weight="balanced")
logreg.fit(X_train, y_train)
y_pred_lr = logreg.predict(X_test)
acc_lr = accuracy_score(y_test, y_pred_lr)
t_lr = time.time() - t0
print(f"[Baseline] LogisticRegression+PCA={n_components_pca}: accuracy={acc_lr:.4f}, czas={t_lr:.2f}s")

[Baseline] LogisticRegression+PCA=2: accuracy=0.4830, czas=0.06s


In [10]:
# ===== 3) VQC (EstimatorQNN) =====
print("Trenowanie VQC (EstimatorQNN)...")
t0 = time.time()
vqc = build_vqc_qnn(num_features=n_components_pca)
vqc.fit(X_train, y_train)
y_pred_vqc = vqc.predict(X_test)
acc_vqc = accuracy_score(y_test, y_pred_vqc)
t_vqc = time.time() - t0

Trenowanie VQC (EstimatorQNN)...


In [11]:
print("\n=== WYNIKI ===")
print(f"VQC (ZZFeatureMap + TwoLocal) | PCA={n_components_pca} | reps_fm={reps_feature} | reps_ansatz={reps_ansatz}")
print(f"Accuracy (test): {acc_vqc:.4f}")
print(f"Czas wykonania (s): {t_vqc:.2f}")

print("\n[Porównanie] Baseline vs VQC")
print(f"- Baseline  : acc={acc_lr:.4f}, czas={t_lr:.2f}s")
print(f"- VQC       : acc={acc_vqc:.4f}, czas={t_vqc:.2f}s")


=== WYNIKI ===
VQC (ZZFeatureMap + TwoLocal) | PCA=2 | reps_fm=2 | reps_ansatz=2
Accuracy (test): 0.5607
Czas wykonania (s): 1296.55

[Porównanie] Baseline vs VQC
- Baseline  : acc=0.4830, czas=0.06s
- VQC       : acc=0.5607, czas=1296.55s
