In [1]:
import sys, subprocess, time, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

from qiskit.circuit.library import ZZFeatureMap, TwoLocal
from qiskit_aer.primitives import Estimator
from qiskit_machine_learning.neural_networks import EstimatorQNN
from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier
from qiskit_machine_learning.optimizers import COBYLA

In [2]:
# ===== Parametry do łatwej zmiany =====
data_path   = "countsAll_fixed_07_07_23.csv"  # ścieżka do pliku z danymi
sep         = "\t"                            # separator (w Twoim pliku jest tab)
n_components_pca = 4                          # liczba komponentów PCA = liczba kubitów
test_size   = 0.20                            # ułamek danych do testu
random_state = 42                             # ziarno losowe
maxiter     = 200                             # iteracje optymalizatora
entanglement = "linear"                       # "linear" | "full" | lista par
reps_feature = 2                              # głębokość feature map
reps_ansatz  = 2  

In [3]:
def infer_labels_from_columns(columns):
    """
    Etykieta 0: próbki zdrowe (zawierają '-HD-' w nazwie),
    Etykieta 1: próbki z RNA nowotworowym (pozostałe).
    """
    return np.array([0 if "-HD-" in c else 1 for c in columns], dtype=int)

In [4]:
def build_vqc_qnn(num_features: int):
    """
    Buduje EstimatorQNN z ZZFeatureMap i TwoLocal (VQC).
    """
    feature_map = ZZFeatureMap(feature_dimension=num_features, reps=reps_feature, entanglement=entanglement)
    ansatz = TwoLocal(
        num_qubits=num_features,
        reps=reps_ansatz,
        rotation_blocks=["ry", "rz"],
        entanglement_blocks="cz",
        entanglement=entanglement,
    )
    estimator = Estimator()  # Aer backend

    qnn = EstimatorQNN(
        circuit=feature_map.compose(ansatz),
        input_params=feature_map.parameters,
        weight_params=ansatz.parameters,
        estimator=estimator,
    )

    optimizer = COBYLA(maxiter=maxiter)   # tutaj ustawiamy maxiter
    clf = NeuralNetworkClassifier(
        neural_network=qnn,
        optimizer=optimizer,
        one_hot=False,    # bo etykiety to 0/1
    )
    return clf

In [5]:
# ===== 1) Wczytanie i przygotowanie danych =====
print("Wczytywanie danych...")
df = pd.read_csv(data_path, sep=sep)

Wczytywanie danych...


In [6]:
# Wiersze = geny, kolumny = próbki; transpozycja => próbki x geny
X = df.T
y = infer_labels_from_columns(X.index.tolist())

print(f"Liczba cech (genów): {X.shape[1]}, liczba próbek: {X.shape[0]}")
print(f"Klasy: 0 (zdrowe) = {(y==0).sum()}, 1 (nowotworowe) = {(y==1).sum()}")

Liczba cech (genów): 5346, liczba próbek: 2351
Klasy: 0 (zdrowe) = 280, 1 (nowotworowe) = 2071


In [7]:
# standaryzacja i PCA (redukcja do n_components_pca = liczby kubitów)
scaler = StandardScaler(with_mean=True, with_std=True)
X_scaled = scaler.fit_transform(X.values)

pca = PCA(n_components=n_components_pca, random_state=random_state)
X_pca = pca.fit_transform(X_scaled)

X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=test_size, stratify=y, random_state=random_state
)

In [8]:
# ===== 2) Szybki klasyczny baseline (LogReg + PCA) =====
t0 = time.time()
logreg = LogisticRegression(max_iter=2000, class_weight="balanced")
logreg.fit(X_train, y_train)
y_pred_lr = logreg.predict(X_test)
acc_lr = accuracy_score(y_test, y_pred_lr)
t_lr = time.time() - t0
print(f"[Baseline] LogisticRegression+PCA={n_components_pca}: accuracy={acc_lr:.4f}, czas={t_lr:.2f}s")

[Baseline] LogisticRegression+PCA=4: accuracy=0.6879, czas=0.03s


In [None]:
# ===== 3) VQC (EstimatorQNN) =====
print("Trenowanie VQC (EstimatorQNN)...")
t0 = time.time()
vqc = build_vqc_qnn(num_features=n_components_pca)
vqc.fit(X_train, y_train)
y_pred_vqc = vqc.predict(X_test)
acc_vqc = accuracy_score(y_test, y_pred_vqc)
t_vqc = time.time() - t0

Trenowanie VQC (EstimatorQNN)...


In [None]:
print("\n=== WYNIKI ===")
print(f"VQC (ZZFeatureMap + TwoLocal) | PCA={n_components_pca} | reps_fm={reps_feature} | reps_ansatz={reps_ansatz}")
print(f"Accuracy (test): {acc_vqc:.4f}")
print(f"Czas wykonania (s): {t_vqc:.2f}")

print("\n[Porównanie] Baseline vs VQC")
print(f"- Baseline  : acc={acc_lr:.4f}, czas={t_lr:.2f}s")
print(f"- VQC       : acc={acc_vqc:.4f}, czas={t_vqc:.2f}s")