In [1]:
import sys, subprocess, time, warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [2]:
# ===== Parametry do łatwej zmiany =====
data_path   = "countsAll_fixed_07_07_23.csv"  # ścieżka do pliku z danymi
sep         = "\t"                            # separator (w Twoim pliku jest tab)
n_components_pca = 2                          # liczba komponentów PCA = liczba kubitów
test_size   = 0.20                            # ułamek danych do testu
random_state = 42                             # ziarno losowe
maxiter     = 200                             # iteracje optymalizatora
entanglement = "linear"                       # "linear" | "full" | lista par
reps_feature = 2                              # głębokość feature map
reps_ansatz  = 2 

In [3]:
# ===== 1) Wczytanie i przygotowanie danych =====
print("Wczytywanie danych...")
df = pd.read_csv(data_path, sep=sep).T

Wczytywanie danych...


In [5]:
metadata = pd.read_csv("SampleInfo_fixed_08_07_23.csv", delimiter=";")
metadata = metadata.set_index("id")
metadata["label"] = metadata["GroupAlternative"].apply(
    lambda x: 0 if x == "Asymptomatic controls" else 1
)
metadata = metadata[metadata["RealLocation"] != "Institute 5"]
df = df.merge(metadata, left_index=True, right_index=True)

In [6]:
X = df.drop(columns=metadata.columns)
y = df["label"]

In [7]:
print("Shape X:", X.shape)
print("Shape y:", y.shape)
print("Class balance:\n", y.value_counts())
print(f"Liczba cech (genów): {X.shape[1]}, liczba próbek: {X.shape[0]}")
print(f"Klasy: 0 (zdrowe) = {(y==0).sum()}, 1 (nowotworowe) = {(y==1).sum()}")

Shape X: (2060, 5346)
Shape y: (2060,)
Class balance:
 label
1    1706
0     354
Name: count, dtype: int64
Liczba cech (genów): 5346, liczba próbek: 2060
Klasy: 0 (zdrowe) = 354, 1 (nowotworowe) = 1706


In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
mlp = MLPClassifier(
    hidden_layer_sizes=(50, 30),  # 2 warstwy ukryte (50 i 30 neuronów)
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=42
)

In [10]:
#Trenowanie modelu z pomiarem czasu
start_time = time.time()
mlp.fit(X_train, y_train)
training_time = time.time() - start_time

In [11]:
# 5. Predykcja i ocena
y_pred = mlp.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [12]:
# 6. Wyniki
print(f"Dokładność (accuracy): {accuracy:.4f}")
print(f"Czas trenowania: {training_time:.2f} s")

Dokładność (accuracy): 0.8617
Czas trenowania: 36.83 s
