In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    accuracy_score, roc_auc_score, matthews_corrcoef,
    precision_score, recall_score, f1_score, confusion_matrix
)

In [7]:
# Mount Google Drive inside the Colab runtime; the dataset is read from this mount point.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [12]:
# Load the staging CSV from the mounted Drive folder into a DataFrame.
# NOTE(review): the filename says "II_III", while the analysis cell below filters
# for 'Stage I' and 'Stage III' -- confirm this is the intended file.
DATA_CSV_PATH = "/content/gdrive/MyDrive/Bashar_Staging/TCGA LUAD staging merged_data_II_III.csv"
data = pd.read_csv(DATA_CSV_PATH)

In [None]:
# Binary staging task: keep only Stage I and Stage III samples.
# NOTE(review): if the loaded table contains no 'Stage I' rows, only one class
# survives this filter and every downstream metric is degenerate -- verify the input.
STAGE_CLASSES = ['Stage I', 'Stage III']
data_binary = data[data['Stage'].isin(STAGE_CLASSES)]

# Drop every non-numeric column except the label. Excluding numeric/bool dtypes
# (instead of including only 'object') also catches string- and category-typed
# columns, which the scaler below cannot handle either.
non_numeric_columns = data_binary.select_dtypes(exclude=['number', 'bool']).columns
columns_to_drop = [col for col in non_numeric_columns if col != 'Stage']
data_binary = data_binary.drop(columns=columns_to_drop)

# Features and labels
X = data_binary.drop(columns=['Stage'])
# Fit the encoder on the fixed class list rather than on the data, so the mapping
# is always 0: Stage I, 1: Stage III -- even when one class is absent from the data
# (fit_transform on the data alone would encode a lone 'Stage III' as 0).
label_encoder = LabelEncoder().fit(STAGE_CLASSES)
y = label_encoder.transform(data_binary['Stage'])

# Stratified 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Feature scaling: statistics are learned on the training split only and then
# applied to the test split, so no test information leaks into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train MLPClassifier (single hidden layer of 100 ReLU units, Adam optimizer).
model = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
)
model.fit(X_train_scaled, y_train)

# Predictions: hard labels, plus the probability of the positive class (column 1).
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

# Confusion Matrix
# Passing labels=[0, 1] forces a 2x2 matrix even when one class is missing from
# y_test / y_pred, so the four cells can always be unpacked directly.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

# Row = true label, column = predicted label; ravel() yields TN, FP, FN, TP.
tn, fp, fn, tp = conf_matrix.ravel()

# Metrics
acc = accuracy_score(y_test, y_pred)
# ROC AUC is undefined when y_test holds a single class. Report NaN in that case
# rather than 0.0, which would read as a (worst-case) real score. Checking the
# class count explicitly also avoids swallowing unrelated ValueErrors.
if len(set(y_test)) > 1:
    auc = roc_auc_score(y_test, y_proba)
else:
    auc = float('nan')
mcc = matthews_corrcoef(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
# Specificity = TN / (TN + FP); falls back to 0 when there are no true negatives
# or false positives, mirroring zero_division=0 used for the other metrics.
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
f1 = f1_score(y_test, y_pred, zero_division=0)

# Display
print(f"\nAccuracy: {acc:.4f}")
print(f"AUC: {auc:.4f}")
print(f"MCC: {mcc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall/Sensitivity: {recall:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"F1 Score: {f1:.4f}")