In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
import numpy as np

data = pd.read_csv("data.csv")
labels = pd.read_csv("labels.csv")

# Extract features (data) and target labels (classes)
X = data.values[:, 1:]  # Assuming the first column is an identifier, not a feature
y = labels.values[:, 1]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (important for PCA)
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

# Number of random genes to select for each iteration
num_random_genes = 100

# Number of iterations
num_iterations = 10

for iteration in range(num_iterations):
    # Randomly select genes for this iteration
    selected_genes_indices = np.random.choice(X_train_std.shape[1], size=num_random_genes, replace=False)
    
    # Extract the selected genes
    X_train_selected = X_train_std[:, selected_genes_indices]
    X_test_selected = X_test_std[:, selected_genes_indices]

    # Perform PCA for feature selection
    n_components = min(num_random_genes, X_train_selected.shape[1])
    pca = PCA(n_components=n_components)
    X_train_pca = pca.fit_transform(X_train_selected)
    X_test_pca = pca.transform(X_test_selected)

    # Initialize the SVM classifier with a linear kernel
    svm_classifier = SVC(kernel='linear')

    # Fit the SVM model on the training data with PCA-transformed features
    svm_classifier.fit(X_train_pca, y_train)

    # Predict on the test data with PCA-transformed features
    y_pred = svm_classifier.predict(X_test_pca)

    # Calculate and print accuracy for each iteration
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy (Iteration {iteration + 1}): {accuracy * 100:.2f}%')
f1 = f1_score(y_test, y_pred, average='weighted')  # You can adjust the average parameter as needed
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
print(f'F1 Score: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')


Accuracy (Iteration 10): 98.76%
F1 Score: 0.9876811594202898
Precision: 0.9883790823482268
Recall: 0.9875776397515528
