In [6]:
# Load the Breast Cancer dataset from scikit-learn.
# Split the dataset into training and testing sets.
# Use RFE with a Support Vector Machine (SVM) classifier to select features.
# Train an SVM model with the selected features and evaluate its performance.
# ---------- Tulkubaeva Nargiz -----------

# Import the required libraries
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score

# Step 1: load the Breast Cancer dataset and separate features from labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Step 2: hold out 30% of the samples for testing; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 3: standardize the features. The scaler is fitted on the training split
# only and then reused, unchanged, to transform the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: create the SVM classifier. A linear kernel is used so the model
# exposes per-feature coefficients for RFE to rank.
svm = SVC(kernel='linear', random_state=42)

# Step 5: recursive feature elimination - remove one feature per iteration
# (step=1) until 10 features remain.
selector = RFE(estimator=svm, n_features_to_select=10, step=1).fit(
    X_train_scaled, y_train
)

# Step 6: keep only the selected columns in both splits.
X_train_rfe = selector.transform(X_train_scaled)
X_test_rfe = selector.transform(X_test_scaled)

# Step 7: train the SVM on the reduced training set (RFE worked on clones, so
# `svm` itself has not been fitted yet) and score it on the held-out data.
y_pred = svm.fit(X_train_rfe, y_train).predict(X_test_rfe)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Accuracy: {accuracy * 100:.2f}%')

# Step 8: report the column indices of the features RFE kept.
selected_features = selector.get_support(indices=True)
print("Selected feature indices:", selected_features)

Accuracy: 96.49%
Selected feature indices: [ 5  7 13 17 18 19 21 23 26 28]
