In [8]:
# 1. Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


In [18]:
# 2. Load and prepare the data
# The dataset is read from a CSV file in a local Downloads folder;
# change DATA_PATH to run this notebook on another machine.
DATA_PATH = '/Users/preetham/Downloads/breast-cancer.csv'
data = pd.read_csv(DATA_PATH)

# Column 0 (the case number, per the original note) is left out.
# Columns 1-9 become the feature matrix and column 10 is the target.
FEATURE_COLUMNS = slice(1, 10)
TARGET_COLUMN = 10
X = data.iloc[:, FEATURE_COLUMNS]
y = data.iloc[:, TARGET_COLUMN]

In [21]:
# 3. Split the data into training and testing sets
# 20% of the rows are held out for testing; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale the features
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no test-set statistics influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [23]:
# 4. Create functions for model training, cross-validation, and testing
def train_and_evaluate_model(model, param_grid, X_train, y_train, X_test, y_test,
                             cv=5, scoring='accuracy'):
    """Tune ``model`` with a grid search and score the winner on held-out data.

    Parameters
    ----------
    model : estimator
        Unfitted estimator handed to ``GridSearchCV``.
    param_grid : dict or list of dict
        Hyperparameter grid(s) forwarded to ``GridSearchCV``.
    X_train, y_train
        Data used for the cross-validated search (and the final refit).
    X_test, y_test
        Held-out data used only for the final score.
    cv : default 5
        Cross-validation setting forwarded to ``GridSearchCV``.
    scoring : default 'accuracy'
        Scoring setting forwarded to ``GridSearchCV``.

    Returns
    -------
    tuple
        ``(best_model, cv_score, test_score)``: the search's
        ``best_estimator_``, its ``best_score_``, and the score of the
        fitted search on ``(X_test, y_test)``.
    """
    grid_search = GridSearchCV(model, param_grid, cv=cv, scoring=scoring)
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_
    cv_score = grid_search.best_score_
    # Score through the search object rather than the bare estimator so the
    # held-out score uses the same `scoring` metric as the cross-validation
    # score. For the default 'accuracy' on a classifier the two are the same.
    test_score = grid_search.score(X_test, y_test)

    return best_model, cv_score, test_score

In [25]:
# 5. Implement Linear SVM with hyperparameter tuning
# Only the regularisation strength C is searched for the linear kernel.
linear_param_grid = {'C': [0.1, 1, 10, 100]}
linear_svm = SVC(kernel='linear')

# Run the grid search on the scaled features, then unpack the result tuple.
linear_results = train_and_evaluate_model(
    linear_svm,
    linear_param_grid,
    X_train_scaled,
    y_train,
    X_test_scaled,
    y_test,
)
best_linear_svm, linear_cv_score, linear_test_score = linear_results

# Report the selected estimator's parameters and both accuracy figures.
print("Linear SVM Results:")
print(f"Best parameters: {best_linear_svm.get_params()}")
for label, value in (
    ("Cross-validation accuracy", linear_cv_score),
    ("Test accuracy", linear_test_score),
):
    print(f"{label}: {value:.4f}")


Linear SVM Results:
Best parameters: {'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Cross-validation accuracy: 0.9624
Test accuracy: 0.9643


In [27]:
# 6. Implement Kernel SVM (RBF and Linear) with hyperparameter tuning
kernel_svm = SVC()

# The grid is a list of sub-grids, one per kernel. gamma has no effect on the
# linear kernel, so crossing it with kernel='linear' (as a single flat grid
# would) refits the same linear model once per gamma value. Searching gamma
# only for RBF covers the same distinct models with fewer fits.
C_VALUES = [0.1, 1, 10, 100]
kernel_param_grid = [
    {
        'kernel': ['rbf'],
        'C': C_VALUES,
        'gamma': ['scale', 'auto', 0.1, 1],
    },
    {
        'kernel': ['linear'],
        'C': C_VALUES,
    },
]

best_kernel_svm, kernel_cv_score, kernel_test_score = train_and_evaluate_model(
    kernel_svm, kernel_param_grid, X_train_scaled, y_train, X_test_scaled, y_test
)

# Report the selected estimator's parameters and both accuracy figures.
print("\nKernel SVM Results:")
print(f"Best parameters: {best_kernel_svm.get_params()}")
print(f"Cross-validation accuracy: {kernel_cv_score:.4f}")
print(f"Test accuracy: {kernel_test_score:.4f}")


Kernel SVM Results:
Best parameters: {'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Cross-validation accuracy: 0.9642
Test accuracy: 0.9643
