In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_selection import SelectKBest, f_classif

# ---------------------------------------------------------------------------
# Breast-cancer diagnosis classifier: load -> preprocess -> split -> scale ->
# select features -> fit a linear SVM -> report accuracy.
# ---------------------------------------------------------------------------

# Tunable settings, gathered here so nothing below relies on a bare literal.
DIAGNOSIS_CODES = {"M": 1, "B": 0}  # M: Malignant -> 1, B: Benign -> 0
TEST_SIZE = 0.2                     # fraction of rows held out for testing
SPLIT_SEED = 42                     # seed for the train/test shuffle
N_SELECTED_FEATURES = 10            # number of features kept by SelectKBest
SVM_SEED = 45552                    # passed to SVC(random_state=...)

# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"

# Column names are supplied explicitly: an id, the diagnosis label, then every
# measurement repeated for each of the three statistics (all "_mean" columns
# first, then "_se", then "_worst").
_measurements = ["radius", "texture", "perimeter", "area", "smoothness",
                 "compactness", "concavity", "concave_points", "symmetry",
                 "fractal_dimension"]
_statistics = ["mean", "se", "worst"]
names = ["id_number", "diagnosis"] + [
    f"{measurement}_{statistic}"
    for statistic in _statistics
    for measurement in _measurements
]

# Preprocessing
# Drop id_number column as it's not relevant for prediction. A non-mutating
# drop is used so the frame is built in one expression.
data = pd.read_csv(url, names=names).drop(columns="id_number")

# Fail fast on labels outside the mapping: Series.map would otherwise turn them
# into NaN without complaint and the error would only surface inside the
# estimator.
unexpected_labels = set(data["diagnosis"].unique()) - set(DIAGNOSIS_CODES)
if unexpected_labels:
    raise ValueError(
        f"Unexpected diagnosis labels: {sorted(map(str, unexpected_labels))}"
    )

# Encode diagnosis (M: Malignant, B: Benign) to numerical values (0: Benign, 1: Malignant)
data = data.assign(diagnosis=data["diagnosis"].map(DIAGNOSIS_CODES))

# Separate features (X) and target (y)
X = data.drop(columns="diagnosis")
y = data["diagnosis"]

# Split data into training and testing sets
# NOTE(review): the split is not stratified on y; consider stratify=y if the
# class ratio should be preserved in both subsets (this would change results).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED
)

# Feature scaling -- statistics are learned on the training rows only and then
# applied to the test rows, so no test information leaks into the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature selection -- likewise fitted on the training rows only; keeps the
# N_SELECTED_FEATURES columns with the highest ANOVA F-score against y_train.
selector = SelectKBest(f_classif, k=N_SELECTED_FEATURES)
X_train_selected = selector.fit_transform(X_train_scaled, y_train)
X_test_selected = selector.transform(X_test_scaled)

# Build and train Support Vector Machine (SVM) model
# NOTE(review): per the scikit-learn docs, SVC ignores random_state unless
# probability=True; the argument is kept so the estimator parameters are unchanged.
svm_model = SVC(kernel='linear', random_state=SVM_SEED)
svm_model.fit(X_train_selected, y_train)

# Predictions
y_pred_train = svm_model.predict(X_train_selected)
y_pred_test = svm_model.predict(X_test_selected)

# Model evaluation
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)

# Classification report (per-class precision / recall / F1 on the test set)
print("\nClassification Report:")
print(classification_report(y_test, y_pred_test))

Training Accuracy: 0.9626373626373627
Testing Accuracy: 0.9824561403508771

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99        71
           1       0.98      0.98      0.98        43

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [4]:
import joblib

# Persist the fitted preprocessing steps alongside the classifier.
# svm_model was trained on features that were first standardized by `scaler`
# and then reduced by `selector`; without those two fitted objects the saved
# model cannot be applied to raw measurements.
joblib.dump({"scaler": scaler, "selector": selector}, "preprocessing_v1.sav")

# The classifier keeps its original file name and contents, and stays the last
# expression so the cell's displayed value is unchanged.
# NOTE: joblib files are pickle-based -- only load them from trusted sources.
joblib.dump(svm_model, "model_v1.sav")