In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# reading the data
data_path = "./data/data.csv"
df = pd.read_csv(data_path, header=0)
# removing columns with NaN values 
df.dropna(axis=1, inplace=True)
# handling diagnosis feature
df['diagnosis'].replace(['M', 'B'],[0, 1], inplace=True)
# dropping the id column
df.drop(["id"], axis=1, inplace=True)
df.head()

X = df.drop(["diagnosis"], axis=1)
y = df["diagnosis"]

# splitting into train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# standardizing data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [2]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

# doing grid search on sklearns SVC
c_values = np.arange(0.5, 10.5, 0.5)
degree_values = np.arange(2,8,1)

param_grid = {
    'kernel': ['linear', 'poly', 'rbf'],
    'C': c_values,
    'degree': degree_values,  # only for 'poly' kernel
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1.0, 10.0]  # only for 'rbf' kernel
}

svm_classifier = SVC()
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best Parameters (Grid Search):", grid_search.best_params_)
print("Best accuracy (Grid Search):", grid_search.best_score_)
print()
# getting the best svm model and testing it on test data
best_svm_model = grid_search.best_estimator_
y_pred = best_svm_model.predict(X_test)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Best Parameters (Grid Search): {'C': 7.5, 'degree': 2, 'gamma': 0.01, 'kernel': 'rbf'}
Best accuracy (Grid Search): 0.9846153846153847

Accuracy: 0.9824561403508771
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98        47
           1       0.97      1.00      0.99        67

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [None]:
# my kernel regression implementation

# TODO