In [None]:
##########################################################
# 1. IMPORT ALL PACKAGES
##########################################################
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import math
import seaborn as sns

In [None]:
##########################################################
# 2. LOAD DATASET
##########################################################
# Read the CSV (header=0: the first row holds the column names) and discard
# the two columns that are not used as features.
data = pd.read_csv(
    "../input/breast-cancer-wisconsin-data/data.csv",
    header=0,
).drop(columns=["Unnamed: 32", "id"])

# Encode the label: M (malignant) -> 1, anything else, i.e. B (benign) -> 0
data["diagnosis"] = [int(label == "M") for label in data["diagnosis"]]

# Boolean mask shared by the two class subsets below
is_malignant = data["diagnosis"] == 1

# Malignant rows only
m_data = data[is_malignant]

# Benign rows only (the label is strictly 0/1 after the encoding above)
b_data = data[~is_malignant]

# Preview the first ten benign rows
b_data.head(10)

In [None]:
##########################################################
# 3. SPLIT INTO TRAIN AND TEST DATA
##########################################################
# Features: every column except the label. Selecting by name (rather than by
# position with iloc[:, 1:]) guarantees the label can never leak into the
# feature matrix, even if the column order of `data` changes.
x = data.drop(columns="diagnosis")

# Labels as a plain list of 0/1 values
y = data['diagnosis'].tolist()
print(x)

# Hold out 25% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [None]:
# ##########################################################
# # 5. FEATURE NORMALIZATION FOR BOTH (TEST & TRAIN)
# ##########################################################
# # NOTE: this step was written for image data, where pixel intensities lie in the range 0 to 255. The data loaded in this notebook is a tabular CSV, so dividing by 255 is not a meaningful normalization here.
# # print((min(x_train[2]), max(x_train[2])))

# # Normalizing the data
# x_train = x_train/255.0
# x_test = x_test/255.0

# # Printing the shape of the Datasets
# print('x_train shape:', x_train.shape)
# print(x_train.shape[0], 'train samples')
# print(x_test.shape[0], 'test samples')

In [None]:
##########################################################
# 6. TRAIN SVM ALGORITHM
##########################################################

# Support vector classifier with a linear kernel and regularization parameter C=10.
# `gamma` is intentionally not passed: it is the kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels only and is ignored when kernel='linear',
# so setting it would wrongly suggest it is a tuned hyperparameter.
# NOTE(review): no feature scaling is applied before fitting in the cells
# visible here; SVMs are sensitive to feature scale - consider standardizing.
model_clf = svm.SVC(C=10, kernel='linear')

# Fit the classifier on the training split
model_clf.fit(x_train, y_train)

In [None]:
##########################################################
# 6. APPLY THE TRAINED LEARNER TO THE TEST DATA
##########################################################
# Predict a class label for every row of the held-out test features using the
# SVM fitted in the training cell; the result is stored in `y_pred`.
y_pred = model_clf.predict(x_test)

In [None]:
##########################################################
# 7. CONFUSION MATRIX
##########################################################
def _pct(numerator, denominator):
    """Return numerator/denominator as a percentage rounded to 4 decimals.

    Returns 0.0 when the denominator is zero (e.g. no positive predictions),
    so a degenerate split yields a defined value instead of nan or an error.
    """
    if denominator == 0:
        return 0.0
    return round(numerator / denominator * 100, 4)


# Actual and predicted classes
lst_actual_class = y_test
lst_predicted_class = y_pred

# Labels: B (benign) = 0 and M (malignant) = 1. Passing them explicitly fixes
# the row/column order, so ravel() always yields tn, fp, fn, tp in that order.
lst_classes = [0, 1]

# Compute the binary confusion matrix. The counts are cast to Python ints so
# the products in the MCC denominator use arbitrary-precision arithmetic
# instead of fixed-width NumPy integers.
cm = confusion_matrix(lst_actual_class, lst_predicted_class, labels=lst_classes)
tn, fp, fn, tp = (int(count) for count in cm.ravel())

sensitivity = _pct(tp, tp + fn)
specificity = _pct(tn, tn + fp)
accuracy = _pct(tp + tn, tp + fp + tn + fn)
balanced_accuracy = round((sensitivity + specificity) / 2, 4)
# Scale to a percentage *before* rounding, like every other metric here;
# rounding first kept only 2 decimals of the percentage and could print
# float artefacts such as 96.36000000000001.
precision = _pct(tp, tp + fp)
f1Score = _pct(2 * tp, 2 * tp + fp + fn)

# Matthews correlation coefficient; defined as 0 when any marginal is empty
# (zero denominator).
mcc_denominator = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
mcc = round((tp * tn - fp * fn) / mcc_denominator, 4) if mcc_denominator else 0.0

print("TP={0}, FP={1}, TN={2}, FN={3}".format(tp, fp, tn, fn))
print("Sensitivity: {0}%".format(sensitivity))
print("Specificity: {0}%".format(specificity))
print("Accuracy: {0}%".format(accuracy))
print("Balanced Accuracy: {0}%".format(balanced_accuracy))
print("Precision: {0}%".format(precision))
print("F1-Score: {0}%".format(f1Score))
print("MCC: {0}".format(mcc))