# Breast Cancer Coimbra
# Kernel Support Vector Machine

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Importing the dataset

In [2]:
# Read the CSV and separate it into a feature matrix and a label vector:
# every column except the last is a predictor, the last column is the target.
dataset = pd.read_csv('dataR2.csv')
n_features = dataset.shape[1] - 1
X = dataset.iloc[:, :n_features].values
y = dataset.iloc[:, n_features].values

In [3]:
# Count the missing values in each column of the dataset.
missing_per_column = dataset.isnull().sum()
missing_per_column

Age               0
BMI               0
Glucose           0
Insulin           0
HOMA              0
Leptin            0
Adiponectin       0
Resistin          0
MCP.1             0
Classification    0
dtype: int64

## Splitting the dataset into the Training Set and Test Set

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
split_kwargs = {'test_size': 0.30, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

In [5]:
# Inspect the training features as returned by the split (not yet scaled).
print(X_train)

[[7.50000000e+01 2.30000000e+01 8.30000000e+01 4.95200000e+00
  1.01383947e+00 1.71270000e+01 1.15789900e+01 7.09130000e+00
  3.18302000e+02]
 [4.30000000e+01 3.44221736e+01 8.90000000e+01 2.31940000e+01
  5.09185613e+00 3.12128000e+01 8.30095500e+00 6.71026000e+00
  9.60246000e+02]
 [5.90000000e+01 2.28328793e+01 9.80000000e+01 6.86200000e+00
  1.65877413e+00 1.49037000e+01 4.23010500e+00 8.20490000e+00
  3.55310000e+02]
 [5.40000000e+01 2.42187500e+01 8.60000000e+01 3.73000000e+00
  7.91257333e-01 8.68740000e+00 3.70523000e+00 1.03445500e+01
  6.35049000e+02]
 [3.50000000e+01 3.52507611e+01 9.00000000e+01 6.81700000e+00
  1.51337400e+00 5.06094000e+01 6.96689500e+00 2.20370300e+01
  6.67928000e+02]
 [7.10000000e+01 3.03000000e+01 1.02000000e+02 8.34000000e+00
  2.09834400e+00 5.65020000e+01 8.13000000e+00 4.29890000e+00
  2.00976000e+02]
 [4.50000000e+01 2.68500000e+01 9.20000000e+01 3.33000000e+00
  7.55688000e-01 5.46800000e+01 1.21000000e+01 1.09600000e+01
  2.68230000e+02]
 [4.90

In [6]:
# Inspect the training labels (the output shows the class codes 1 and 2).
print(y_train)

[1 1 2 2 1 1 2 1 2 2 1 1 1 2 1 2 1 1 2 2 1 1 2 2 1 1 2 1 1 2 1 1 1 2 1 2 2
 2 2 1 2 1 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 1 1 2 1 2 1 1 1 2 2 2 2 2 2 2 1
 2 2 2 1 2 1 2]


In [7]:
# Inspect the held-out test features as returned by the split (not yet scaled).
print(X_test)

[[7.10000000e+01 2.55102041e+01 1.12000000e+02 1.03950000e+01
  2.87179200e+00 1.90653000e+01 5.48610000e+00 4.27447000e+01
  7.99898000e+02]
 [8.60000000e+01 2.11111111e+01 9.20000000e+01 3.54900000e+00
  8.05386400e-01 6.69940000e+00 4.81924000e+00 1.05763500e+01
  7.73920000e+02]
 [7.50000000e+01 2.73000000e+01 8.50000000e+01 5.19700000e+00
  1.08963767e+00 1.03900000e+01 9.00080500e+00 7.57670000e+00
  3.35393000e+02]
 [7.60000000e+01 2.92184076e+01 8.30000000e+01 5.37600000e+00
  1.10064640e+00 2.85620000e+01 7.36996000e+00 8.04375000e+00
  6.98789000e+02]
 [3.40000000e+01 2.14700000e+01 7.80000000e+01 3.46900000e+00
  6.67435600e-01 1.45700000e+01 1.31100000e+01 6.92000000e+00
  3.54600000e+02]
 [7.80000000e+01 2.53000000e+01 6.00000000e+01 3.50800000e+00
  5.19184000e-01 6.63300000e+00 1.05672950e+01 4.66380000e+00
  2.09749000e+02]
 [5.40000000e+01 3.60500000e+01 1.19000000e+02 1.19100000e+01
  3.49598200e+00 8.92700000e+01 8.01000000e+00 5.06000000e+00
  2.18280000e+02]
 [6.60

In [8]:
# Inspect the held-out test labels; these serve as ground truth when evaluating predictions.
print(y_test)

[2 1 1 1 1 1 2 1 2 1 1 1 2 2 1 1 2 2 2 2 2 2 2 1 1 2 1 1 2 1 2 2 2 1 2]


## Feature Scaling

In [9]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so the test set never influences preprocessing.
# NOTE: X_train / X_test are overwritten here. Re-run the split cell before
# re-running this one, otherwise the scaler is refit on already-scaled data.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [10]:
# Inspect the training features after standardisation with `sc`.
print(X_train)

[[ 1.03944148e+00 -9.48520008e-01 -7.42511995e-01 -5.49208151e-01
  -4.84911260e-01 -5.34455593e-01  2.78028773e-01 -6.14619876e-01
  -6.18326144e-01]
 [-9.34355090e-01  1.39653578e+00 -4.56568390e-01  1.10028169e+00
   5.17630919e-01  1.96899611e-01 -2.17921655e-01 -6.46120427e-01
   1.40358129e+00]
 [ 5.25431958e-02 -9.82831099e-01 -2.76529824e-02 -3.76500905e-01
  -3.26360117e-01 -6.49892560e-01 -8.33821058e-01 -5.22558638e-01
  -5.01763406e-01]
 [-2.55862519e-01 -6.98301719e-01 -5.99540192e-01 -6.59704620e-01
  -5.39630992e-01 -9.72651889e-01 -9.13232041e-01 -3.45673915e-01
   3.79320247e-01]
 [-1.42780423e+00  1.56665085e+00 -4.08911122e-01 -3.80569923e-01
  -3.62105377e-01  1.20399927e+00 -4.19758312e-01  6.20942625e-01
   4.82878025e-01]
 [ 7.92716911e-01  5.50223385e-01  1.62976088e-01 -2.42856239e-01
  -2.18295985e-01  1.50995162e+00 -2.43786298e-01 -8.45467397e-01
  -9.87863537e-01]
 [-8.10992805e-01 -1.58086849e-01 -3.13596587e-01 -6.95873677e-01
  -5.48375379e-01  1.4153507

In [11]:
# Inspect the test features after applying the scaler that was fitted on the training set.
print(X_test)

[[ 7.92716911e-01 -4.33156750e-01  6.39548762e-01 -5.70377096e-02
  -2.81510424e-02 -4.33816241e-01 -6.43795247e-01  2.33284432e+00
   8.98539086e-01]
 [ 1.71793405e+00 -1.33632271e+00 -3.13596587e-01 -6.76071118e-01
  -5.36157493e-01 -1.07587174e+00 -7.44687856e-01 -3.26511025e-01
   8.16717134e-01]
 [ 1.03944148e+00 -6.56985572e-02 -6.47197460e-01 -5.27054604e-01
  -4.66276983e-01 -8.84250423e-01 -1.12037823e-01 -5.74491890e-01
  -5.64495249e-01]
 [ 1.10112262e+00  3.28164554e-01 -7.42511995e-01 -5.10868951e-01
  -4.63570589e-01  5.92662235e-02 -3.58776578e-01 -5.35880898e-01
   5.80079783e-01]
 [-1.48948538e+00 -1.26264020e+00 -9.80798332e-01 -6.83304930e-01
  -5.70071405e-01 -6.67218749e-01  5.09662988e-01 -6.28781235e-01
  -5.03999667e-01]
 [ 1.22448491e+00 -4.76313185e-01 -1.83862915e+00 -6.79778447e-01
  -6.06517671e-01 -1.07931932e+00  1.24964340e-01 -8.15301138e-01
  -9.60231541e-01]
 [-2.55862519e-01  1.73074044e+00  9.73149635e-01  7.99525933e-02
   1.25300219e-01  3.2113138

## Training the Kernel SVM model on the Training set

In [12]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier; only the kernel and the seed are set
# explicitly, every other hyper-parameter keeps its library default.
svc_settings = {'kernel': 'rbf', 'random_state': 42}
classifier = SVC(**svc_settings)
classifier.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

## Predicting a new result

In [13]:
# Template for scoring one new observation (left commented out on purpose).
# Supply the nine raw, unscaled feature values in the dataset's column order:
#   Age, BMI, Glucose, Insulin, HOMA, Leptin, Adiponectin, Resistin, MCP.1
# `sc.transform` applies the scaling fitted on the training set before the prediction is made.
# print(classifier.predict(sc.transform([[age, bmi, glucose, insulin, homa, leptin, adiponectin, resistin, mcp_1]])))

## Predicting the Test set results

In [14]:
# Predicted class label for every row of the scaled test set.
y_pred = classifier.predict(X_test)

print('y_pred: \n', y_pred)
print('\n')

# Put predictions (left column) next to the true labels (right column):
# column_stack turns each 1-D vector into one column of an (n, 2) array.
side_by_side = np.column_stack((y_pred, y_test))
print('y_pred+y_test: \n', side_by_side)

y_pred: 
 [2 1 1 1 1 1 2 1 2 1 1 1 2 2 1 1 2 2 2 1 2 2 2 1 2 2 2 1 2 1 2 1 2 2 2]


y_pred+y_test: 
 [[2 2]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [2 2]
 [1 1]
 [2 2]
 [1 1]
 [1 1]
 [1 1]
 [2 2]
 [2 2]
 [1 1]
 [1 1]
 [2 2]
 [2 2]
 [2 2]
 [1 2]
 [2 2]
 [2 2]
 [2 2]
 [1 1]
 [2 1]
 [2 2]
 [2 1]
 [1 1]
 [2 2]
 [1 1]
 [2 2]
 [1 2]
 [2 2]
 [2 1]
 [2 2]]


## Making the Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix: rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_test, y_pred)
# Text summary of per-class precision, recall, F1 and support, plus overall accuracy.
cr = classification_report(y_test, y_pred)
print(cm, cr, sep='\n')

[[14  3]
 [ 2 16]]
              precision    recall  f1-score   support

           1       0.88      0.82      0.85        17
           2       0.84      0.89      0.86        18

    accuracy                           0.86        35
   macro avg       0.86      0.86      0.86        35
weighted avg       0.86      0.86      0.86        35

