# Breast Cancer Detection

## Importing the libraries

In [8]:
import pandas as pd

## Importing the dataset

In [9]:
# Load the raw table, then separate it by column position.
dataset = pd.read_csv('breast_cancer.csv')

# Every column except the first and the last is used as a model input
# (the first column is presumably a sample identifier -- TODO confirm).
feature_frame = dataset.iloc[:, 1:-1]
# The last column is the value the classifiers are trained to predict.
target_series = dataset.iloc[:, -1]

X = feature_frame.values
y = target_series.values

## Splitting the dataset into the Training set and Test set

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Training the Classification models on the Training set

## Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training can be chained.
logistic_regression = LogisticRegression(random_state=0).fit(x_train, y_train)

# `lst` collects the trained classifiers in the order they are fitted;
# it is (re)created here with the logistic regression as its first entry.
lst = [logistic_regression]

## Linear SVM

In [13]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel, trained on the training split.
linear_svm = SVC(kernel = 'linear', random_state = 0)
linear_svm.fit(x_train, y_train)

# Store the model in slot 1, the position that lines up with 'Linear SVM' in
# the `names` list used by the evaluation cells. On a first run the slice is
# empty and sits at the end of the list, so this behaves exactly like
# append(); when the cell is re-run it replaces the earlier fit instead of
# adding a duplicate that would push `lst` out of step with `names`.
lst[1:2] = [linear_svm]

## Radial Basis SVM

In [14]:
# Support vector classifier with an RBF kernel, trained on the training split.
kernel_svm = SVC(kernel = 'rbf', random_state = 0)
kernel_svm.fit(x_train, y_train)

# Store the model in slot 2, the position that lines up with "RBF SVM" in the
# `names` list used by the evaluation cells. On a first run the slice is empty
# and sits at the end of the list, so this behaves exactly like append(); when
# the cell is re-run it replaces the earlier fit instead of adding a duplicate
# that would push `lst` out of step with `names`.
lst[2:3] = [kernel_svm]

## Decision Tree

In [15]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree using the entropy split criterion.
decision_tree = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
decision_tree.fit(x_train, y_train)

# Store the model in slot 3, the position that lines up with "Decision Tree"
# in the `names` list used by the evaluation cells. On a first run the slice
# is empty and sits at the end of the list, so this behaves exactly like
# append(); when the cell is re-run it replaces the earlier fit instead of
# adding a duplicate that would push `lst` out of step with `names`.
lst[3:4] = [decision_tree]

## Random Forest

In [16]:
from sklearn.ensemble import RandomForestClassifier

# Ensemble of 500 trees, each split chosen with the entropy criterion.
random_forest = RandomForestClassifier(n_estimators = 500, criterion = 'entropy', random_state = 0)
random_forest.fit(x_train, y_train)

# Store the model in slot 4, the position that lines up with "Random Forest"
# in the `names` list used by the evaluation cells. On a first run the slice
# is empty and sits at the end of the list, so this behaves exactly like
# append(); when the cell is re-run it replaces the earlier fit instead of
# adding a duplicate that would push `lst` out of step with `names`.
lst[4:5] = [random_forest]

# Predicting the Test set results

In [18]:
from sklearn.metrics import accuracy_score

# Display labels, listed in the same order the classifiers were added to `lst`.
names = [
    "Logistic regression",
    'Linear SVM',
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
]

# Score each trained classifier on the held-out test split.
for i, classifier in enumerate(lst):
    y_pred = classifier.predict(x_test)
    # accuracy_score is multiplied by 100 to report a percentage.
    accuracy = accuracy_score(y_test, y_pred) * 100

    print(" Model : {},\t Accuracy : {}".format(names[i], accuracy))

 Model : Logistic regression,	 Accuracy : 95.62043795620438
 Model : Linear SVM,	 Accuracy : 95.62043795620438
 Model : RBF SVM,	 Accuracy : 96.35036496350365
 Model : Decision Tree,	 Accuracy : 95.62043795620438
 Model : Random Forest,	 Accuracy : 97.08029197080292


# Computing the accuracy with k-Fold Cross Validation

In [22]:
from sklearn.model_selection import cross_val_score

# Cross-validate each classifier in `lst` with cv = 10, using only the
# training split, and report the per-fold scores plus their mean and spread.
for i, classifier in enumerate(lst):
    accuracies = cross_val_score(classifier, x_train, y_train, cv=10)
    mean_pct = accuracies.mean() * 100
    std_pct = accuracies.std() * 100

    print("============{}============".format(names[i]))
    print(accuracies)
    print("Accuracy: {:.2f}".format(mean_pct))
    print("Standard Deviation: {:.2f}".format(std_pct))
    print("==================================")

[0.94545455 0.96363636 0.96363636 1.         0.94545455 1.
 0.96296296 0.96296296 0.98148148 0.94444444]
Accuracy: 96.70
Standard Deviation: 1.97
[0.94545455 0.96363636 0.96363636 1.         0.94545455 1.
 0.98148148 0.96296296 1.         0.94444444]
Accuracy: 97.07
Standard Deviation: 2.19
[0.92727273 0.96363636 0.96363636 0.98181818 0.96363636 1.
 0.98148148 0.96296296 1.         0.94444444]
Accuracy: 96.89
Standard Deviation: 2.17
[0.96363636 0.92727273 0.90909091 0.92727273 0.96363636 0.94545455
 0.90740741 0.94444444 1.         0.94444444]
Accuracy: 94.33
Standard Deviation: 2.65
[0.94545455 0.96363636 0.98181818 0.98181818 0.94545455 1.
 0.92592593 0.96296296 0.98148148 0.96296296]
Accuracy: 96.52
Standard Deviation: 2.09
