In [2]:
import pandas as pd 
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset: every column except the last is used as a feature,
# and the last column is used as the target.
dataset = pd.read_csv('Breast_Cancer_Classification_Dataset.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Hold out 10% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Logistic Regression

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Train a logistic regression model with the library's default settings.
# fit() returns the estimator itself, so construction and training can be chained.
logisticRegression = LogisticRegression().fit(X_train, y_train)

# Predict the held-out rows and compute both metrics from the same predictions.
y_pred = logisticRegression.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
logisticRegression_score = accuracy_score(y_test, y_pred)

# Confusion matrix first, then accuracy.
print(cm)
print(logisticRegression_score)

[[45  1]
 [ 0 23]]
0.9855072463768116


# Decision Tree Classifier

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Single decision tree: entropy split criterion, random splitter, depth capped
# at 50, and a fixed random_state.
decision_tree = DecisionTreeClassifier(
    criterion='entropy',
    splitter='random',
    max_depth=50,
    random_state=0,
)
decision_tree.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = decision_tree.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
DecisionTree_score = accuracy_score(y_test, y_pred)

print(cm)
print(DecisionTree_score)

[[46  0]
 [ 1 22]]
0.9855072463768116


# Random Forest Classifier

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Random forest of 50 entropy-criterion trees with a fixed random_state.
random_forest = RandomForestClassifier(
    n_estimators=50,
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = random_forest.predict(X_test)
RandomForest_score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Confusion matrix first, then accuracy.
print(cm)
print(RandomForest_score)

[[45  1]
 [ 0 23]]
0.9855072463768116


# Naive Bayes Classifier

In [6]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score

# Gaussian naive Bayes with default settings.
naiveBayes = GaussianNB().fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = naiveBayes.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
NaiveBayes_score = accuracy_score(y_test, y_pred)

# One print call, newline-separated: confusion matrix, then accuracy.
print(cm, NaiveBayes_score, sep='\n')

[[43  3]
 [ 0 23]]
0.9565217391304348


# Kernel SVM

In [7]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# Support vector classifier with an RBF kernel and a fixed random_state.
classifier = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
kernelSVM_score = accuracy_score(y_test, y_pred)

print(cm)
print(kernelSVM_score)

[[44  2]
 [ 0 23]]
0.9710144927536232


# Summary

In [8]:
# Pair each algorithm label with the accuracy computed for it in the cells above,
# then split the pairs back into the two columns of the summary table.
algorithm_names, accuracy_values = zip(
    ('LogisticRegression', logisticRegression_score),
    ('DecisionTreeClassifier', DecisionTree_score),
    ('RandomForestClassifier', RandomForest_score),
    ('NaiveBayes', NaiveBayes_score),
    ('KernelSVM', kernelSVM_score),
)
d = {
    'Algorithms': list(algorithm_names),
    'Accuracy': list(accuracy_values),
}

print(pd.DataFrame(d))

               Algorithms  Accuracy
0      LogisticRegression  0.985507
1  DecisionTreeClassifier  0.985507
2  RandomForestClassifier  0.985507
3              NaiveBayes  0.956522
4               KernelSVM  0.971014
