In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import missingno as miss

Variables used in classifying breast cancer (`diagnosis` is the target; the measurement features are computed for each cell nucleus):
1. id
1. diagnosis
1. radius_mean
1. texture_mean
1. perimeter_mean
1. area_mean
1. smoothness_mean
1. compactness_mean
1. concavity_mean
1. concave points_mean
1. symmetry_mean
1. fractal_dimension_mean
1. radius_se
1. texture_se
1. perimeter_se
1. area_se
1. smoothness_se
1. compactness_se
1. concavity_se
1. concave points_se
1. symmetry_se
1. fractal_dimension_se
1. radius_worst
1. texture_worst
1. perimeter_worst
1. area_worst
1. smoothness_worst
1. compactness_worst
1. concavity_worst
1. concave points_worst
1. symmetry_worst
1. fractal_dimension_worst

In [None]:
# Location of the dataset CSV under the read-only Kaggle input directory.
FILEPATH = '/kaggle/input/breast-cancer-wisconsin-data/data.csv'

In [None]:
# Read the CSV at FILEPATH into the DataFrame used by the rest of the notebook.
df = pd.read_csv(FILEPATH)

In [None]:
# Descriptive statistics for the columns of df.
df.describe()

In [None]:
# Column names, non-null counts and dtypes.
df.info()

In [None]:
# Peek at three randomly chosen rows (no seed is set, so the rows differ between runs).
df.sample(3)

In [None]:
# (number of rows, number of columns) of the loaded data.
df.shape

In [None]:
# Show the rows at positions 50 through 54 (the slice end, 55, is exclusive).
df.iloc[50:55]

In [None]:
# True if at least one cell anywhere in df is missing.
df.isnull().any().any()

In [None]:
# Per-column flag: True where the column contains at least one missing value.
df.isnull().any()

In [None]:
# Total number of missing cells in df: the first sum() counts per column,
# the second adds the column counts together.
df.isnull().sum().sum()

In [None]:
# missingno nullity matrix for df.
miss.matrix(df)

In [None]:
# missingno dendrogram for df.
miss.dendrogram(df)

In [None]:
# missingno bar chart for df.
miss.bar(df)

In [None]:
# Remove the 'Unnamed: 32' column (presumably the empty column surfaced by the
# missing-value plots -- confirm). errors='ignore' keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is already gone.
df = df.drop(columns = ['Unnamed: 32'], errors = 'ignore')

In [None]:
# Number of rows for each distinct value of the `diagnosis` column.
diag_se = df['diagnosis'].value_counts()

In [None]:
# Display the per-diagnosis counts.
diag_se

In [None]:
import seaborn as sns

# Bar chart of the diagnosis counts. x and y are passed by keyword because
# current seaborn releases no longer accept them as positional arguments.
sns.barplot(x = diag_se.index, y = diag_se.values)

In [None]:
# Correlation heatmap over the numeric columns only. pandas 2.x raises when
# corr() meets a non-numeric column (`diagnosis` presumably holds string
# labels -- confirm), while older versions silently skipped such columns, so
# selecting numeric columns explicitly gives the same picture on every version.
sns.heatmap(df.select_dtypes(include = 'number').corr(), square = False)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Target: the diagnosis label.
y = df['diagnosis']
# Features: every other column except `id`, which is presumably a record
# identifier rather than a measurement, so it should not be fed to the models.
X = df.drop(columns = ['id', 'diagnosis'])

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state makes this split repeatable.
# NOTE: these four variables are reassigned by a second, 80/20 split further down the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 23)

In [None]:
def show_split_data(X_train, X_test, y_train, y_test):
    """Print the shapes of the four pieces of a train/test split.

    Parameters
    ----------
    X_train, X_test : objects exposing ``.shape``
        Feature sets of the training and test partitions.
    y_train, y_test : objects exposing ``.shape``
        Targets of the training and test partitions.
    """
    print(f'X train shape : {X_train.shape}')
    print(f'Y train shape : {y_train.shape}')
    # The test lines must report the test partition, not the training one.
    print(f'X test shape  : {X_test.shape}')
    print(f'Y test shape  : {y_test.shape}')

In [None]:
# Print the shapes of the current train/test partitions.
show_split_data(X_train, X_test, y_train, y_test)

## Predict with various Algorithms

In [None]:
# Confusion matrix
from sklearn.metrics import confusion_matrix

In [None]:
import matplotlib.pyplot as plt

def show_confusion_matrix(_model_cm, title = None):
    """Draw a confusion matrix as an annotated heatmap and display it.

    Parameters
    ----------
    _model_cm : 2-D array-like
        Confusion matrix to plot; handed to ``sns.heatmap`` unchanged.
    title : str, optional
        Text placed in front of " Confusion Matrix" in the figure title.
        When omitted, the title is simply "Confusion Matrix".
    """
    fig, ax = plt.subplots(figsize = (5, 5))

    # fmt='g' prints the cell counts as plain numbers.
    sns.heatmap(_model_cm, annot = True, linewidth = 0, linecolor = 'red', fmt = 'g', ax = ax, cmap = 'Greens')

    # Alternative colormaps to try: YlGnBu, Blues, BuPu, Greens

    # The default title=None must not be concatenated with a string.
    heading = 'Confusion Matrix' if title is None else f'{title} Confusion Matrix'
    ax.set_title(heading)
    ax.set_xlabel('y Predict')
    ax.set_ylabel('y test')

    plt.show()

In [None]:
def get_metrics(model_cm):
    """Compute accuracy, sensitivity and specificity from a 2x2 confusion matrix.

    Parameters
    ----------
    model_cm : 2-D array supporting ``model_cm[i, j]`` indexing
        Confusion matrix with two rows and two columns.

    Returns
    -------
    tuple of float
        ``(accuracy, sensitivity, specificity)``, each rounded to two decimals.
        Sensitivity is the share of row 0 that lands in column 0 and
        specificity the share of row 1 that lands in column 1.
        NOTE(review): this treats row 0 as the positive class -- confirm that
        matches the label order of the matrix being passed in.
    """
    def _two_decimals(value):
        # Round via string formatting, exactly as "{:.2f}" would.
        return float(f"{value:.2f}")

    row0_diag, row0_off = model_cm[0, 0], model_cm[0, 1]
    row1_off, row1_diag = model_cm[1, 0], model_cm[1, 1]

    # Summing the rows and then the resulting vector gives the grand total.
    n_samples = sum(sum(model_cm))

    accuracy = (row0_diag + row1_diag) / n_samples
    sensitivity = row0_diag / (row0_diag + row0_off)
    specificity = row1_diag / (row1_off + row1_diag)

    return _two_decimals(accuracy), _two_decimals(sensitivity), _two_decimals(specificity)

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 hold-out split used by the model comparison below. The fixed
# random_state keeps the partition -- and every score computed from it --
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 23)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [None]:
def predict_with_model(model):
    """Fit ``model`` on the training split and score it on the test split.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` variables rather than taking them as arguments.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``

    Returns
    -------
    tuple
        ``(y_pred, accuracy)``: the predictions for ``X_test`` and their
        ``accuracy_score`` against ``y_test``.
    """
    fitted = model.fit(X_train, y_train)
    predictions = fitted.predict(X_test)
    return predictions, accuracy_score(y_test, predictions)

In [None]:
def show_metrics(model_cm):
    """Print accuracy, sensitivity and specificity for a 2x2 confusion matrix.

    The values are computed by ``get_metrics`` so that the printed numbers and
    the returned ones always come from a single implementation.

    Parameters
    ----------
    model_cm : 2-D array supporting ``model_cm[i, j]`` indexing
        Confusion matrix with two rows and two columns.
    """
    accuracy, sensitivity, specificity = get_metrics(model_cm)

    print(f'accuracy : {accuracy}, sensitivity : {sensitivity}, specificity : {specificity}')

In [None]:
# Best test accuracy seen so far and the class name of the model that achieved it.
best_model_accuracy = 0
best_model = None

# One seed for every estimator that accepts it, so repeated runs fit the same models.
RANDOM_STATE = 23

models = [
    MLPClassifier(random_state = RANDOM_STATE),
    RandomForestClassifier(random_state = RANDOM_STATE),
    KNeighborsClassifier(),
    LogisticRegression(solver = "liblinear", random_state = RANDOM_STATE),
    DecisionTreeClassifier(random_state = RANDOM_STATE),
    GaussianNB()
]

for model in models:

    model_name = type(model).__name__

    # Fit on the training split and predict the test split.
    y_pred, accuracy = predict_with_model(model)

    print("-" * 30)
    print(model_name + ": " )

    current_model_cm = confusion_matrix(y_test, y_pred)
    show_metrics(current_model_cm)

    # Strict '>' means the earliest model wins when accuracies tie.
    if accuracy > best_model_accuracy:
        best_model_accuracy = accuracy
        best_model = model_name

    print("Accuracy: {:.2%}".format(accuracy))

    show_confusion_matrix(current_model_cm, model_name)

In [None]:
# Report which classifier scored the highest test accuracy in the comparison loop.
print(f"Best Model : {best_model}")
print(f"Best Model Accuracy : {best_model_accuracy:.2%}")

**To Do:**

* Clean up the code further
* Write clearer documentation
