## Importing the data and necessary libraries

In [None]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the breast cancer dataset that ships with scikit-learn.
data = load_breast_cancer()
# Show what kind of container the loader returned before digging into it.
type(data)

## Creating and Analysing the dataset

In [None]:
df = pd.DataFrame(data['data'], columns=data['feature_names'])

In [None]:
df['Target'] = data.target

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
df.isnull().sum()

In [None]:
df.shape

In [None]:
df.info()

In [None]:
sns.heatmap(df.corr())

## Preparing the data for modelling

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
X = df.drop('Target',axis=1)
y = df['Target']

In [None]:
print(df.shape)
print(X.shape)
print(y.shape)

In [None]:
# Hold out 20% of the rows for testing. A fixed seed makes the split (and every
# metric computed from it) reproducible across runs; stratifying on y keeps the
# class proportions the same in the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Report the size of each partition produced by the split.
for label, part in (
    ("X_train: ", X_train),
    ("X_test: ", X_test),
    ("y_train: ", y_train),
    ("y_test: ", y_test),
):
    print(label, part.shape)

## Building the model and evaluating it

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn import tree

In [None]:
# Baseline tree with default hyper-parameters. The seed fixes the estimator's
# internal randomness, so refitting on the same training data yields the same
# tree and the same downstream metrics and plots.
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)

In [None]:
pred_dtree = dtree.predict(X_test)

In [None]:
tn, fp, fn, tp = confusion_matrix(y_test, pred_dtree).ravel()

In [None]:
print(" accuracy = ", accuracy_score(y_test, pred_dtree))
print(" f1_score = ", f1_score(y_test, pred_dtree))
print("Precision = \t\t", tp/(tp+fp))
print("Recall/Sensitivity = \t", tp/(tp + fn))
print("Specivity = \t\t", tn/(tn + fp))
print("\nClassification Matrix")
print(confusion_matrix(y_test, pred_dtree))
print(classification_report(y_test, pred_dtree))

## Visualising the Decision Tree

In [None]:
# Text dump of the learned rules. Supplying the dataset's feature names makes
# each split show the real column name instead of a generic feature_<i>
# placeholder, matching the plotted tree. A plain list is passed because not
# every scikit-learn release accepts other sequence types here.
text_representation = tree.export_text(dtree, feature_names=list(data.feature_names))
print(text_representation)

In [None]:
# Large canvas so the nodes of the unrestricted tree stay legible.
fig = plt.figure(figsize=(25,20))
# Some scikit-learn releases accept only real lists for the name arguments,
# so the dataset's name sequences are converted before plotting.
_ = tree.plot_tree(dtree,
                   feature_names=list(data.feature_names),
                   class_names=list(data.target_names),
                   filled=True)

## Optimising the Decision Tree performance and evaluating the model

In [None]:
# Constrained tree: entropy as the split criterion and at most three levels.
# The seed fixes the estimator's internal randomness so the comparison with
# the baseline tree is repeatable from run to run.
dtree_op = DecisionTreeClassifier(criterion="entropy", max_depth=3, random_state=42)
dtree_op.fit(X_train, y_train)

In [None]:
pred_dtree_op = dtree_op.predict(X_test)

In [None]:
tn, fp, fn, tp = confusion_matrix(y_test, pred_dtree_op).ravel()

In [None]:
print("Accuracy = \t\t", accuracy_score(y_test, pred_dtree_op))
print("F1_score = \t\t", f1_score(y_test, pred_dtree_op))
print("Precision = \t\t", tp/(tp+fp))
print("Recall/Sensitivity = \t", tp/(tp + fn))
print("Specivity = \t\t", tn/(tn + fp))
print("\nClassification Matrix")
print(confusion_matrix(y_test, pred_dtree_op))
print(classification_report(y_test, pred_dtree_op))

## Visualising the Optimised Decision Tree

In [None]:
# Text dump of the depth-limited tree's rules. Supplying the dataset's feature
# names makes each split show the real column name instead of a generic
# feature_<i> placeholder, matching the plotted tree. A plain list is passed
# because not every scikit-learn release accepts other sequence types here.
text_representation = tree.export_text(dtree_op, feature_names=list(data.feature_names))
print(text_representation)

In [None]:
# Same canvas size as the baseline plot so the two figures compare directly.
fig = plt.figure(figsize=(25,20))
# Some scikit-learn releases accept only real lists for the name arguments,
# so the dataset's name sequences are converted before plotting.
_ = tree.plot_tree(dtree_op,
                   feature_names=list(data.feature_names),
                   class_names=list(data.target_names),
                   filled=True)