# Imports

In [23]:
# Common imports
import numpy as np
import os

# Seed NumPy's legacy global random generator so this notebook's output is stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Setup

In [24]:
# Font sizes applied to every figure: axis labels slightly larger than tick labels
mpl.rcParams["axes.labelsize"] = 14
mpl.rcParams["xtick.labelsize"] = 12
mpl.rcParams["ytick.labelsize"] = 12

# Figures are written to <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "decision_trees"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the output folder up front; an already existing folder is not an error
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure inside ``IMAGES_PATH``.

    Parameters
    ----------
    fig_id : str
        File name without extension; also echoed in the progress message.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is called before saving.
    fig_extension : str, default "png"
        File extension; also handed to ``plt.savefig`` as the output format.
    resolution : int, default 300
        Handed to ``plt.savefig`` as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)


def image_path(fig_id):
    """Return the location of ``fig_id`` inside the ``IMAGES_PATH`` folder.

    ``fig_id`` is joined as-is, so it must already carry any file extension.
    """
    location = os.path.join(IMAGES_PATH, fig_id)
    return location

# Load and Preprocess Input

In [25]:
import pandas as pd

# Load the fetal health CSV (the path is relative to the notebook's working directory)
df = pd.read_csv('../input/fetal-prediction/fetal_health.csv')
# Preview only the first rows instead of rendering the whole frame
df.head()

## Find Correlation

In [26]:
# Pairwise Pearson correlation between all columns of the dataset
corr = df.corr(method="pearson")
# Render the matrix as a table whose cells are shaded with the coolwarm colormap
corr.style.background_gradient(cmap="coolwarm")

### `prolongued_decelerations` and `abnormal_short_term_variability` have the highest correlation with `fetal_health`

In [27]:
from sklearn.model_selection import train_test_split


# Features: the two columns picked from the correlation table; target: fetal_health
X = df[['prolongued_decelerations', 'abnormal_short_term_variability']]
y = df['fetal_health']

# Hold out 25% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)
X_train.shape, y_train.shape

## Scaling

In [28]:
from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max on the training split only, then rescale that split
scaler = MinMaxScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_train_scaled

# Model - Decision Tree

## Train Model

In [29]:
from sklearn.tree import DecisionTreeClassifier

# A fixed random_state keeps the fitted tree reproducible across runs
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X=X_train_scaled, y=y_train.to_numpy())

## Plot Boundaries for Decision Tree

In [30]:
from sklearn.tree import export_graphviz

# Write the fitted tree as a Graphviz .dot file inside IMAGES_PATH.
# class_names must follow the estimator's ascending class order (tree_clf.classes_);
# y_train.unique() yields labels in order of first appearance, which can attach
# the wrong name to a class in the rendered nodes.
export_graphviz(
    tree_clf,
    out_file=image_path("fetal.dot"),
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in tree_clf.classes_],
    rounded=True,
    filled=True
)

In [31]:
from matplotlib.colors import ListedColormap

def plot_decision_boundary(clf, X, y, axes=(-0.1, 1.1, -0.1, 1.1), legend=False, plot_training=True):
    """Draw the decision regions of a fitted two-feature classifier on the current figure.

    The classifier is evaluated on a 100 x 100 grid spanning ``axes`` and the
    predictions are drawn as filled contours. Axis limits and axis labels are
    always applied, so a boundary-only plot (``plot_training=False``) is framed
    and labelled the same way as one that includes the training points.

    Parameters
    ----------
    clf : fitted estimator
        Object whose ``predict`` accepts an ``(n_samples, 2)`` array.
    X : numpy.ndarray
        Two-column feature array; only read when ``plot_training`` is true.
    y : numpy.ndarray
        Class label per row of ``X``. Only the labels 1, 2 and 3 are drawn.
    axes : sequence of 4 floats
        ``(x_min, x_max, y_min, y_max)`` of the plotted area.
    legend : bool
        Add a legend in the lower-right corner.
    plot_training : bool
        Overlay the samples of ``X`` with one marker style per class.
    """
    x1s = np.linspace(axes[0], axes[1], 100)
    x2s = np.linspace(axes[2], axes[3], 100)
    x1, x2 = np.meshgrid(x1s, x2s)
    # One grid point per row, columns ordered like the features in X
    X_new = np.c_[x1.ravel(), x2.ravel()]
    y_pred = clf.predict(X_new).reshape(x1.shape)
    custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])
    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap)
    if plot_training:
        # Marker colours mirror the region colours of the colormap above
        for class_label, marker in ((1, "yo"), (2, "bs"), (3, "g^")):
            members = y == class_label
            plt.plot(X[members, 0], X[members, 1], marker, label=str(class_label))
    plt.axis(axes)
    plt.xlabel(r'prolongued_decelerations', fontsize=18, rotation=0)
    plt.ylabel(r'abnormal_short_term_variability', fontsize=18, rotation=90)
    if legend:
        plt.legend(loc="lower right", fontsize=14)

In [32]:
# Draw the tree's decision regions over the scaled training points, save, then show
plt.figure(figsize=(12, 8))
plot_decision_boundary(clf=tree_clf, X=X_train_scaled, y=y_train.to_numpy())
save_fig(fig_id="decision_tree_decision_boundaries_plot")
plt.show()

## Predict on Test Data

In [33]:
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns

# Scale the test features with the scaler fitted on the training split, then predict
y_pred = tree_clf.predict(scaler.transform(X_test))

# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the true
# classes and columns the predicted classes. fmt="d" prints the counts as plain
# integers, and the tick labels show the class values instead of positional indices.
sns.heatmap(
    confusion_matrix(y_test, y_pred, labels=tree_clf.classes_),
    annot=True,
    fmt="d",
    xticklabels=tree_clf.classes_,
    yticklabels=tree_clf.classes_,
)

In [34]:
# Fraction of test samples classified correctly; arguments follow scikit-learn's (y_true, y_pred) order
accuracy_score(y_test, y_pred)

In [35]:
from sklearn.tree import export_graphviz
from io import StringIO  # standard library; no need for the third-party `six` shim
from IPython.display import Image
import pydotplus

def stringify(s):
    """Return ``str(s)``."""
    return str(s)

# Export the fitted tree as Graphviz source into an in-memory text buffer.
# class_names must follow the estimator's ascending class order (tree_clf.classes_);
# y.unique() yields labels in order of first appearance, which can mislabel nodes.
dot_data = StringIO()
export_graphviz(
    tree_clf,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=list(X.columns),
    class_names=[stringify(label) for label in tree_clf.classes_],
)
# Render the Graphviz source: once to a PNG file in the working directory,
# once inline as this cell's output
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('fetal_health.png')
Image(graph.create_png())