In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Shared font sizes: axis labels at 14pt, tick labels on both axes at 12pt.
mpl.rc('axes', labelsize=14)
for tick_group in ('xtick', 'ytick'):
    mpl.rc(tick_group, labelsize=12)
# Seed NumPy's global random generator so that this notebook's output is
# stable across runs.
np.random.seed(42)

In [None]:
from sklearn.datasets import load_iris


In [None]:
iris = load_iris()
# Keep only the feature columns from index 2 onward; the rest of the notebook
# labels these as petal length and petal width.
X = iris.data[:, 2:]
y = iris.target
# Show the shape and the first few rows instead of dumping every sample.
print(X.shape)
print(X[:5])

In [None]:
# Display the class labels taken from iris.target.
y

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Depth-2 tree on the two petal features. random_state is pinned so the fitted
# tree does not depend on the state of NumPy's global RNG (and therefore on
# which cells were run before this one).
tree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)
tree_clf.fit(X, y)

In [None]:
import os
# Figures and exported .dot files are written to <root>/images/<chapter>.
PROJECT_ROOT_DIR, CHAPTER_ID = ".", "decision_trees"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the output directory up front; an existing directory is fine.
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current pyplot figure inside ``IMAGES_PATH``.

    Parameters
    ----------
    fig_id : str
        Base file name (without extension); also echoed in the progress message.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is called before saving.
    fig_extension : str
        File extension; also passed to ``plt.savefig`` as ``format``.
    resolution : int
        Passed to ``plt.savefig`` as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [None]:
from graphviz import Source
from sklearn.tree import export_graphviz

# Write the fitted classifier as a Graphviz .dot file, then load that file
# back so the notebook renders it as the cell's output.
iris_dot_path = os.path.join(IMAGES_PATH, "iris_tree.dot")

export_graphviz(
    tree_clf,
    out_file=iris_dot_path,
    feature_names=iris.feature_names[2:],
    class_names=iris.target_names,
    rounded=True,
    filled=True,
)

# To convert the .dot file to a PNG outside the notebook, run this on the
# command line from the directory that contains iris_tree.dot:
#     dot -Tpng iris_tree.dot -o iris_tree.png
Source.from_file(iris_dot_path)

## EXTRA 

In [None]:

from matplotlib.colors import ListedColormap
def plot_decision_boundary(clf, X, y, axes=(0, 7.5, 0, 3),
                           iris=True, legend=False, plot_training=True):
    """Draw a classifier's predicted regions over a 2-D feature grid.

    Parameters
    ----------
    clf : fitted estimator
        Must expose ``predict`` accepting an ``(n, 2)`` array.
    X : array
        Training features; columns 0 and 1 are plotted on the x and y axes.
    y : array
        Training labels; samples with labels 0, 1 and 2 are drawn with
        different markers.
    axes : sequence of four numbers
        ``(x_min, x_max, y_min, y_max)`` limits of the evaluation grid and of
        the plot. The default is an immutable tuple so it cannot be modified
        between calls.
    iris : bool
        If true, label the axes "Petal length"/"Petal width"; otherwise draw
        extra contour lines and label the axes ``x_1``/``x_2``.
    legend : bool
        If true, add a legend in the lower-right corner.
    plot_training : bool
        If true, scatter the training samples and apply the ``axes`` limits.
    """
    # Evaluate the classifier on a 100 x 100 grid spanning the requested limits.
    x1s = np.linspace(axes[0], axes[1], 100)
    x2s = np.linspace(axes[2], axes[3], 100)
    x1, x2 = np.meshgrid(x1s, x2s)
    X_new = np.c_[x1.ravel(), x2.ravel()]
    y_pred = clf.predict(X_new).reshape(x1.shape)

    # Filled regions, one colour per predicted class.
    custom_cmap = ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])
    plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=custom_cmap)
    if not iris:
        # Darker contour lines are only drawn for non-iris data.
        custom_cmap2 = ListedColormap(['#7d7d58', '#4c4c7f', '#507d50'])
        plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)

    if plot_training:
        plt.plot(X[:, 0][y == 0], X[:, 1][y == 0], "yo", label="Iris setosa")
        plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], "bs", label="Iris versicolor")
        plt.plot(X[:, 0][y == 2], X[:, 1][y == 2], "g^", label="Iris virginica")
        # The axis limits are only applied when the training points are drawn.
        plt.axis(axes)

    if iris:
        plt.xlabel("Petal length", fontsize=14)
        plt.ylabel("Petal width", fontsize=14)
    else:
        plt.xlabel(r"$x_1$", fontsize=18)
        plt.ylabel(r"$x_2$", fontsize=18, rotation=0)

    if legend:
        plt.legend(loc="lower right", fontsize=14)

plt.figure(figsize=(8, 4))
plot_decision_boundary(tree_clf, X, y)

# Hard-coded split lines as (x coordinates, y coordinates, line style).
# NOTE(review): presumably these match the thresholds learned by tree_clf;
# verify against the exported tree.
split_lines = [
    ([2.45, 2.45], [0, 3], "k-"),
    ([2.45, 7.5], [1.75, 1.75], "k--"),
    ([4.95, 4.95], [0, 1.75], "k:"),
    ([4.85, 4.85], [1.75, 3], "k:"),
]
for line_xs, line_ys, line_style in split_lines:
    plt.plot(line_xs, line_ys, line_style, linewidth=2)

# Depth annotations as (x, y, text, font size).
depth_labels = [
    (1.40, 1.0, "Depth=0", 15),
    (3.2, 1.80, "Depth=1", 13),
    (4.05, 0.5, "Depth=2", 11),
]
for label_x, label_y, label_text, label_size in depth_labels:
    plt.text(label_x, label_y, label_text, fontsize=label_size)

save_fig("decision_tree_decision_boundaries_plot")
plt.show()

In [None]:
# Per-class probabilities for one sample with features [5, 1.5]
# (petal length, petal width).
tree_clf.predict_proba([[5, 1.5]])

In [None]:
# Predicted class index for the same sample [5, 1.5].
tree_clf.predict([[5, 1.5]])

In [None]:
# Peek at the first few feature rows and labels rather than printing every sample.
print(X[:5], "\t", y[:5])

In [None]:
# Per-class probabilities for the sample [7, 7.0].
# NOTE(review): the second feature (7.0) is well outside the 0-3 range used for
# the decision-boundary plot; presumably a deliberate out-of-range probe.
tree_clf.predict_proba([[7,7.0]])

In [None]:
# Predicted class index for the sample [1.4, 0.2].
tree_clf.predict([[1.4,0.2]])

In [None]:
# Predicted class index for the sample [7, 7.0].
tree_clf.predict([[7,7.0]])

In [None]:
# Per-class probabilities for the sample [8, 10].
# NOTE(review): both values lie outside the 0-7.5 / 0-3 plotting range used
# earlier; presumably another out-of-range probe.
tree_clf.predict_proba([[8,10]])

In [None]:
from sklearn.datasets import make_moons

In [None]:
# 100 noisy two-moons samples. random_state is pinned so that re-running this
# cell on its own regenerates exactly the same data.
X_moon, y_moon = make_moons(n_samples=100, noise=0.15, random_state=42)

In [None]:
# Display the generated moons feature array.
X_moon

In [None]:
# Histogram of each feature column of X_moon (5 bins), with one labelled
# series per column. plt.show() keeps the cell output to the figure only,
# instead of the raw (counts, bin edges, patches) return value.
plt.hist(X_moon, bins=5, label=[r"$x_1$", r"$x_2$"])
plt.xlabel("Feature value")
plt.ylabel("Count")
plt.legend()
plt.show()

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Depth-2 regression tree; random_state is pinned for repeatable fits, matching
# the seeded classifier used for the grid search further down.
tree_reg = DecisionTreeRegressor(max_depth=2, random_state=42)

In [None]:
# Fit the regression tree on the moons samples.
# NOTE(review): y_moon comes from make_moons, so it presumably holds class
# labels rather than a continuous target; confirm a regressor is intended.
tree_reg.fit(X_moon,y_moon)

In [None]:
from graphviz import Source
from sklearn.tree import export_graphviz

# Write the fitted regressor as a Graphviz .dot file, then load that file back
# so the notebook renders it as the cell's output.
moon_dot_path = os.path.join(IMAGES_PATH, "moon_tree.dot")

export_graphviz(
    tree_reg,
    out_file=moon_dot_path,
    rounded=True,
    filled=True,
)

Source.from_file(moon_dot_path)

 # Question 07: Train and fine-tune a Decision Tree for the moons dataset by following these steps:

#### Step a)

In [None]:
from sklearn.datasets import make_moons
# 10,000 noisy two-moons samples for the exercise. random_state is pinned so
# the dataset, and every score computed from it below, is the same on each run.
# Note that this rebinds X and y, which previously held the iris data.
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)

In [None]:
# Display the generated moons features and labels as a tuple.
X,y

#### Step b)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the moons samples as a test set, using a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Step c)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
# Search grid: every max_leaf_nodes value from 2 to 59, crossed with three
# min_samples_split values.
params = {
    'max_leaf_nodes': list(range(2, 60)),
    'min_samples_split': [2, 3, 4],
}
# 5-fold cross-validated grid search over a seeded decision tree.
grid_tree = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=params,
    cv=5,
    verbose=1,
)
grid_tree.fit(X_train, y_train)

In [None]:
# Show the estimator selected by the grid search.
grid_tree.best_estimator_

In [None]:

from sklearn.metrics import accuracy_score

# Predict the held-out test set with the fitted grid search and report the
# accuracy of those predictions against y_test.
y_pred = grid_tree.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the test labels against the current y_pred.
confusion_matrix(y_test,y_pred)

In [None]:
from sklearn.metrics import classification_report
# classification_report returns one multi-line string. Print it so the table
# renders with real line breaks instead of a quoted repr full of "\n" escapes.
# The upper-casing of the report text is kept as before.
print(classification_report(y_test, y_pred).upper())

In [None]:
# Ensemble settings: number of trees to grow and the number of training
# instances each tree is fitted on.
n_trees, n_instances = 1000, 100

# Collects one (features, labels) pair per small training subset.
mini_sets = list()

In [None]:
from sklearn.model_selection import ShuffleSplit
# Draw n_trees random subsets of n_instances training samples each: everything
# except n_instances samples is assigned to the ignored "test" side of a split.
rs = ShuffleSplit(n_splits=n_trees, test_size=len(X_train) - n_instances, random_state=42)

# Build the list inside this cell rather than appending to one created in
# another cell, so that re-running the cell does not accumulate duplicates.
mini_sets = [
    (X_train[mini_train_index], y_train[mini_train_index])
    for mini_train_index, _ in rs.split(X_train)
]

In [None]:
from sklearn.base import clone

# One unfitted copy of the best grid-search tree per mini training set.
forest = [clone(grid_tree.best_estimator_) for _ in range(n_trees)]

accuracy_scores = []

for tree, (X_mini_train, y_mini_train) in zip(forest, mini_sets):
    tree.fit(X_mini_train, y_mini_train)

    # Use a dedicated name for each tree's predictions so the loop does not
    # overwrite the global y_pred holding the grid-search predictions, which
    # the confusion-matrix and classification-report cells read.
    y_pred_mini = tree.predict(X_test)
    accuracy_scores.append(accuracy_score(y_test, y_pred_mini))

# Mean test accuracy of the individual trees.
np.mean(accuracy_scores)

In [None]:

# One row of test-set predictions per tree, stored as unsigned 8-bit integers.
Y_pred = np.empty((n_trees, len(X_test)), dtype=np.uint8)

for tree_index in range(len(forest)):
    tree = forest[tree_index]
    Y_pred[tree_index, :] = tree.predict(X_test)

In [None]:
from scipy.stats import mode

# Majority vote: Y_pred has one row per tree, so taking the mode along axis 0
# gives, for each test instance, the most frequent prediction and its count.
y_pred_majority_votes, n_votes = mode(Y_pred, axis=0)

In [None]:

# Accuracy of the majority-vote predictions on the test set. reshape([-1])
# flattens the mode result to 1-D before it is compared with y_test.
accuracy_score(y_test, y_pred_majority_votes.reshape([-1]))

                            --The End--