In [None]:
import numpy as np
import seaborn as sns
import graphviz
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Get data

In [None]:
# Load the raw table; the relative path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="mushrooms.csv")

# Preprocess

In [None]:
# Replace every column of df (the 'class' target included) with the codes
# produced by LabelEncoder. One encoder object is refit on each column in
# turn, so after the loop it only holds the mapping of the last column.
labelencoder = LabelEncoder()
for column in df.columns:
    encoded_values = labelencoder.fit_transform(df[column])
    df[column] = encoded_values

In [None]:
# Split the encoded table into predictors (x) and target (y).
# 'veil-type' is excluded right here: the notebook only removes it from df in
# a later cell, after x has already been built, so without this the column
# would still be fed to every model. errors="ignore" keeps this cell
# re-runnable once that later cell has already removed the column from df.
x = (
    df.drop(columns=["class"])
      .drop(columns=["veil-type"], errors="ignore")
)
y = df["class"]

In [None]:
# Remove the 'veil-type' column from df. Note that x and y were derived from
# df in the previous cell, so this reassignment does not alter them.
df = df.drop(columns=["veil-type"])

# Task 1: Preparing the data sets

<span style="font-size:1.25em">**Each set contains 4 subsets:** <br>
    - Feature train <br>
    - Feature test <br>
    - Label train <br>
    - Label test <br> </span>

In [None]:
# Fixed seed so that Restart & Run All reproduces the same four splits (and
# therefore comparable reports) instead of a fresh random shuffle every run.
SEED = 42

# Each call returns [features_train, features_test, labels_train, labels_test].
set1 = train_test_split(x, y, train_size=0.4, random_state=SEED)
set2 = train_test_split(x, y, train_size=0.6, random_state=SEED)
set3 = train_test_split(x, y, train_size=0.8, random_state=SEED)
set4 = train_test_split(x, y, train_size=0.9, random_state=SEED)

# Functions

<span style="font-size:1.25em">**Fit data and generate model**</span>

In [None]:
def get_model(X, y, d, random_state=0):
    """Fit an entropy-criterion decision tree on the given training data.

    Parameters
    ----------
    X : array-like
        Training features, passed unchanged to ``DecisionTreeClassifier.fit``.
    y : array-like
        Training labels, passed unchanged to ``DecisionTreeClassifier.fit``.
    d : int or None
        Forwarded as ``max_depth``.
    random_state : int, RandomState instance or None, default=0
        Forwarded to the classifier so repeated runs build the same tree.
        Pass ``None`` to get the previous, unseeded behaviour.

    Returns
    -------
    DecisionTreeClassifier
        The fitted classifier.
    """
    clf = DecisionTreeClassifier(
        criterion="entropy",
        max_depth=d,
        random_state=random_state,
    )
    # fit() returns the estimator itself, so it can be returned directly.
    return clf.fit(X, y)

<span style="font-size:1.25em">**From model, create a graph**</span>

In [None]:
def make_graph(clf):
    """Build a Graphviz rendering of a fitted decision tree.

    Feature labels are taken from the classifier's own ``feature_names_in_``
    when it has that attribute, so the labels always match the columns the
    model was trained on. Otherwise the notebook-level feature frame ``x``
    supplies the names, as before.

    Parameters
    ----------
    clf : fitted tree estimator
        Passed to ``export_graphviz``.

    Returns
    -------
    graphviz.Source
        Source object wrapping the exported DOT text.
    """
    feature_names = getattr(clf, "feature_names_in_", None)
    if feature_names is None:
        # Model carries no feature names: fall back to the global x.
        feature_names = x.columns

    dot_data = export_graphviz(clf, out_file=None,
                               # hand over a plain list rather than an Index/ndarray
                               feature_names=list(feature_names),
                               filled=True, rounded=True,
                               special_characters=True)
    return graphviz.Source(dot_data)

<span style="font-size:1.25em">**Visualize confusion matrix**</span>

In [None]:
def viz_cfm(cfm, ax=None):
    """Draw a confusion matrix as an annotated heatmap.

    Parameters
    ----------
    cfm : array-like
        The confusion matrix to draw.
    ax : matplotlib Axes, optional
        Axes to draw into; when omitted, seaborn picks the current Axes.

    Returns
    -------
    None
        Nothing is returned, so a cell ending in this call shows only the plot.
    """
    # seaborn's default annotation format is '.2g', which prints counts of
    # 100 or more in scientific notation (e.g. 1.2e+03). Use plain integers
    # for count matrices and two decimals for anything else.
    is_integral = np.issubdtype(np.asarray(cfm).dtype, np.integer)
    cell_format = "d" if is_integral else ".2f"

    ax = sns.heatmap(cfm, annot=True, fmt=cell_format,
                     linewidths=.5, cbar=False, ax=ax)
    ax.set_title('Decision Tree Classifier confusion matrix')
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

<span style="font-size:1.25em">**Perform tasks for each set:**<br>
    - Fit data and generate model <br>
    - Generate graph <br>
    - Estimate label <br>
    - Get classification report <br>
    - Generate confusion matrix <br>
    - Get accuracy score</span>




In [None]:
def tasks(dataset, depth):
    """Train a tree on one split and evaluate it on that split's test part.

    Parameters
    ----------
    dataset : sequence
        Indexed as [features_train, features_test, labels_train, labels_test].
    depth : int or None
        Passed to ``get_model`` as ``d``.

    Returns
    -------
    tuple
        ``(graph, classification report, confusion matrix, accuracy score)``.
    """
    features_train, features_test = dataset[0], dataset[1]
    labels_train, labels_test = dataset[2], dataset[3]

    fitted_tree = get_model(X=features_train, y=labels_train, d=depth)
    tree_graph = make_graph(fitted_tree)

    # All three metrics compare the same predictions with the held-out labels.
    predictions = fitted_tree.predict(features_test)
    report = classification_report(labels_test, predictions)
    matrix = confusion_matrix(labels_test, predictions)
    accuracy = accuracy_score(labels_test, predictions)

    return tree_graph, report, matrix, accuracy

# Processing data

In [None]:
# Fit one depth-unlimited tree per split; the accuracy slot of each result is
# discarded here.
graph1, rp1, cm1, _ = tasks(dataset=set1, depth=None)
graph2, rp2, cm2, _ = tasks(dataset=set2, depth=None)
graph3, rp3, cm3, _ = tasks(dataset=set3, depth=None)
graph4, rp4, cm4, _ = tasks(dataset=set4, depth=None)

# Task 2: Building the decision tree classifiers

<span style="font-size:1.5em">**Set 1 (40/60)**</span>

In [None]:
# Tree built from set1 (train_size=0.4); as the cell's last expression it is rendered by the notebook.
graph1

<span style="font-size:1.5em">**Set 2 (60/40)**</span>

In [None]:
# Tree built from set2 (train_size=0.6); as the cell's last expression it is rendered by the notebook.
graph2

<span style="font-size:1.5em">**Set 3 (80/20)**</span>

In [None]:
# Tree built from set3 (train_size=0.8); as the cell's last expression it is rendered by the notebook.
graph3

<span style="font-size:1.5em">**Set 4 (90/10)**</span>

In [None]:
# Tree built from set4 (train_size=0.9); as the cell's last expression it is rendered by the notebook.
graph4

# Task 3: Evaluating the decision tree classifiers

<span style="font-size:1.5em">**Set 1 (40/60)**</span>

In [None]:
# Text report first, then the confusion-matrix heatmap for set1.
print(rp1)
viz_cfm(cfm=cm1)

<span style="font-size:1.5em">**Set 2 (60/40)**</span>

In [None]:
# Text report first, then the confusion-matrix heatmap for set2.
print(rp2)
viz_cfm(cfm=cm2)

<span style="font-size:1.5em">**Set 3 (80/20)**</span>

In [None]:
# Text report first, then the confusion-matrix heatmap for set3.
print(rp3)
viz_cfm(cfm=cm3)

<span style="font-size:1.5em">**Set 4 (90/10)**</span>

In [None]:
# Text report first, then the confusion-matrix heatmap for set4.
print(rp4)
viz_cfm(cfm=cm4)

# Task 4: The depth and accuracy of a decision tree

<span style="font-size:1.75em">**Max depth = None**</span>

In [None]:
# Accumulator for the accuracy of each depth experiment, in the order the
# experiment cells are executed.
acc_scores = list()

In [None]:
# Depth-unlimited tree on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=None)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=None).
graph

<span style="font-size:1.75em">**Max depth = 2**</span>

In [None]:
# Tree limited to depth 2 on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=2)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=2).
graph

<span style="font-size:1.75em">**Max depth = 3**</span>

In [None]:
# Tree limited to depth 3 on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=3)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=3).
graph

<span style="font-size:1.75em">**Max depth = 4**</span>

In [None]:
# Tree limited to depth 4 on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=4)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=4).
graph

<span style="font-size:1.75em">**Max depth = 5**</span>

In [None]:
# Tree limited to depth 5 on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=5)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=5).
graph

<span style="font-size:1.75em">**Max depth = 6**</span>

In [None]:
# Tree limited to depth 6 on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=6)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=6).
graph

<span style="font-size:1.75em">**Max depth = 7**</span>

In [None]:
# Tree limited to depth 7 on set3; only the graph and the accuracy are kept.
graph, _, _, acc = tasks(dataset=set3, depth=7)
acc_scores.append(acc)

In [None]:
# Show the tree from the cell above (max_depth=7).
graph

<span style="font-size:1.75em">**Accuracy score report**</span>

In [None]:
# Fixed-width accuracy table. The header columns are hard-coded, so the row
# only lines up when acc_scores holds one value per depth, appended in the
# order None, 2, 3, 4, 5, 6, 7.
rule = "------------------------------------------------------------------"
header = "         | None  |   2   |   3   |   4   |   5   |   6   |   7   |"
accuracy_row = "Accuracy |" + "".join(' %.3f |' % score for score in acc_scores)

for table_line in (rule, header, rule, accuracy_row, rule):
    print(table_line)
