In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from PIL import Image

In [None]:
%matplotlib inline

In [None]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Load scikit-learn's digits dataset and list what the returned object exposes.
# Later cells read its DESCR, data, target and feature_names attributes.
digits = load_digits()
dir(digits)

In [None]:
# Show the dataset's description text. print() renders newlines as line breaks,
# whereas a bare expression would display the repr (assuming DESCR is a str).
print(digits.DESCR)

In [None]:
# Wrap the feature matrix in a DataFrame. No column names are passed here, so
# the columns get pandas' default labels rather than digits.feature_names.
X = pd.DataFrame(digits.data)
X.head()  # preview the first rows only instead of dumping the whole frame

In [None]:
# Target labels as a NumPy array; peek at the first five entries.
y = np.array(digits.target)
y[:5]

In [None]:
# Hold out 33% of the samples for evaluation; the seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

## Try with a Decision Tree

In [None]:
# Decision-tree baseline. Nodes with fewer than 10 samples are not split further.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn randomly permutes candidate features at each split, so an
# unseeded tree can differ between runs and the scores below would not be
# reproducible on Restart & Run All.
dt = DecisionTreeClassifier(min_samples_split=10, random_state=42)
dt.fit(X_train, y_train)

In [None]:
# Predictions of the fitted decision tree on the held-out split.
y_pred = dt.predict(X_test)

In [None]:
# Weighted-average precision of the decision tree's predictions on the test split.
precision_score(y_test, y_pred, average="weighted")

In [None]:
# Weighted-average recall of the decision tree's predictions on the test split.
recall_score(y_test, y_pred, average="weighted")

In [None]:
# Draw the fitted tree. The return value is kept in `tree` because the next
# cell iterates over it and reads each element's text.
tree = plot_tree(dt);

In [None]:
# Print the 2nd and 4th lines of every node label produced by plot_tree.
# Labels with fewer than four lines cannot supply both entries, so they are
# skipped with an explicit guard; a blanket `except Exception: pass` would also
# hide unrelated errors.
for t in tree:
    label_lines = t.get_text().split('\n')
    if len(label_lines) < 4:
        continue
    print(np.array(label_lines)[[1, 3]])

## Try with a Random Forest

In [None]:
# Random forest with default hyper-parameters. random_state is pinned (same
# seed as the train/test split) so the randomness used while building the
# forest, and therefore the scores and feature importances in the cells
# below, is identical on every Restart & Run All.
rf = RandomForestClassifier(random_state=42)

In [None]:
# Fit the random forest on the same training split used for the decision tree.
rf.fit(X_train, y_train)

In [None]:
# Predict with the random forest. NOTE: this rebinds `y_pred`, which previously
# held the decision tree's predictions, so the metric cells below score the forest.
y_pred = rf.predict(X_test)

In [None]:
# Weighted-average precision of the random forest's predictions on the test split.
precision_score(y_test, y_pred, average="weighted")

In [None]:
# Weighted-average recall of the random forest's predictions on the test split.
recall_score(y_test, y_pred, average="weighted")

In [None]:
# How many features have an importance above 0.1?
(rf.feature_importances_ > 0.1).sum()

In [None]:
# Display the forest's per-feature importance values.
rf.feature_importances_

In [None]:
# Bar chart of the forest's impurity-based (MDI) feature importances, with the
# standard deviation of each feature's importance across the individual trees
# drawn as error bars.
forest = rf
importances = forest.feature_importances_
per_tree = np.array([est.feature_importances_ for est in forest.estimators_])
std = per_tree.std(axis=0)

# Label each importance with the dataset's own feature names.
forest_importances = pd.Series(data=importances, index=digits.feature_names)

fig, ax = plt.subplots(figsize=(10, 6))
forest_importances.plot(kind="bar", yerr=std, ax=ax)
ax.set(title="Feature importances using MDI", ylabel="Mean decrease in impurity")
fig.tight_layout()