- Look through sklearn datasets
- Find a dataset not used in the example
- Train a random forest model on your dataset
- Determine which forest was the most accurate using the .score() function
- Visualize feature importance with a bar plot

In [1]:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fit a small random forest on the Boston housing data, find the single most
# accurate tree inside it, and plot the forest's feature importances.
model = RandomForestRegressor(n_estimators=5, random_state=0)
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this cell only runs on older scikit-learn releases.
boston = load_boston()

# Load the dataset as a pandas data frame so the columns carry feature names.
df = pd.DataFrame(boston.data, columns=boston.feature_names)

# Train. The score below is R^2 on the same data used for fitting, so it is a
# training score, not a held-out estimate.
model.fit(df, boston.target)
print("score: %f" % model.score(df, boston.target))

# Score every individual tree of the forest (again on the training data).
# The raw array is used instead of the data frame because the sub-estimators
# are fitted on plain arrays; this avoids feature-name mismatch warnings on
# newer scikit-learn versions.
treeScores = [tree.score(boston.data, boston.target) for tree in model.estimators_]

# Pick the best tree from the actual scores rather than comparing against a
# hard-coded starting score of 0: R^2 can be zero or negative, in which case a
# fixed baseline would report a tree/score pair that was never measured.
# np.argmax returns the first maximum, so ties resolve to the earliest tree.
bestTree = int(np.argmax(treeScores))
bestScore = {bestTree: treeScores[bestTree]}

print(bestScore)

# Spread of each feature's importance across the trees; used as error bars.
treeSTD = np.std([tree.feature_importances_ for tree in model.estimators_], axis=0)
importances = model.feature_importances_

# Feature indices ordered from most to least important.
indices = np.argsort(importances)[::-1]
totalFeatures = boston.data.shape[1]

# Print the feature ranking
print("\nFeature ranking:")
for rank, featureIndex in enumerate(indices, start=1):
    print("%d. feature %s (%f)" % (rank, boston.feature_names[featureIndex], importances[featureIndex]))

# Plot the feature importances of the forest
fig = plt.figure(figsize=[12, 6])
plt.title("Feature importances")
plt.bar(range(totalFeatures), importances[indices],
        color="r", yerr=treeSTD[indices], align="center")
plt.xticks(range(totalFeatures), boston.feature_names[indices])
plt.show()


score: 0.962417
{2: 0.8959191110370321}

Feature ranking:
1. feature RM (0.532567)
2. feature LSTAT (0.262672)
3. feature DIS (0.050350)
4. feature NOX (0.041677)
5. feature CRIM (0.032955)
6. feature PTRATIO (0.020703)
7. feature TAX (0.018564)
8. feature AGE (0.012537)
9. feature RAD (0.010819)
10. feature B (0.009453)
11. feature ZN (0.004241)
12. feature INDUS (0.003374)
13. feature CHAS (0.000088)


<Figure size 1200x600 with 1 Axes>