In [5]:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
import numpy as np

#Load boston housing dataset as an example
boston = load_boston()

X = boston["data"]
Y = boston["target"]

names = boston["feature_names"]

rf = RandomForestRegressor()
rf.fit(X, Y)

print("Features sorted by their score:")

print(sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), names), reverse=True))

Features sorted by their score:
[(0.44269999999999998, 'LSTAT'), (0.38129999999999997, 'RM'), (0.061499999999999999, 'DIS'), (0.033300000000000003, 'CRIM'), (0.021000000000000001, 'PTRATIO'), (0.0201, 'NOX'), (0.0135, 'AGE'), (0.0091999999999999998, 'TAX'), (0.0091999999999999998, 'B'), (0.0041000000000000003, 'INDUS'), (0.0028, 'RAD'), (0.001, 'ZN'), (0.00029999999999999997, 'CHAS')]


In [10]:
size = 10000

np.random.seed(seed=10)

X_seed = np.random.normal(0, 1, size)

X0 = X_seed + np.random.normal(0, .1, size)
X1 = X_seed + np.random.normal(0, .1, size)
X2 = X_seed + np.random.normal(0, .1, size)

X = np.array([X0, X1, X2]).T
Y = X0 + X1 + X2
  
rf1 = RandomForestRegressor(n_estimators=20, max_features=2)
rf1.fit(X, Y);

print ("Scores for X0, X1, X2:", map(lambda x:round (x,3), rf1.feature_importances_))

Scores for X0, X1, X2: <map object at 0x1065ffac8>


In [12]:
from sklearn.cross_validation import ShuffleSplit
from sklearn.metrics import r2_score
from collections import defaultdict
 
X = boston["data"]
Y = boston["target"]
 
rf = RandomForestRegressor()
scores = defaultdict(list)
 
#crossvalidate the scores on a number of different random splits of the data
for train_idx, test_idx in ShuffleSplit(len(X), 100, .3):
    X_train, X_test = X[train_idx], X[test_idx]
    Y_train, Y_test = Y[train_idx], Y[test_idx]
    r = rf.fit(X_train, Y_train)
    acc = r2_score(Y_test, rf.predict(X_test))
    for i in range(X.shape[1]):
        X_t = X_test.copy()
        np.random.shuffle(X_t[:, i])
        shuff_acc = r2_score(Y_test, rf.predict(X_t))
        scores[names[i]].append((acc-shuff_acc)/acc)
print ("Features sorted by their score:")
print (sorted([(round(np.mean(score), 4), feat) for
              feat, score in scores.items()], reverse=True))


Features sorted by their score:
[(0.73509999999999998, 'LSTAT'), (0.5645, 'RM'), (0.080000000000000002, 'DIS'), (0.040599999999999997, 'CRIM'), (0.040500000000000001, 'NOX'), (0.022800000000000001, 'PTRATIO'), (0.016899999999999998, 'TAX'), (0.0112, 'AGE'), (0.0047999999999999996, 'INDUS'), (0.0047000000000000002, 'B'), (0.0032000000000000002, 'RAD'), (0.00080000000000000004, 'CHAS'), (0.0, 'ZN')]
