# 特徴量の重要度

このノートブックでは、決定木における特徴量の重要度について学習します。


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Global matplotlib defaults for this notebook: DejaVu Sans as the font family
# and (12, 8) as the figure size used when a figure does not set its own.
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'figure.figsize': (12, 8),
})

# Load the iris dataset and hold out 20% of the samples as a test split.
# random_state is fixed so the split is the same on every run.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a decision tree limited to depth 3 on the training split
# (fit() returns the fitted estimator itself).
tree = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)

# Feature names and the fitted tree's importance value for each of them.
feature_names = iris.feature_names
feature_importance = tree.feature_importances_

# Visualization: three side-by-side views of the same importance vector.
plt.figure(figsize=(15, 5))

# Panel 1: importance of each feature as a bar chart.
plt.subplot(1, 3, 1)
plt.bar(feature_names, feature_importance)
plt.title('Feature Importance')
plt.xticks(rotation=45)
plt.ylabel('Importance')

# Panel 2: cumulative importance, adding features from most to least important.
plt.subplot(1, 3, 2)
sorted_idx = np.argsort(feature_importance)[::-1]
cumulative_importance = np.cumsum(feature_importance[sorted_idx])
feature_counts = range(1, len(feature_names) + 1)
plt.plot(feature_counts, cumulative_importance, 'o-')
# The x axis counts features, so only integer tick positions are meaningful.
plt.xticks(feature_counts)
plt.xlabel('Number of Features')
plt.ylabel('Cumulative Importance')
plt.title('Cumulative Feature Importance')
plt.grid(True, alpha=0.3)

# Panel 3: share of the total importance per feature.
# Features with zero importance would be drawn as zero-width wedges whose
# labels and percentage texts are stacked on top of each other, so only
# features with a positive importance are passed to the pie chart.
plt.subplot(1, 3, 3)
has_importance = feature_importance > 0
pie_labels = [
    label for label, keep in zip(feature_names, has_importance) if keep
]
plt.pie(feature_importance[has_importance], labels=pie_labels, autopct='%1.1f%%')
plt.title('Feature Importance Distribution')

plt.tight_layout()
plt.show()

# Text summary: one "<feature name>: <importance>" line per feature, with the
# importance formatted to four decimal places.
print("=== 特徴量の重要度 ===")
importance_by_feature = zip(feature_names, feature_importance)
for name, importance in importance_by_feature:
    print(f"{name}: {importance:.4f}")

# How feature importance changes with tree depth: refit a tree for each
# maximum depth and store its importance vector as one row of the matrix
# (row i corresponds to depths[i], one column per feature).
depths = [1, 2, 3, 4, 5]
n_depths, n_features = len(depths), len(feature_names)
importance_matrix = np.zeros((n_depths, n_features))

for i, depth in enumerate(depths):
    tree_depth = DecisionTreeClassifier(max_depth=depth, random_state=42).fit(
        X_train, y_train
    )
    importance_matrix[i, :] = tree_depth.feature_importances_

# Visualization: one line per feature, showing its importance at each depth.
plt.figure(figsize=(12, 8))
for i, name in enumerate(feature_names):
    # Column i of the matrix holds feature i's importance across all depths.
    importance_over_depths = importance_matrix[:, i]
    plt.plot(depths, importance_over_depths, 'o-', label=name)

plt.title('Feature Importance vs Tree Depth')
plt.xlabel('Tree Depth')
plt.ylabel('Feature Importance')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

# Print the importance vector recorded for each tree depth.
# Rows of importance_matrix are stored in the same order as depths, so the two
# are paired directly; looking each depth up with depths.index() would rescan
# the list on every iteration and pick the wrong row for a repeated depth.
print("\n=== 深さによる特徴量重要度の変化 ===")
for depth, depth_importances in zip(depths, importance_matrix):
    print(f"深さ {depth}: {depth_importances}")
