# 寄与率と累積寄与率

このノートブックでは、PCAの寄与率と累積寄与率について詳しく学習します。


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Global matplotlib defaults used by the figures in this notebook.
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'figure.figsize': (12, 8),
})

# Load the iris dataset and split it into the feature matrix and the labels.
iris = load_iris()
X = iris.data
y = iris.target

# Standardize the features before running PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# PCA without an explicit n_components, i.e. keeping all components.
pca_full = PCA()
X_pca_full = pca_full.fit_transform(X_scaled)

# Per-component explained variance ratio and its running (cumulative) sum.
explained_variance_ratio = pca_full.explained_variance_ratio_
cumulative_variance_ratio = explained_variance_ratio.cumsum()

# Visualization: one figure with three side-by-side panels.
plt.figure(figsize=(15, 5))

# 1-based component numbers for the x-axis. They are derived from the length
# of the ratio array instead of being hard-coded, so the plots stay correct
# if the number of principal components changes.
component_numbers = range(1, len(explained_variance_ratio) + 1)

# Panel 1: explained variance ratio of each individual component.
plt.subplot(1, 3, 1)
plt.bar(component_numbers, explained_variance_ratio)
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.title('Explained Variance Ratio')

# Panel 2: cumulative explained variance ratio versus number of components.
plt.subplot(1, 3, 2)
plt.plot(component_numbers, cumulative_variance_ratio, 'o-')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance Ratio')
plt.title('Cumulative Explained Variance Ratio')
plt.grid(True, alpha=0.3)

# Panel 3: individual ratios (bars) overlaid with the cumulative curve (line)
# so the two can be compared on the same axes.
plt.subplot(1, 3, 3)
plt.bar(component_numbers, explained_variance_ratio, alpha=0.7, label='Individual')
plt.plot(component_numbers, cumulative_variance_ratio, 'ro-', label='Cumulative')
plt.xlabel('Principal Component')
plt.ylabel('Variance Ratio')
plt.title('Individual vs Cumulative')
plt.legend()

plt.tight_layout()
plt.show()

# Print the individual and cumulative explained variance ratio of every
# component. Iterating over the arrays themselves (instead of range(4)) keeps
# the report correct for any number of components.
print("=== 寄与率と累積寄与率 ===")
for pc_number, (ratio, cumulative) in enumerate(
        zip(explained_variance_ratio, cumulative_variance_ratio), start=1):
    print(f"PC{pc_number}: 寄与率={ratio:.4f}, 累積={cumulative:.4f}")

# Deciding how many dimensions to keep: show the cumulative ratio reached when
# keeping 2, 3, ... up to all components.
print("\n=== 次元削減の決定 ===")
for n_kept in range(2, len(cumulative_variance_ratio) + 1):
    # Index n_kept - 1 holds the cumulative ratio of the first n_kept components.
    print(f"{n_kept}成分で累積寄与率: {cumulative_variance_ratio[n_kept - 1]:.4f}")
