<a href="https://colab.research.google.com/github/komazawa-deep-learning/komazawa-deep-learning.github.io/blob/master/2024notebooks/2024_0620LFW_classifcaition_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# いくつかの分類器を用いた顔分類

* 参考 URL
[Faces recognition example using eigenfaces and SVM](https://scikit-learn.org/stable/auto_examples/applications/plot_face_recognition.html#sphx-glr-auto-examples-applications-plot-face-recognition-py)

# 準備

In [None]:
#from time import time
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import loguniform

from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# LFW データセットの読み込み
LFW とは Labeled Faces in the Wild の略

In [None]:
# Load the LFW faces, passing min_faces_per_person=70 and resize=0.4 to the loader.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# Flattened pixel vectors are used as the features; the model ignores the
# relative 2D positions of the pixels.
X = lfw_people.data
# The label to predict is the integer id of the person.
y = lfw_people.target
target_names = lfw_people.target_names

# Shape of the image stack, kept so flattened rows can be reshaped for plotting.
n_samples, height, width = lfw_people.images.shape
n_features = X.shape[1]
n_classes = target_names.shape[0]

# Summary of the loaded dataset.
print("総データセットサイズ")
print("サンプル数: %d" % n_samples)
print("特徴数: %d" % n_features)
print("クラス数 (識別すべき人物数): %d" % n_classes)
print(f'ターゲット名:{target_names}')

# データの表示

In [None]:
# Grid size: i_max rows by j_max columns of randomly chosen faces.
j_max = 7
i_max = 5
Ns = np.random.permutation(len(X))
fig, ax = plt.subplots(i_max, j_max, figsize=(j_max+1,i_max+3))

# ax.flat walks the axes grid row by row, so each shuffled sample index is
# paired with the next free panel.
for panel, idx in zip(ax.flat, Ns[:i_max * j_max]):
    img = X[idx].reshape(height, width)
    panel.imshow(img, cmap='gray')
    panel.set_xticks([])
    panel.set_yticks([])
    # Title each panel with the last word of the person's name.
    panel.set_title(target_names[y[idx]].split(' ')[-1])

plt.tight_layout()
plt.show()

# データセットを訓練データとテストデータへ分割

In [None]:
# Hold out 25% of the samples as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Fit the standardization on the training data only, then apply the same
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# To inspect a face on its original pixel scale:
#   scaler.inverse_transform(X_train)[0].reshape(height, width).max()

# 固有顔 (主成分分析) による特徴エンジニアリング

In [None]:
# Number of principal components (eigenfaces) to extract.
n_components = 150

# X_train.shape[0] is the number of training faces (rows), not the feature
# dimensionality, so the message reports it as a count of images.
print(f"{X_train.shape[0]} 枚の訓練顔画像から最大固有値上位 {n_components} 成分の固有顔を抽出")

# random_state pins the randomized SVD solver so the eigenfaces, and every
# classifier trained on the projected features, are reproducible across runs.
pca = PCA(
    n_components=n_components,
    svd_solver="randomized",
    whiten=True,
    random_state=42,
).fit(X_train)

# Each component is a flattened image; reshape back to (height, width) for plotting.
eigenfaces = pca.components_.reshape((n_components, height, width))

print("入力データを固有顔による直交基底へ射影")
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# サポートベクターマシンによる分類

In [None]:
# RBF-kernel SVM trained on the PCA-projected features.
# Alternative hyperparameters left by the author:
#   C=3155.325849438, gamma=0.0012014060871781294
svc = SVC(kernel='rbf', class_weight='balanced')
svc.fit(X_train_pca, y_train)

# Accuracy (in percent) on the held-out test split.
preds = svc.predict(X_test_pca)
test_accuracy = np.mean(preds == y_test) * 100
print(f'テストデータの分類結果: {test_accuracy:.2f}%')

# Accuracy (in percent) on the training split, for comparison.
preds = svc.predict(X_train_pca)
train_accuracy = np.mean(preds == y_train) * 100
print(f'訓練データの分類結果  : {train_accuracy:.2f}%')

In [None]:
# Per-class precision/recall report and confusion matrix for the SVM on the test split.
print("テストデータを用いた人物分類")
y_pred = svc.predict(X_test_pca)

report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

ConfusionMatrixDisplay.from_estimator(
    estimator=svc,
    X=X_test_pca,
    y=y_test,
    display_labels=target_names,
    xticks_rotation="vertical",
)
plt.tight_layout()
plt.show()

# [XGBoost](https://xgboost.readthedocs.io/en/stable/) による分類

In [None]:
try:
    from xgboost import XGBClassifier
except ImportError:
    !pip install xgboost
    from xgboost import XGBClassifier

In [None]:
# This is a multi-class problem (one class per person), so request the
# multi-class objective explicitly. With 'binary:logistic' the scikit-learn
# wrapper silently switches to a multi-class objective, which made the code
# misleading about what is actually trained.
xgb = XGBClassifier(n_estimators=10, max_depth=5, learning_rate=1, objective='multi:softprob')
# A lighter configuration to experiment with: n_estimators=2, max_depth=2

# Fit the boosted trees on the PCA-projected training features.
xgb.fit(X_train_pca, y_train)

# Accuracy (in percent) on the held-out test split.
preds = xgb.predict(X_test_pca)
print(f'テストデータの分類結果: {(np.array(preds == y_test) * 1).mean() * 100:.2f}%')

# Accuracy (in percent) on the training split, for comparison.
preds = xgb.predict(X_train_pca)
print(f'訓練データの分類結果  : {(np.array(preds == y_train) * 1).mean() * 100:.2f}%')


In [None]:
# Per-class precision/recall report and confusion matrix for XGBoost on the test split.
print("テストデータを用いた人物分類")
y_pred = xgb.predict(X_test_pca)

report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

ConfusionMatrixDisplay.from_estimator(
    estimator=xgb,
    X=X_test_pca,
    y=y_test,
    display_labels=target_names,
    xticks_rotation="vertical",
)
plt.tight_layout()
plt.show()

# ナイーブベイズによる分類

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler

# MultinomialNB needs non-negative features, so rescale the data to [0, 1].
# A dedicated name is used so the StandardScaler bound to `scaler` earlier is
# not overwritten. clip=True keeps test values that fall outside the training
# range inside [0, 1]; without it they could come out negative.
minmax_scaler = MinMaxScaler(clip=True)
X_train_scaled = minmax_scaler.fit_transform(X_train)
X_test_scaled = minmax_scaler.transform(X_test)

# Fit the multinomial naive Bayes model on the non-negative scaled data.
mNB = MultinomialNB()
mNB.fit(X_train_scaled, y_train)

# Accuracy (in percent) on the held-out test split.
preds = mNB.predict(X_test_scaled)
print(f'テストデータの分類結果: {(np.array(preds == y_test) * 1).mean() * 100:.2f}%')

# Accuracy (in percent) on the training split, for comparison.
preds = mNB.predict(X_train_scaled)
print(f'訓練データの分類結果  : {(np.array(preds == y_train) * 1).mean() * 100:.2f}%')

In [None]:
# Per-class precision/recall report and confusion matrix for naive Bayes on the test split.
print("テストデータを用いた人物分類")
y_pred = mNB.predict(X_test_scaled)

report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

ConfusionMatrixDisplay.from_estimator(
    estimator=mNB,
    X=X_test_scaled,
    y=y_test,
    display_labels=target_names,
    xticks_rotation="vertical",
)
plt.tight_layout()
plt.show()

# ランダムフォレストによる分類

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 depth-limited trees. oob_score=True is requested so that
# rf.oob_score_ is available after fitting.
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Train the forest on the PCA-projected features and show its out-of-bag score.
rf.fit(X_train_pca, y_train)

print(rf.oob_score_)

# Accuracy (in percent) on the held-out test split.
preds = rf.predict(X_test_pca)
test_accuracy = np.mean(preds == y_test) * 100
print(f'テストデータの分類結果: {test_accuracy:.2f}%')

# Accuracy (in percent) on the training split, for comparison.
preds = rf.predict(X_train_pca)
train_accuracy = np.mean(preds == y_train) * 100
print(f'訓練データの分類結果  : {train_accuracy:.2f}%')


In [None]:
# Per-class precision/recall report and confusion matrix for the random forest on the test split.
print("テストデータを用いた人物分類")
preds = rf.predict(X_test_pca)

report = classification_report(y_test, preds, target_names=target_names)
print(report)

ConfusionMatrixDisplay.from_estimator(
    estimator=rf,
    X=X_test_pca,
    y=y_test,
    display_labels=target_names,
    xticks_rotation="vertical",
)
plt.tight_layout()
plt.show()