<a href="https://colab.research.google.com/github/komazawa-deep-learning/komazawa-deep-learning.github.io/blob/master/2021notebooks/2021_1221Lime_face_and_GradBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- date: 2021_1221
- author: Shin Asakawa <asakawa@ieee.org>
- filename: 2021_1221Lime_face_and_GradBoost.ipynb
- source: https://github.com/marcotcr/lime/blob/master/doc/notebooks/Tutorial%20-%20Faces%20and%20GradBoost.ipynb

# LIME による顔認識のデモ

上記ソースがそのままでは動かないので，若干修正した。

In [None]:
# Install LIME (stdout and stderr are sent to /dev/null, so pip errors are not shown)
!pip install lime > /dev/null 2>&1 

# 概要
<!-- # Overview -->

このノートブックでは、`lime_image` ツールを Olivetti Faces のような少し大きなデータセットに適用する方法を説明します。
このデータセットは非常に低解像度で，かなりの高速反復が可能です。
<!-- 
The notebook shows how the ```lime_image``` tools can be applied to a slightly larger dataset like the Olivetti Faces. 
The dataset is very low resolution and allows quite a bit of rapid-iteration. -->

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage.color import gray2rgb, rgb2gray # since the code wants color images

# `montage2d` のインストール元を変更
#from skimage.util.montage import montage2d # to make a nice montage of the images
from skimage.util import montage as montage2d

In [None]:
# Fetch the Olivetti faces dataset
from sklearn.datasets import fetch_olivetti_faces

faces = fetch_olivetti_faces()

# lime_image expects colour images, so turn every 64x64 gray face into RGB
X_vec = np.stack(
    [gray2rgb(face) for face in faces.data.reshape(-1, 64, 64)],
    axis=0,
)
y_vec = faces.target.astype(np.uint8)

In [None]:
# データセットの表示
%matplotlib inline
fig, ax1 = plt.subplots(1,1, figsize = (20,20))
ax1.imshow(montage2d(X_vec[:,:,:,0]), cmap='gray', interpolation = 'none')
ax1.set_title('All Faces')
ax1.axis('off')

# パイプラインの設定
<!-- # Setup a Pipeline -->

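ここでは，画像を処理するためのパイプラインを作成します．基本的には，画像をグレースケールに戻して 1 次元ベクトルに平坦化し，正規化と PCA による次元圧縮を行った上で，勾配ブースティング分類器 (GradientBoostingClassifier) で分類します．なお，元のチュートリアルに合わせて変数名は `simple_rf_pipeline` のままですが，実際に使用している分類器は RandomForest ではありません．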
<!-- 
Here we make a pipeline for processing the images where basically we flatten the image back to 1d vectors and then use a RandomForest Classifier -->

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import PCA

class PipeStep:
    """
    Adapter that exposes a plain function as a stateless pipeline transform.

    ``fit`` learns nothing and only returns the object itself; ``transform``
    hands its input to the wrapped function and returns whatever it returns.
    """

    def __init__(self, step_func):
        # Callable applied to the input every time transform() is called
        self._step_func = step_func

    def fit(self, *_ignored):
        """Ignore any positional arguments and return ``self``."""
        return self

    def transform(self, X):
        """Return the wrapped function's result for ``X``."""
        func = self._step_func
        return func(X)

# Stateless pre-processing steps, wrapped so they can be placed in a Pipeline
makegray_step = PipeStep(lambda images: [rgb2gray(image) for image in images])
flatten_step = PipeStep(lambda images: [image.ravel() for image in images])

# NOTE: the variable name says "rf" and the last step is labelled 'XGBoost',
# but the estimator actually used is scikit-learn's GradientBoostingClassifier.
simple_rf_pipeline = Pipeline(steps=[
    ('Make Gray', makegray_step),
    ('Flatten Image', flatten_step),
    ('Normalize', Normalizer()),
    ('PCA', PCA(n_components=25)),
    ('XGBoost', GradientBoostingClassifier()),
])

In [None]:
from sklearn.model_selection import train_test_split

# Keep 70% of the faces for training and hold out the rest for testing.
# stratify=y_vec puts every identity in the training set, so the columns of
# predict_proba line up with the integer labels that later cells use as
# column indices; random_state makes the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_vec, y_vec, train_size=0.70, stratify=y_vec, random_state=42)

In [None]:
# Fit the whole pipeline (pre-processing steps and classifier) on the training split
simple_rf_pipeline.fit(X_train, y_train)

# モデルのスコアリング
<!-- # Scoring the Model -->

テストデータに対するモデルの評価指標を表示し，どの程度うまく分類できているかを確認します。
<!-- We show the scoring of the model on the test data to see how well it works -->


In [None]:
from sklearn.metrics import classification_report

# Predicted labels and per-class probabilities for the held-out test images
pipe_pred_test = simple_rf_pipeline.predict(X_test)
pipe_pred_prop = simple_rf_pipeline.predict_proba(X_test)

# Print the classification report comparing predictions with the true labels
print(classification_report(y_test, pipe_pred_test))

In [None]:
# %load_ext autoreload
# %autoreload 2
import os
import sys

try:
    import lime
except ImportError:
    # lime is not installed: fall back to a source checkout by adding the
    # directory two levels above the working directory to the import path.
    # Only ImportError is caught so that other failures (and Ctrl-C) surface.
    sys.path.append(os.path.join('..', '..'))
    import lime

In [None]:
from lime import lime_image

In [None]:
from lime.wrappers.scikit_image import SegmentationAlgorithm

# verbose=True makes the explainer print its per-label diagnostics
# (pass verbose=False for a silent explainer)
explainer = lime_image.LimeImageExplainer(verbose=True)

# SLIC segmentation, later handed to explain_instance as segmentation_fn
segmenter = SegmentationAlgorithm(
    'slic',
    n_segments=100,
    compactness=1,
    sigma=1,
)

In [None]:
%%time
explanation = explainer.explain_instance(X_test[0], 
                                         classifier_fn = simple_rf_pipeline.predict_proba, 
                                         top_labels=6, hide_color=0, num_samples=10000, segmentation_fn=segmenter)

In [None]:
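# The original code below did not run, so it was rewritten as the next cell
# (presumably because y_test[0] is not guaranteed to be one of the labels in
# explanation.top_labels -- TODO confirm)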

# from skimage.color import label2rgb
# temp, mask = explanation.get_image_and_mask(y_test[0], positive_only=True, num_features=5, hide_rest=False)
# fig, (ax1, ax2) = plt.subplots(1,2, figsize = (8, 4))
# ax1.imshow(label2rgb(mask,temp, bg_label = 0), interpolation = 'nearest')
# ax1.set_title('Positive Regions for {}'.format(y_test[0]))
# temp, mask = explanation.get_image_and_mask(y_test[0], positive_only=False, num_features=10, hide_rest=False)
# ax2.imshow(label2rgb(3-mask,temp, bg_label = 0), interpolation = 'nearest')
# ax2.set_title('Positive/Negative Regions for {}'.format(y_test[0]))

In [None]:
from skimage.color import label2rgb

# Explain the first entry of the explainer's top_labels
N = explanation.top_labels[0]
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

# Left panel: positive-only mask, limited to 5 features
temp, mask = explanation.get_image_and_mask(
    N, positive_only=True, num_features=5, hide_rest=False)
ax1.imshow(label2rgb(mask, temp, bg_label=0), interpolation='nearest')
ax1.set_title('Positive Regions for {}'.format(N))

# Right panel: positive and negative regions together, limited to 10 features
temp, mask = explanation.get_image_and_mask(
    N, positive_only=False, num_features=10, hide_rest=False)
ax2.imshow(label2rgb(3 - mask, temp, bg_label=0), interpolation='nearest')
ax2.set_title('Positive/Negative Regions for {}'.format(N))

In [None]:
# One column per explained label: explanation on top, a training example below
fig, m_axs = plt.subplots(nrows=2, ncols=6, figsize=(12, 4))
for label, (expl_ax, sample_ax) in zip(explanation.top_labels, m_axs.T):
    # Top row: positive regions for this label with its predicted probability
    temp, mask = explanation.get_image_and_mask(
        label, positive_only=True, num_features=5,
        hide_rest=False, min_weight=0.01)
    expl_ax.imshow(label2rgb(mask, temp, bg_label=0), interpolation='nearest')
    expl_ax.set_title('Positive for {}\nScore:{:2.2f}%'.format(label, 100*pipe_pred_prop[0, label]))
    expl_ax.axis('off')

    # Bottom row: a randomly chosen training image carrying the same label
    face_id = np.random.choice(np.flatnonzero(y_train == label))
    sample_ax.imshow(X_train[face_id])
    sample_ax.set_title('Example of {}'.format(label))
    sample_ax.axis('off')

# 洞察の獲得
<!-- # Gaining Insight-->

アルゴリズムが分類を誤った事例について，その原因を説明できるか調べてみます。
<!-- Can we find an explanation for a classification the algorithm got wrong -->

In [None]:
# Indices of the test images whose predicted label differs from the true label
wrong_candidates = np.where(pipe_pred_test != y_test)[0]
if wrong_candidates.size == 0:
    # np.random.choice on an empty array fails with an unhelpful message,
    # so report the real situation explicitly
    raise ValueError('No misclassified test samples: nothing to explain.')
wrong_idx = np.random.choice(wrong_candidates)

# English equivalent of the message printed below:
# 'Using #{} where the label was {} and the pipeline predicted {}'
print('{} 番目のデータを使用してみる。正解ラベルが {} だったけれど，予測ラベルが {} であったので'.format(wrong_idx, y_test[wrong_idx], pipe_pred_test[wrong_idx]))

41 番目のデータを使用してみる。正解ラベルが 3 だったけれど，予測ラベルが 12 であったので


In [None]:
%%time
explanation = explainer.explain_instance(X_test[wrong_idx], 
                                         classifier_fn = simple_rf_pipeline.predict_proba, 
                                         top_labels=6, hide_color=0, num_samples=10000, segmentation_fn=segmenter)

  return self.target_fn(args[0], **self.target_params)


  0%|          | 0/10000 [00:00<?, ?it/s]

Intercept -0.010579914804595138
Prediction_local [0.02389628]
Right: 0.01378783399462539
Intercept -0.016798572309566245
Prediction_local [0.37034592]
Right: 0.016785849443355844
Intercept 0.0017896845904993247
Prediction_local [0.00367947]
Right: 0.08181574337097498
Intercept -0.005636884816377536
Prediction_local [0.01075137]
Right: 0.08893167825888507
Intercept 0.005489360512966603
Prediction_local [0.00164778]
Right: 0.09818637098283214
Intercept -0.005125698434939239
Prediction_local [0.01098969]
Right: 0.3928685960627638
CPU times: user 15.2 s, sys: 12.1 s, total: 27.3 s
Wall time: 15.2 s


In [None]:
# One column per explained label: explanation on top, a training example below
fig, m_axs = plt.subplots(nrows=2, ncols=6, figsize=(24, 8))
for label, (expl_ax, sample_ax) in zip(explanation.top_labels, m_axs.T):
    # Top row: positive regions for this label with its predicted probability
    temp, mask = explanation.get_image_and_mask(
        label, positive_only=True, num_features=5,
        hide_rest=False, min_weight=0.01)
    expl_ax.imshow(label2rgb(mask, temp, bg_label=0), interpolation='nearest')
    expl_ax.set_title('Positive for {}\nScore:{:2.2f}%'.format(label, 100*pipe_pred_prop[wrong_idx, label]))
    expl_ax.axis('off')

    # Bottom row: a randomly chosen training image carrying the same label
    face_id = np.random.choice(np.flatnonzero(y_train == label))
    sample_ax.imshow(X_train[face_id])
    sample_ax.set_title('Example of {}'.format(label))
    sample_ax.axis('off')