<a href="https://colab.research.google.com/github/mipypf/practical-mi-guide/blob/develop/chapter5/src/plotly_sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# インタラクティブな可視化を行う

### 右上の「接続」をクリックし、ランタイムに接続

## ライブラリをインストール

In [1]:
! pip uninstall -y thinc spacy fastai # 競合するライブラリを削除
! pip install scikit-learn==1.3.2 #ライブラリのverによっては次元圧縮の挙動が異なることがあるため注意
! pip install numpy==1.26.4
! pip install pandas==2.2.2
! pip install japanize_matplotlib==1.1.3
! pip install umap-learn==0.5.5
! pip install plotly==5.24.1

Found existing installation: thinc 8.3.6
Uninstalling thinc-8.3.6:
  Successfully uninstalled thinc-8.3.6
Found existing installation: spacy 3.8.5
Uninstalling spacy-3.8.5:
  Successfully uninstalled spacy-3.8.5
Found existing installation: fastai 2.7.19
Uninstalling fastai-2.7.19:
  Successfully uninstalled fastai-2.7.19
Collecting scikit-learn==1.3.2
  Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting numpy<2.0,>=1.17.3 (from scikit-learn==1.3.2)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-

### 「ランタイム」タブから「セッションを再起動する」を選択し、「はい」をクリック

## データセットのダウンロード
 - https://github.com/mipypf/practical-mi-guide/tree/main/chapter5/input から chapter3_demo_data_postprocess.csvをダウンロード

## Google Colabの準備

In [1]:
# Set to True when running on Google Colab, False otherwise.
colab = True

In [2]:
# On Google Colab: click the file icon at the left edge and drag-and-drop
# chapter3_demo_data_postprocess.csv to upload it into the working directory.
# Colab reads and writes in the working directory; otherwise the sibling
# input/ and output/ directories are used.
INPUT_FILE_PATH, OUTPUT_FILE_PATH = (
    ("./", "./") if colab else ("../input/", "../output/")
)

## ライブラリをインポート

In [3]:
import warnings

# Ignore every warning category. This is installed before the remaining
# imports run, so warnings emitted while importing them are hidden as well.
warnings.filterwarnings("ignore")

import japanize_matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
import umap
from plotly import express as px
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm

## グラフ表示の際のフォント種類やフォントサイズを設定

In [4]:
# Apply a uniform 20pt size to all text elements and select the
# IPAexGothic font family for matplotlib figures.
plt.rcParams.update(
    {
        "font.size": 20,
        "axes.labelsize": 20,
        "xtick.labelsize": 20,
        "ytick.labelsize": 20,
        "axes.titlesize": 20,
        "font.family": "IPAexGothic",
    }
)

## ファイルのコンフィグ情報を定義

In [5]:
class CFG:
    """Notebook-wide configuration values, accessed as class attributes."""

    # Directory the input CSV is read from (taken from INPUT_FILE_PATH).
    input_dir = INPUT_FILE_PATH
    # Directory generated files are written to (taken from OUTPUT_FILE_PATH).
    output_dir = OUTPUT_FILE_PATH
    # Seed passed to UMAP as random_state.
    random_seed = 42
    # Experiment label; not referenced elsewhere in the visible cells.
    exp_no = "eda_plotly"

## データを読み込み、データフレームに格納

In [6]:
# Load the post-processed demo data from the configured input directory
# and display it.
input_csv_path = f"{CFG.input_dir}/chapter3_demo_data_postprocess.csv"
train_df = pd.read_csv(input_csv_path)
train_df

Unnamed: 0,サンプルID,サンプル作製日,担当者,備考,材料１,材料２,材料３,材料４,材料５,材料６,...,乾燥温度,乾燥時間,擦過回数,擦過圧力,耐擦過性,画像濃度,粘度,表面張力,保存後粘度,保存後状態
0,DP_001,2024/1/19,高岡,,46.9,40,0,2,0,2.0,...,25,,10,5,2.4,1.06,9.0,35,,
1,DP_002,2024/1/19,高岡,,47.4,40,0,2,0,1.5,...,25,,10,5,2.4,1.07,9.0,38,,
2,DP_003,2024/1/19,高岡,,47.7,40,0,2,0,1.2,...,25,,10,5,2.4,1.07,9.0,40,,
3,DP_004,2024/1/19,高岡,,47.9,40,0,2,0,1.0,...,25,,10,5,2.4,1.08,9.0,41,,
4,DP_005,2024/1/19,高岡,,48.4,40,0,2,0,0.5,...,25,,10,5,2.4,1.09,9.0,44,,
5,DP_006,2024/1/30,福原,ref,47.4,40,0,2,0,1.5,...,25,,10,5,2.7,1.07,9.1,38,,
6,DP_007,2024/1/30,福原,,47.4,40,0,2,0,0.0,...,25,,10,5,2.7,1.05,9.1,37,,
7,DP_008,2024/1/30,福原,,47.4,40,0,2,0,0.0,...,25,,10,5,2.7,1.02,9.1,39,,
8,DP_009,2024/1/30,福原,,45.4,40,0,2,0,1.5,...,25,,10,5,3.1,1.07,10.3,36,,
9,DP_010,2024/1/30,福原,,43.4,40,0,2,0,1.5,...,25,,10,5,3.6,1.07,11.5,34,,


## 使用する説明変数をリストとして定義

In [7]:
# Explanatory-variable columns that are standardised and then embedded with UMAP.
numeric_parameter_cols = [
    # Excluded metadata columns:
    # 'サンプルID', 'サンプル作製日', '担当者', '備考'
    # ('サンプル名' was also listed here but is not among the columns visible
    #  in the displayed header - TODO confirm whether it exists in the CSV)
    "材料１",
    "材料２",
    "材料３",
    "材料４",
    "材料５",
    "材料６",
    "材料７",
    "材料８",
    "材料９",
    "材料１０",
    "材料１１",
    "材料１２",
    "材料１３",
    "塗布量",
    "乾燥方式",
    "乾燥温度",
    # '乾燥時間' is left out (it appears empty in the displayed rows).
    "擦過回数",
    "擦過圧力",
    # Objective columns are left out:
    # '耐擦過性', '画像濃度', '粘度', '表面張力', '保存後粘度'
]

## 目的変数をリストとして定義

In [8]:
# Objective (target) variable column names.
objective_cols = [
    "耐擦過性",
    "画像濃度",
    "粘度",
    "表面張力",
    "保存後粘度",
]

## 説明変数のスケーリング

In [9]:
# Standardise the explanatory variables: learn per-column statistics first,
# then apply them. scaler_x holds the fitted StandardScaler afterwards.
scaler_x = StandardScaler()
scaler_x.fit(train_df[numeric_parameter_cols])
X_train = scaler_x.transform(train_df[numeric_parameter_cols])
X_train

array([[ 1.00830673,  0.75592895, -0.75438695,  0.55339859, -0.68624357,
         1.53976945, -0.1767767 , -0.1767767 ,  0.12768848, -0.35355339,
        -0.86677605, -0.2943724 ,  0.        ,  0.        , -1.16496475,
        -1.15482139,  0.        ,  0.        ],
       [ 1.16797142,  0.75592895, -0.75438695,  0.55339859, -0.68624357,
         0.31831772, -0.1767767 , -0.1767767 ,  0.12768848, -0.35355339,
        -0.86677605, -0.2943724 ,  0.        ,  0.        , -1.16496475,
        -1.15482139,  0.        ,  0.        ],
       [ 1.26377024,  0.75592895, -0.75438695,  0.55339859, -0.68624357,
        -0.41455331, -0.1767767 , -0.1767767 ,  0.12768848, -0.35355339,
        -0.86677605, -0.2943724 ,  0.        ,  0.        , -1.16496475,
        -1.15482139,  0.        ,  0.        ],
       [ 1.32763612,  0.75592895, -0.75438695,  0.55339859, -0.68624357,
        -0.903134  , -0.1767767 , -0.1767767 ,  0.12768848, -0.35355339,
        -0.86677605, -0.2943724 ,  0.        ,  0.   

## UMAPで説明変数を2次元に圧縮（n_neighbors=5）

In [10]:
# Embed the scaled explanatory variables into 2 dimensions.
# n_neighbors is set to 5 here; the UMAP default is 15.
umap_ = umap.UMAP(
    n_components=2, random_state=CFG.random_seed, n_neighbors=5
)
X_train_umap = umap_.fit_transform(X_train)

# Wrap the embedding in a DataFrame that reuses train_df's index, so the
# sample IDs attached below line up row-for-row with the embedding even if
# train_df does not carry a default 0..n-1 index.
train_umap_df = pd.DataFrame(
    X_train_umap, columns=["UMAP1", "UMAP2"], index=train_df.index
)

# Attach the sample ID so each embedded point can be identified.
train_umap_df = pd.concat([train_umap_df, train_df["サンプルID"]], axis=1)
train_umap_df

Unnamed: 0,UMAP1,UMAP2,サンプルID
0,13.943232,-2.223612,DP_001
1,13.955266,-2.646678,DP_002
2,13.812184,-3.06749,DP_003
3,13.927896,-3.4114,DP_004
4,14.390882,-3.601482,DP_005
5,13.77035,-2.196065,DP_006
6,14.36668,-3.325696,DP_007
7,14.151196,-3.843003,DP_008
8,14.480962,-2.176145,DP_009
9,14.852762,-1.999945,DP_010


## UMAPの結果でインタラクティブな可視化を行う

In [11]:
# Interactive scatter plot of the UMAP embedding; the sample ID column is
# supplied as extra hover data for each point.
fig = px.scatter(
    data_frame=train_umap_df,
    x="UMAP1",
    y="UMAP2",
    hover_data="サンプルID",
)
fig.show()

In [12]:
# Export the interactive figure as an HTML file in the output directory.
umap_html_path = f"{CFG.output_dir}/umap_plotly_chapter3_demo_data_postprocess.html"
fig.write_html(umap_html_path)

In [13]:
# Download umap_plotly_chapter3_demo_data_postprocess.html to the local machine.
# The google.colab package is only importable inside Colab, so the step is
# skipped when the notebook runs with colab = False.
if colab:
    from google.colab import files

    files.download('umap_plotly_chapter3_demo_data_postprocess.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## 実行環境のライブラリverを保存

In [14]:
# Check the Python version.
!python3 -V

Python 3.11.12


In [15]:
# Write the installed package versions to requirements_plotly_sample.txt in the working directory.
!pip freeze > requirements_plotly_sample.txt

In [16]:
# Download the saved requirements file to the local machine.
# google.colab is only importable inside Colab, so skip this step otherwise;
# importing here keeps the cell independent of earlier download cells.
if colab:
    from google.colab import files

    files.download('requirements_plotly_sample.txt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>