## 研究の流れ

この ipynb と README.md を見るだけで流れがわかるようにしました。  
なお、各処理の詳細を知りたい場合は、各ファイル、Docstring、[README.md](./README.md)、[Notion](https://vizlabstudent.notion.site/de778517ea47444c9598d1f5147d78da?v=9dd0c88c9540426db2fa5a4308baf536&pvs=4) を参照してください。

※1 /Magnetic を作業ディレクトリとしてください


In [None]:
## 使用モジュールの一覧
import json
import os
import pickle
import random
import re
import shutil
import subprocess
import sys
from datetime import datetime, timedelta
from glob import glob
from logging import getLogger
from math import floor
from struct import pack

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from flask import Flask, Response, jsonify, request
from flask_cors import CORS
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from torch import cuda
from xgboost import XGBClassifier

# ディレクトリの定義
from src.config.params import DATASETS, IMAGE_PATH, LABELS, ML_DATA_DIR, ROOT_DIR, SIDES, SNAP_PATH, SRC_PATH, VARIABLE_PARAMETERS, set_dataset

In [None]:
# 使用する自作モジュール
from src.MachineLearning.CNN import CnnTrain
from src.MachineLearning.Training import SupervisedML
from src.processing.separator.separatorLoop import move_file, rename_file, set_ij
from src.processing.snap2npy import snap2npy
from src.processing.train.fusion_npy import CreateTrain
from src.processing.train.make_train import _create_json, _set_default, _set_n, _set_xo
from src.processing.viewer.createViewer import _sort_paths
from src.visualization.LIC.LIC import LicMethod
from src.visualization.visualize.Plot import PlotMethod
from src.visualization.visualize.SnapData import SnapData

### 0. フォルダ作成


In [None]:
import subprocess
from src.config.params import BIN_PATH

# Run BasePath.bat located under BIN_PATH (this is the notebook's folder-creation step).
# check=True makes subprocess.run raise CalledProcessError on a non-zero exit status.
base_path_script = f"{BIN_PATH}/BasePath.bat"
subprocess.run([base_path_script], check=True)

### 1. データの加工


#### 1.1 元データを各種パラメータに分割する


In [None]:
import os
import shutil
import subprocess
from glob import glob
from src.config.params import ROOT_DIR, SNAP_PATH, SRC_PATH, VARIABLE_PARAMETERS
from src.processing.separator.separatorLoop import move_file, rename_file, set_ij

In [None]:
# Parameter definitions.
# xyz maps the numeric axis suffix to its letter; items2 lists the vector outputs
# (magfield1..3, velocity1..3) that get renamed, items1 the outputs that get moved.
xyz = {1: "x", 2: "y", 3: "z"}
items1 = VARIABLE_PARAMETERS.copy()
items2 = [f"{field}{axis}" for field in ("magfield", "velocity") for axis in xyz]

for dataset in (77, 497, 4949):
    i, j = set_ij(dataset)  # type: ignore

    # Raw snapshot files of this dataset.
    snapshot_pattern = ROOT_DIR + f"/data/ICh.target=50.ares=1.0d-{i}.adiffArt=1.0d-{j}.h00.g00.BCv1=0.0/Snapshots/*"
    for file in glob(snapshot_pattern):
        # Split the raw snapshot with the external separator program.
        # check=False: a non-zero exit status is not treated as an error here.
        subprocess.run([SRC_PATH + "/Processing/separator/separator.exe", file], check=False)

        # The snapshot file name is expected to have five dot-separated fields; the last
        # two are used as param and job (numeric fields are converted to int).
        fields = [int(token) if token.isnumeric() else token for token in os.path.basename(file).split(".")]
        _, _, _, param, job = fields

        # Rename the produced vector-component files that exist in the working directory.
        # filter() is lazy, so each existence check happens right before its rename.
        for produced in filter(os.path.exists, items2):
            rename_file(xyz, produced)

        # Move the produced per-parameter files that exist in the working directory.
        for produced in filter(os.path.exists, items1):
            move_file(dataset, param, job, produced)

    # Move the coordinate files (coordx / coordy / coordz) last.
    for axis in xyz.values():
        shutil.move("coord" + axis, SNAP_PATH + f"/snap{dataset}")

#### 1.2 バイナリを .npy に変換


In [None]:
from glob import glob

from src.config.params import SNAP_PATH, VARIABLE_PARAMETERS
from src.processing.snap2npy import snap2npy
from src.visualization.visualize.SnapData import SnapData

In [None]:
dataset = 77  # 77, 497, 4949

# A single SnapData instance is shared by every conversion call.
sp = SnapData()
for param in VARIABLE_PARAMETERS:
    snapshot_pattern = f"{SNAP_PATH}/snap{dataset}/{param}/*/*"
    for path in glob(snapshot_pattern):
        # Convert one snapshot file (original note: the source data is split into two parts).
        snap2npy(sp, path, dataset)

### 2. 可視化


#### 2.1 各種可視化


In [None]:
from glob import glob

from src.config.params import SNAP_PATH, DATASETS
from src.visualization.visualize.Plot import PlotMethod

In [None]:
for dataset in DATASETS:
    target_path = SNAP_PATH + f"/snap{dataset}"

    # Plotting helper for this dataset.
    viz = PlotMethod(dataset)

    # Parameter name -> list of snapshot paths.
    # Every list is sorted: glob() returns paths in filesystem-dependent order, and the
    # lists below are paired by position with zip(), so unsorted results could combine
    # files that belong to different snapshots.
    files = {}

    # Energy plot from density and velocity.
    files["density"] = sorted(glob(target_path + "/density/*/*"))
    files["velocityx"] = sorted(glob(target_path + "/velocityx/*/*"))
    files["velocityy"] = sorted(glob(target_path + "/velocityy/*/*"))
    # strict=True raises ValueError when the lists differ in length.
    for dens_path, vx_path, vy_path in zip(files["density"], files["velocityx"], files["velocityy"], strict=True):
        viz.drawEnergy_for_velocity(dens_path, vx_path, vy_path)

    # Energy plot from the magnetic field.
    files["magfieldx"] = sorted(glob(target_path + "/magfieldx/*/*"))
    files["magfieldy"] = sorted(glob(target_path + "/magfieldy/*/*"))
    for magx_path, magy_path in zip(files["magfieldx"], files["magfieldy"], strict=True):
        viz.drawEnergy_for_magfield(magx_path, magy_path)

    # Heatmap and edge plots for every parameter collected above, plus enstrophy.
    files["enstrophy"] = sorted(glob(target_path + "/enstrophy/*/*"))
    for val_param in ["velocityx", "velocityy", "magfieldx", "magfieldy", "density", "enstrophy"]:
        for path in files[val_param]:
            viz.drawHeatmap(path)
            viz.drawEdge(path)

#### 2.2 AVS


#### 2.3 StreamLines


#### 2.4 LIC


In [None]:
from glob import glob

from src.config.params import IMAGE_PATH, SNAP_PATH
from src.visualization.LIC.LIC import LicMethod

In [None]:
import os  # used below; this section's import cell only brings in glob

dataset = 77  # 77, 497, 4949
side = "left"  # right

print("START", f"{dataset}.{side} 開始")
lic = LicMethod()

# Output location for the rendered images, e.g. ./images/LIC/snap77/left
base_out_path = IMAGE_PATH + f"/LIC/snap{dataset}/{side}"
lic.makedir(f"/LIC/snap{dataset}/{side}")
side_tag = os.path.basename(base_out_path)  # last path component, used in the file name

# Input .npy files of the x component; the y-component path is derived from each of them.
binary_paths = glob(SNAP_PATH + f"/half_{side}/snap{dataset}/magfieldx/*/*.npy")

for xfile in binary_paths:
    snap_name = os.path.splitext(os.path.basename(xfile))[0]
    print("START", f"{snap_name} 開始")

    # Output name uses "magfield" instead of "magfieldx",
    # e.g. ./IMAGE_PATH/LIC/snap77/left/lic_snap77.left.magfield.01.14.bmp
    out_stem = os.path.splitext(os.path.basename(xfile.replace("magfieldx", "magfield")))[0]
    out_path = base_out_path + f"/lic_snap{dataset}.{side_tag}.{out_stem}.bmp"

    # Images that already exist are skipped, so an interrupted run can be resumed.
    if not os.path.exists(out_path):
        yfile = xfile.replace("magfieldx", "magfieldy")
        # Build the arguments for the LIC run.
        props = lic.set_command(xfile, yfile, out_path)
        # Run LIC (original note: roughly 20 minutes per image).
        lic.LIC(props)

        # Delete the temporary files (props[1] and props[2] are passed as their locations).
        lic.delete_tempfile(props[1], props[2])

    print("END", f"{snap_name} 終了")

print("END", f"{dataset} 終了")

### 3. 教師データの作成


#### 3.1 ビューワの作成


In [None]:
from src.config.params import IMAGE_PATH, SRC_PATH, SIDES
from src.processing.viewer.createViewer import _sort_paths

In [None]:
from glob import glob  # used below; this section's import cell does not import it

# NOTE: `dataset` is not assigned in this cell; the value left in the notebook state
# (e.g. by the LIC cell) is used.

# The template is identical for every side, so it is read once before the loop.
template_path = SRC_PATH + "/Processing/viewer/template/viewer_template.html"
with open(template_path, "r", encoding="utf-8") as f:
    template_html = f.read()

for side in SIDES:
    # Collect the rendered LIC images and order them with _sort_paths
    # (original note: chronological order derived from the snap naming rule).
    paths = glob(IMAGE_PATH + f"/LIC/snap{dataset}/{side}/*.bmp")
    paths_sorted = _sort_paths(paths)

    # Build the list of quoted image paths for the viewer, using forward slashes
    # so that Windows paths can be embedded in the HTML.
    normalized_paths = [path.replace("\\", "/") for path in paths_sorted]
    path_list_str = "\n" + "".join(f"\t\t\t'{path_str}', \n" for path_str in normalized_paths)

    # Insert the image list into the template placeholder.
    html = template_html.replace("{ replaceblock }", path_list_str)

    # Save one viewer page per dataset and side.
    out_name = SRC_PATH + f"/Processing/viewer/template/lic_viewer{dataset}.{side}.html"
    with open(out_name, "w", encoding="utf-8") as f:
        f.write(html)

#### 3.2 画像の分割

`python ./src/Processing/viewer/writer.py`  
を実行し、  
`./src/Processing/viewer/template/lic_viewer77.left.html` (上のセルで生成される `lic_viewer{dataset}.{side}.html`)
を Web ブラウザで開く (Drag & Drop)


#### 3.3 データの切り取り


In [None]:
import json

from src.config.params import ML_DATA_DIR
from src.processing.train.make_train import _create_json, _set_default, _set_n, _set_xo

In [None]:
import os  # used below; this section's import cell only brings in json

dataset = 77  # 77, 497, 4949
side = "left"  # left, right
label = "n"  # n, x, o

# Create the label file if it does not exist yet.
file_name = "snap_labels"
labels_path = ML_DATA_DIR + f"/LIC_labels/{file_name}.json"
if not os.path.exists(labels_path):
    _create_json(file_name)

# The processing depends on the label.
if label == "n":  # no reaction
    result_dict = _set_n()
elif label in ["x", "o"]:  # x-point / o-point
    result_dict = _set_xo(dataset, side, label)
else:  # anything else is rejected
    raise ValueError(f"unknown label {label!r}; expected 'n', 'x' or 'o'")

# Load the current content; an empty JSON object is replaced by the default structure.
with open(labels_path, "r", encoding="utf-8") as f:
    data = json.load(f)

if data == {}:
    data = _set_default()

# Update and serialize BEFORE opening the file for writing: mode "w" truncates the file
# immediately, so a KeyError or a serialization error raised after opening would leave
# an empty or partial file and lose every label saved so far.
data[str(dataset)][side][label] = result_dict
serialized = json.dumps(data)

with open(labels_path, "w", encoding="utf-8") as f:
    f.write(serialized)

In [None]:
from src.config.params import SIDES, VARIABLE_PARAMETERS, ML_DATA_DIR, LABELS
from src.processing.train.fusion_npy import CreateTrain

In [None]:
dataset = 77  # 77, 497, 4949
path = f"{ML_DATA_DIR}/LIC_labels/snap_labels.json"

md = CreateTrain(dataset)
# For every (side, label) pair: first cut each variable parameter using the label JSON,
# then run the image-based cut once for that pair.
for side, label in ((s, lb) for s in SIDES for lb in LABELS):
    for val in VARIABLE_PARAMETERS:
        md.cut_and_save_from_json(path, side, label, val)
    md.cut_and_save_from_image(path, side, label)

#### 3.4 データの合成


In [None]:
import os
from glob import glob
from src.config.params import ML_DATA_DIR, LABELS
from src.processing.train.fusion_npy import CreateTrain

In [None]:
dataset = 77  # 77, 497, 4949
OUT_DIR = ML_DATA_DIR + "/snap_files"  # ./ML/data/snap_files

md = CreateTrain(dataset)
# Each entry: (input parameters, name of the fused output, kernel passed to create_training).
props_params = [
    (["velocityx", "velocityy", "density"], "energy", md.kernel_energy),
]
for val_params, out_basename, kernel in props_params:
    # Files of the first input parameter drive the loop.
    base_param = val_params[0]
    for label in LABELS:  # n, x, o
        # e.g. ./ML/data/snap_files/velocityx/point_n
        npys_path = f"{OUT_DIR}/{base_param}/point_{label}"
        pattern = f"{npys_path}/snap{dataset}_{base_param}_*.npy"

        for img_path in glob(pattern):
            # Output path: every occurrence of the first parameter name in the input path
            # (directory and file name) is replaced by the output name,
            # e.g. .../velocityx/point_n/snap77_velocityx_... -> .../energy/point_n/snap77_energy_...
            source_like_path = f"{npys_path}/{os.path.basename(img_path)}"
            out_path = source_like_path.replace(base_param, out_basename)
            md.create_training(kernel, val_params, img_path, out_path)

### 4. 機械学習


#### 4.1 機械学習

- KMeans
- kneighbors
- linearSVC
- rbfSVC
- XGBoost


In [None]:
import json

from src.config.params import SRC_PATH, ML_MODEL_DIR
from src.MachineLearning.Training import SupervisedML

In [None]:
# Basic settings
training_parameter = "density"  # density, energy, enstrophy, pressure, magfieldx, magfieldy, velocityx, velocityy
split_mode = "mix"  # sep, mixsep, mix
split_mode_label = 0  # 0, 1, 2
# Only the "sep" mode carries the label number in its name.
if split_mode == "sep":
    mode_name = split_mode + str(split_mode_label)
else:
    mode_name = split_mode

# Settings for the training data and the classifier
clf_name = "LinearSVC"  # kNeighbors, LinearSVC, rbfSVC, XGBoost
test_size = 0.3
model_random_state = 42


# Load the fixed and the tunable hyper-parameter tables from the config directory.
config_dir = SRC_PATH + "/config"
with open(config_dir + "/fixed_parameter.json", "r", encoding="utf-8") as f:
    ML_FIXED_PARAM_DICT = json.load(f)

with open(config_dir + "/tuning_parameter.json", "r", encoding="utf-8") as f:
    ML_TUNING_PARAM_DICT = json.load(f)

# Fixed parameters are looked up per classifier; tuning parameters per
# classifier, split mode name and training parameter.
fixed_params = ML_FIXED_PARAM_DICT[clf_name]
tuning_params = ML_TUNING_PARAM_DICT[clf_name][mode_name][training_parameter]

summary = f"model={clf_name}, mode={split_mode}, training_parameter={training_parameter}, test_size={test_size}, random_state={model_random_state}"
print("PARAMETER : ", summary)

##### 初回学習


In [None]:
# First-time run: build the train/test data, train, save, then evaluate.
# NOTE(review): test_size and model_random_state from the parameter cell are not passed
# anywhere in this cell - confirm which values split_train_test actually uses.
model = SupervisedML(training_parameter=training_parameter)
model.set_default(training_parameter)

print("Learning  : ", "教師データ作成")
model.split_train_test(split_mode)

print("Learning  : ", "学習開始")
learning_kwargs = {"clf_name": clf_name, "fixed_params": fixed_params, "tuning_params": tuning_params}
model.do_learning(**learning_kwargs)

print("SAVE      : ", "学習結果の保存")
# Save the training data (original note: stored under ML\models\npz\).
model.save_npys()
# Save the model (original note: stored under ML\models\model\{split_mode}\{clf_name}\).
model.save_model()

print("PREDICT   : ", "予測")
# Predict on the test data.
model.predict()
# Report the scores (original note: written to a .txt file under ML\result\{clf_name}\).
model.print_scores()

print("END       : ", "処理終了")

##### 教師データ作成済


In [None]:
# Run when the training data has already been created: reload it, then train and evaluate.
print("LOAD      : ", "データの読み込み")
load_kwargs = {
    "split_mode": split_mode,
    "training_parameter": training_parameter,
    "test_size": test_size,
    "random_state": model_random_state,
}
model = SupervisedML.load_npys(**load_kwargs)

print("Learning  : ", f"学習開始 ({clf_name})")
learning_kwargs = {"clf_name": clf_name, "fixed_params": fixed_params, "tuning_params": tuning_params}
model.do_learning(**learning_kwargs)

print("SAVE      : ", "学習結果の保存")
# Save the model (original note: stored under ML\models\model\{split_mode}\{clf_name}\).
model.save_model()

print("PREDICT   : ", "予測")
# Predict on the test data.
model.predict()
# Report the scores (original note: written to a .txt file under ML\result\{clf_name}\).
model.print_scores()

print("END       : ", "処理終了")

##### モデル作成済


In [None]:
# Run when a trained model has already been saved: load it and evaluate only.
print("LOAD", f"モデルの読み込み ({clf_name})")
# NOTE(review): the file name embeds tuning_params['C'], so this cell raises KeyError for a
# classifier whose tuning parameters have no "C" entry - confirm the naming for those models.
model_file = f"model_{clf_name}_{training_parameter}_{mode_name}.C={tuning_params['C']}.sav"
path = ML_MODEL_DIR + f"/model/{split_mode}/" + model_file
model = SupervisedML.load_model(
    training_parameter=training_parameter,
    split_mode=split_mode,
    split_mode_label=split_mode_label,
    load_path=path,
)

print("PREDICT", "予測")
# Predict on the test data.
model.predict()
# Report the scores (original note: written to a .txt file under ML\result\{clf_name}\).
model.print_scores()

print("END       : ", "処理終了")

#### 4.2 CNN


In [None]:
from src.config.params import ML_MODEL_DIR
from src.MachineLearning.CNN import CnnTrain

In [None]:
# Basic settings for the CNN section
training_parameter = "density"  # density, energy, enstrophy, pressure, magfieldx, magfieldy, velocityx, velocityy
split_mode = "mix"  # sep, mixsep, mix
split_mode_label = 0  # 0, 1, 2
# Only the "sep" mode carries the label number in its name.
mode_name = split_mode + str(split_mode_label) if split_mode == "sep" else split_mode
# Model name shown in this section's log lines. It is set here because the CNN section
# never defined it: on a fresh kernel the print below raised NameError, and after
# running section 4.1 it printed that section's classifier name instead.
clf_name = "CNN"
print("PARAMETER : ", f"model={clf_name}, training_parameter={training_parameter}, mode={split_mode}")


##### 学習


In [None]:
# Train a CNN from scratch, evaluate it, then save it.
EPOCH = 100  # number of epochs passed to run()

print("LOAD      : ", "データの読み込み")
cnn_kwargs = {
    "training_parameter": training_parameter,
    "split_mode": split_mode,
    "split_mode_label": split_mode_label,
}
model = CnnTrain(**cnn_kwargs)
model.set_net()
model.set_train(seed=42)

print("Learning  : ", f"学習開始 ({clf_name})")
model.run(epoch_cnt=EPOCH, do_plot=True)

print("PREDICT   : ", "予測")
model.predict()
model.print_scores()

print("SAVE      : ", "学習結果の保存")
# Save the model (original note: stored under ML\models\model\{split_mode}\{clf_name}).
model.save_model()

print("END       : ", "処理終了")


##### モデル作成済


In [None]:
# Run when a trained CNN has already been saved: load it and evaluate only.
print("LOAD      : ", "モデルの読み込み")
# The file name is fixed to the "device=cuda" variant of the saved model.
model_file = f"model_cnn_npy_{training_parameter}_{mode_name}.save=model.device=cuda.pth"
model_path = ML_MODEL_DIR + f"/model/{split_mode}/" + model_file
model = CnnTrain.load_model(
    training_parameter=training_parameter,
    split_mode=split_mode,
    split_mode_label=split_mode_label,
    load_path=model_path,
)
model.set_train(seed=42)

print("PREDICT   : ", "予測")
# Predict on the test data.
model.predict()
# Report the scores (original note: written to a .txt file under ML\result\{clf_name}).
model.print_scores()

print("END       : ", "処理終了")
