## 研究の流れ

この ipynb と README.md を見るだけで流れがわかるようにしました。  
なお、各処理の詳細が知りたい場合は各ファイル、Docstring, [README.md](./README.md), [Notion](https://vizlabstudent.notion.site/de778517ea47444c9598d1f5147d78da?v=9dd0c88c9540426db2fa5a4308baf536&pvs=4)を閲覧してください。


In [1]:
## 使用モジュールの一覧
import json
import os
import pickle
import random
import re
import shutil
import subprocess
import sys
from datetime import datetime, timedelta
from glob import glob
from logging import getLogger
from math import floor
from struct import pack

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from flask import Flask, Response, jsonify, request
from flask_cors import CORS
from PIL import Image
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from torch import cuda
from xgboost import XGBClassifier

# ディレクトリの定義
from src.config.params import DATASETS, IMAGE_PATH, IMG_SHAPE, LABELS, ML_DATA_DIR, ML_MODEL_DIR, ML_RESULT_DIR, ROOT_DIR, SIDES, SNAP_PATH, SRC_PATH

### 0. フォルダ作成


In [None]:
import subprocess

# Launch the batch script for step "0. フォルダ作成" (folder creation).
# NOTE(review): presumably BasePath.bat creates the project folder layout — confirm in etc/BasePath.bat.
base_path_script = f"{ROOT_DIR}/etc/BasePath.bat"
subprocess.run([base_path_script])

### 1. データの加工


##### 1.1 元データを各種パラメータに分割する


In [2]:
import os
import sys
import shutil
import subprocess
from glob import glob

sys.path.append(os.getcwd() + "/src")

from src.config.params import ROOT_DIR, SNAP_PATH, SRC_PATH, DATASETS, VARIABLE_PARAMETERS
from src.Processing.separater.separatorLoop import move_file, rename_file, set_ij

In [None]:
# Split every raw snapshot of every dataset into per-variable files and file them
# under SNAP_PATH/snap{dataset}.

# Parameter definitions.
# items1: names that are moved into the dataset folder after separation.
items1 = VARIABLE_PARAMETERS.copy()
# items2: numbered vector-component files that are renamed first.
# NOTE(review): presumably rename_file maps the 1/2/3 suffix to x/y/z via `xyz` — confirm in separatorLoop.
items2 = ["magfield1", "magfield2", "magfield3", "velocity1", "velocity2", "velocity3"]
xyz = {1: "x", 2: "y", 3: "z"}

for dataset in DATASETS:
    ij = set_ij(dataset)
    if not ij:
        # set_ij returned nothing usable for this dataset: report and stop.
        print("Value Error", "入力したデータセットは使用できません")
        sys.exit()
    i, j = ij  # type: ignore

    # Raw snapshot files belonging to this dataset.
    files = glob(ROOT_DIR + f"/data/ICh.dataset=50.ares=1.0d-{i}.adiffArt=1.0d-{j}.h00.g00.BCv1=0.0/Snapshots/*")
    for file in files:
        # Run the external separator on the raw snapshot. Its outputs are looked up
        # below by relative name, i.e. in the current working directory.
        subprocess.run([SRC_PATH + "/Processing/cln/separator.exe", f"{file}"])
        # The snapshot file name has five dot-separated fields; the last two are
        # the parameter number and the job number.
        _, _, _, param, job = map(lambda x: int(x) if x.isnumeric() else x, os.path.basename(file).split("."))
        print("OPEN", f"File snap{i}{j}.{param:02d}.{job:02d}")

        # Rename the separator outputs.
        for item2 in items2:
            if os.path.exists(item2):
                rename_file(xyz, item2)
            else:  # output not found
                print("NotFound", f"ファイル {item2}.{param:02d}.{job:02d}")

        # Move the separator outputs into the dataset folder.
        for item1 in items1:
            if os.path.exists(item1):
                move_file(dataset, param, job, item1)
            else:  # output not found
                print("NotFound", f"ファイル {item1}.{param:02d}.{job:02d}")

        print("CLOSE", f"File snap{i}{j}.{param:02d}.{job:02d}")

    # Move the coordinate files (coordx / coordy / coordz) last.
    # Iterating the axis letters directly avoids rebinding `i`, which holds the
    # dataset index obtained from set_ij above.
    for axis in xyz.values():
        shutil.move("coord" + axis, SNAP_PATH + f"/snap{dataset}")

    print("END", "処理終了")

##### 1.2 バイナリを .npy に変換


In [None]:
import os
import sys
from glob import glob

sys.path.append(os.getcwd() + "/src")

from src.config.params import SNAP_PATH, VARIABLE_PARAMETERS, set_dataset
from src.Processing.snap2npy import snap2npy
from src.Visualization.Visualize.SnapData import SnapData

In [None]:
# Convert every separated snapshot file of the chosen dataset with snap2npy.
# The dataset id is typed in interactively and passed through set_dataset.
dataset = set_dataset(input())

sp = SnapData()
for param in VARIABLE_PARAMETERS:
    # Files live two directory levels below each variable folder.
    snapshot_pattern = SNAP_PATH + f"/snap{dataset}/{param}/*/*"
    for path in glob(snapshot_pattern):
        # Per the original author's note, this splits the source data into two parts.
        snap2npy(sp, path, dataset)

### 2. 可視化


#### 2.1 各種可視化


In [None]:
import os
import sys
from glob import glob

sys.path.append(os.getcwd() + "/src")

from src.config.params import SNAP_PATH
from src.Visualization.Visualize.Plot import PlotMethod

In [None]:
# Draw the energy plots, heatmaps and edge images for one dataset.

# set_dataset is not imported by this section's import cell; import it here so
# the cell also runs on a fresh kernel.
from src.config.params import set_dataset

dataset = set_dataset(input())

target_path = SNAP_PATH + f"/snap{dataset}"
viz = PlotMethod(dataset)

files = {}

# glob() returns paths in arbitrary order. The lists below are zipped together,
# so each one is sorted to make sure the i-th entries of every variable belong
# to the same snapshot.

# Velocity-field visualisation
files["density"] = sorted(glob(target_path + "/density/*/*"))
files["velocityx"] = sorted(glob(target_path + "/velocityx/*/*"))
files["velocityy"] = sorted(glob(target_path + "/velocityy/*/*"))
for dens_path, vx_path, vy_path in zip(files["density"], files["velocityx"], files["velocityy"]):
    viz.drawEnergy_for_velocity(dens_path, vx_path, vy_path)

# Magnetic-field visualisation
files["magfieldx"] = sorted(glob(target_path + "/magfieldx/*/*"))
files["magfieldy"] = sorted(glob(target_path + "/magfieldy/*/*"))
for magx_path, magy_path in zip(files["magfieldx"], files["magfieldy"]):
    viz.drawEnergy_for_magfield(magx_path, magy_path)

# Heatmap and edge image for every variable
files["enstrophy"] = sorted(glob(target_path + "/enstrophy/*/*"))
for target in ["velocityx", "velocityy", "magfieldx", "magfieldy", "density", "enstrophy"]:
    for path in files[target]:
        viz.drawHeatmap(path)
        viz.drawEdge(path)

#### 2.2 AVS


#### 2.3 StreamLines


#### 2.4 LIC


In [None]:
import os
import sys
from glob import glob

sys.path.append(os.getcwd() + "/src")

from src.config.params import SNAP_PATH, IMAGE_PATH, SRC_PATH, DATASETS, set_dataset
from src.Visualization.LIC.LIC import LicMethod

In [None]:
# Render LIC images (.bmp) for every magnetic-field snapshot of one dataset/side.

dataset = set_dataset(input())
side = "left"  # right

# Name used for the output folder and file names. Taking the LAST "_"-separated
# token works both for a plain value such as "left" and for a prefixed value;
# indexing with [1] raised IndexError for "left".
side_name = side.split("_")[-1]

if dataset not in DATASETS:
    print("ERROR", "このデータセットは使用できません")
    sys.exit()

print("START", f"{dataset}.{side_name} 開始")

# The LIC executable must exist before anything is processed.
lic_exe = SRC_PATH + "/Visualization/LIC/LIC.exe"
if not os.path.exists(lic_exe):
    raise FileNotFoundError(lic_exe)

lic = LicMethod()

# Build the input/output paths.
base_out_path = IMAGE_PATH + f"/LIC/snap{dataset}/{side_name}"  # ./images/LIC/snap77/left
lic.makedir(f"/LIC/snap{dataset}/{side_name}")

# Collect the x-component files; the y-component path is derived from each one.
binary_paths = glob(SNAP_PATH + f"/{side}/snap{dataset}/magfieldx/*/*.npy")
file_count = len(binary_paths)

# Nothing to process
if file_count == 0:
    print("ERROR", "File not Found")
    sys.exit()

for xfile in binary_paths:
    snapshot_name = os.path.splitext(os.path.basename(xfile))[0]
    print("START", f"{snapshot_name} 開始")

    # Output name drops the component letter: magfieldx.* -> magfield.*
    file_name = os.path.splitext(os.path.basename(xfile.replace("magfieldx", "magfield")))
    # e.g. ./IMAGE_PATH/LIC/snap77/left/lic_snap77.left.magfield.01.14.bmp
    out_path = base_out_path + f"/lic_snap{dataset}.{side_name}.{file_name[0]}.bmp"

    # Skip snapshots whose image already exists.
    if not os.path.exists(out_path):
        yfile = xfile.replace("magfieldx", "magfieldy")
        # Build the argument list for the LIC run.
        props = lic.set_command(xfile, yfile, out_path)
        # Run LIC (about 20 minutes per image according to the original note).
        lic.LIC(props)

        # Remove the temp files named by props[1] and props[2].
        lic.delete_tempfile(props[1], props[2])

    print("END", f"{snapshot_name} 終了")

print("END", f"{dataset} 終了")

### 3. 教師データの作成


#### 3.1 ビューワの作成


In [1]:
import os
import sys

sys.path.append(os.getcwd() + "/src")

from src.Processing.viewer.createViewer import _sort_paths
from src.config.params import IMAGE_PATH, SRC_PATH, SIDES

In [2]:
# Generate one viewer HTML file per side from the shared template.
for side_name in SIDES:
    # Gather this side's LIC bitmaps. Per the original note, _sort_paths orders
    # them chronologically based on the snap naming convention.
    bmp_paths = _sort_paths(glob(IMAGE_PATH + f"/LIC/snap{dataset}/{side_name}/*.bmp"))

    # Build the file list for the viewer: one quoted, forward-slash path per line.
    forward_slash_paths = (bmp.replace("\\", "/") for bmp in bmp_paths)
    path_list_str = "\n" + "".join(f"\t\t\t'{bmp}', \n" for bmp in forward_slash_paths)

    # Load the HTML template.
    template_file = SRC_PATH + "/Processing/viewer/template/viewer_template.html"
    with open(template_file, "r", encoding="utf-8") as template:
        html = template.read()

    # Insert the list of rendered .bmp paths into the template placeholder.
    html = html.replace("{ replaceblock }", path_list_str)

    # Save the generated viewer next to the template.
    out_name = SRC_PATH + f"/Processing/viewer/template/lic_viewer{dataset}.{side_name}.html"
    with open(out_name, "w", encoding="utf8") as viewer:
        viewer.write(html)

#### 3.2 画像の分割

`python ./src/Processing/viewer/writer.py`  
を実行し、  
3.1 で生成した `./src/Processing/viewer/template/lic_viewer{dataset}.{side}.html` (例: `lic_viewer77.left.html`)
を Web ブラウザで開く (Drag & Drop)


#### 3.3 データの切り取り


In [None]:
import os
import sys

sys.path.append(os.getcwd())
sys.path.append(os.getcwd() + "/src")

from src.config.params import set_dataset, SIDES
from src.Processing.train.fusion_npy import CreateTrain

In [None]:
# Cut training patches out of the snapshot data, driven by the label JSON file.
dataset = set_dataset(input())

# The label file must exist before any cutting starts.
path = ML_DATA_DIR + "/LIC_labels/snap_labels.json"
label_file_found = os.path.exists(path)
if not label_file_found:
    raise ValueError("File not found")

md = CreateTrain(dataset)
target_variables = ("magfieldx", "magfieldy", "velocityx", "velocityy", "density")
label_ids = (0, 1, 2)

# Every combination of variable x side x label id is cut and saved.
for val in target_variables:
    for side in SIDES:
        for label in label_ids:
            md.cut_and_save_from_json(path, side, label, val)

#### 3.4 データの合成


In [None]:
import os
import sys
from glob import glob

sys.path.append(os.getcwd())
sys.path.append(os.getcwd() + "/src")

from src.config.params import ML_DATA_DIR, LABELS, set_dataset
from src.Processing.train.fusion_npy import CreateTrain

In [None]:
# Fuse several per-variable .npy patches into combined training data.
dataset = set_dataset(input())

md = CreateTrain(dataset)

# Each entry: (input variables, output folder/base name, kernel that combines them).
props_params = [
    (
        ["magfieldx", "magfieldy"],
        "mag_tupledxy",
        md.kernel_listxy,
    ),
    (
        ["velocityx", "velocityy", "density"],
        "energy",
        md.kernel_Energy,
    ),
]

OUT_DIR = ML_DATA_DIR + f"/snap{dataset}"
for val_params, out_basename, kernel in props_params:
    print("START", val_params)
    # Files of the first variable drive the loop; loadBinaryData receives the
    # full variable list together with that path.
    lead_variable = val_params[0]

    for label in LABELS.values():  # n, o, x
        print("START", f"label : {label}")
        npys_path = OUT_DIR + f"/point_{label}"
        source_pattern = npys_path + f"/{lead_variable}/*.npy"

        for img_path in glob(source_pattern):
            im_list = md.loadBinaryData(img_path, val_params)  # load the data to be combined
            result_img = kernel(*im_list)  # build the combined data

            # Output path: same file name with the variable name swapped for the output name, i.e.
            # {OUT_DIR}/point_{label}/{out_basename}/{out_basename}_{dataset}.{param}.{job}_{centerx}.{centery}.npy
            out_file = os.path.basename(img_path).replace(lead_variable, out_basename)
            out_path = npys_path + f"/{out_basename}/{out_file}"
            md.save_Data(result_img, out_path)  # store the result

        print("END", f"label : {label}")
    print("END", val_params)

### 4. 機械学習


#### 4.1 機械学習
- KMeans
- kneighbors
- linearSVC
- rbfSVC
- XGBoost

In [1]:
from src.MachineLearning.Training import SupervisedML

In [None]:
# Hyper-parameter settings for training, keyed by classifier name.
# from src.config.params import ML_PARAM_DICT

ML_PARAM_DICT = {
    "KMeans": dict(n_clusters=3, n_init=10, max_iter=300, tol=1e-04, random_state=100, verbose=10),
    # NOTE(review): these are the same settings as "KMeans"; confirm they are what the
    # k-neighbors path in SupervisedML expects.
    "kneighbors": dict(n_clusters=3, n_init=10, max_iter=300, tol=1e-04, random_state=100, verbose=10),
    "linearSVC": dict(C=0.3, random_state=0, verbose=10),
    "rbfSVC": dict(
        C=1.0,  # regularisation parameter (margin)
        cache_size=200,  # kernel cache size
        coef0=0.0,  # independent term in the kernel function; only significant for 'poly' and 'sigmoid'
        decision_function_shape="ovr",
        degree=3,  # degree of the polynomial ('poly') kernel
        gamma="scale",  # kernel coefficient; "scale" means 1 / (n_features * X.var()) in scikit-learn
        kernel="rbf",  # kernel type ('linear', 'poly', 'rbf', 'sigmoid', 'precomputed')
        max_iter=-1,  # hard limit on solver iterations
        probability=False,  # if True, class-membership probabilities are available at prediction time
        random_state=None,  # random seed
        shrinking=True,  # whether to use the shrinking heuristic
        tol=0.001,  # tolerance of the stopping criterion
        verbose=2,  # enable verbose output
    ),
    "XGBoost": dict(
        colsample_bytree=0.4,
        early_stopping_rounds=100,
        eval_metric="auc",
        learning_rate=0.02,
        max_depth=4,
        missing=-1,
        n_estimators=1000,
        subsample=0.8,
        params={},
        verbose=50,
    ),
}

In [None]:
# Train one classifier end to end: load data, fit, save the model, predict.

# Basic run settings
mode = "mixsep"
parameter = "density"
clf_name = "XGBoost"  # "KMeans", "kneighbors", "linearSVC", "rbfSVC", "XGBoost"

# Settings for building the training data
pca = False
test_size = 0.3
model_random_state = 100

# Echo the configuration as "key=value" pairs.
run_settings = {
    "name": clf_name,
    "mode": mode,
    "parameter": parameter,
    "pca": pca,
    "test_size": test_size,
    "random_state": model_random_state,
}
print("PARAMETER : ", ", ".join(f"{key}={value}" for key, value in run_settings.items()))

# Hyper-parameters of the selected classifier
param_dict = ML_PARAM_DICT[clf_name]

print("LOAD      : ", "データの読み込み")
model = SupervisedML.load_npys(
    mode=mode,
    parameter=parameter,
    pca=pca,
    test_size=test_size,
    random_state=model_random_state,
)

print("Learning  : ", f"学習開始 ({clf_name})")
model.do_learning(clf_name=clf_name, param_dict=param_dict)

print("SAVE      : ", "学習結果の保存")
model.save_model()

print("PREDICT   : ", "予測")
model.predict()

print("END       : ", "処理終了")


In [None]:
# Reload a stored model using the settings from the training cell and predict again.
print("LOAD", f"モデルの読み込み ({clf_name})")
load_options = dict(
    mode=mode,
    name=clf_name,
    model_random_state=model_random_state,
    param_dict=param_dict,
)
model = SupervisedML.load_model(parameter, **load_options)

print("PREDICT", "予測")
model.predict()


#### 4.3 CNN

In [None]:
from src.MachineLearning.CNN import CnnTrain

In [None]:
# Train the CNN and store the result.
EPOCH = 20  # value passed to run() as epoch_cnt
SEED = 100  # value passed to set_train() as seed

model = CnnTrain()
model.set_net()
model.set_train(seed=SEED)
model.run(epoch_cnt=EPOCH, do_plot=True)
model.save_model()
