CAEデモ／準備
----
概要
----
* データ作成時の設定ファイルは、YAML形式です。
    * 28x28のメッシュデータに対して、境界条件の分割数を指定できます。(= NUM_BOUNDARY_BLOCKS)
    * 生成するデータ数を指定できます。(= NUM_TRAIN_DATA)

In [1]:
# Tunable parameters for this run.
# Number of samples requested for the training set.
NUM_TRAIN_DATA = 1600
# Number of blocks each wall is split into (used for both the X and Y walls).
NUM_BOUNDARY_BLOCKS = 4

In [2]:
# Build a unique settings-file name:
#   <epoch seconds>_<boundary blocks>_<training samples>_input.yml
from datetime import datetime
# strftime('%s') is a C-library extension that is not available on every
# platform; timestamp() yields the same epoch seconds portably.
_d = str(int(datetime.now().timestamp()))
_f = "{d}_{b:02d}_{t:06d}_input.yml".format(d=_d, b=NUM_BOUNDARY_BLOCKS, t=NUM_TRAIN_DATA)

In [3]:
# Settings that get serialized to the YAML file.
settings = dict(
    # Sample count and wall partitioning (same block count on both axes).
    NUMBER_OF_TRAINING_DATA=NUM_TRAIN_DATA,
    NUMBER_OF_BOUNDARY_BLOCKS_PER_X_WALL=NUM_BOUNDARY_BLOCKS,
    NUMBER_OF_BOUNDARY_BLOCKS_PER_Y_WALL=NUM_BOUNDARY_BLOCKS,
    # Solver stopping threshold.
    CONVERGENCE_CRITERIA=0.01,
    # Solved grid size and the size of the window cut out of it.
    X_SOLVE=32,
    Y_SOLVE=32,
    X_CUT=28,
    Y_CUT=28,
    # Range the random wall temperatures are drawn from.
    TEMPARATURE_MIN=0,
    TEMPARATURE_MAX=255,
    # Thickness of the boundary layer on each side.
    X_BOUND=1,
    Y_BOUND=1,
)

In [4]:
import yaml

# Write the settings file (block-style YAML) and report its name.
with open(_f, 'w') as _y:
    # Let PyYAML emit straight into the open file instead of building a string first.
    yaml.dump(settings, _y, default_flow_style=False)
    print('yaml: {}'.format(_f))

yaml: 1506153849_04_001600_input.yml


データ作成
----
* 設定ファイルに基づいて作成する。
* 境界条件（＝ラベル）は、分割数に基づき乱数で作成する。
* 温度分布（＝データ）は、拡散方程式を解いて作成する。

### 諸準備

In [5]:
import os

print('initialize')

# Prepare the input and output files.
inputfile = _f
# Read-only access is sufficient, and the context manager guarantees the
# handle is closed once the settings have been parsed.
with open(inputfile, "r") as f:
    # safe_load only builds plain Python scalars/containers. Calling
    # yaml.load without an explicit Loader is unsafe on untrusted input and
    # is rejected outright by PyYAML 6.
    input_data = yaml.safe_load(f)

# The settings file is named "<timestamp>_<blocks>_<samples>_input.yml";
# the first three fields are reused as the prefix of both output files.
_tmp = _f.split('_')
train_file = "{}_{}_{}_training_data.csv".format(_tmp[0], _tmp[1], _tmp[2])
val_file = "{}_{}_{}_validation_data.csv".format(_tmp[0], _tmp[1], _tmp[2])

def check_and_remove_file(target):
    """Delete the file at *target* if it exists; do nothing otherwise."""
    if not os.path.exists(target):
        return
    os.remove(target)

# Remove any output files with the same names before new ones are written.
for _stale_output in (train_file, val_file):
    check_and_remove_file(_stale_output)

initialize


In [6]:
# Read the settings back from the parsed YAML mapping.
# The targets on the left are listed in the same order as the keys on the right.
(
    NUMBER_OF_TRAINING_DATA,
    NUMBER_OF_BOUNDARY_BLOCKS_PER_X_WALL,
    NUMBER_OF_BOUNDARY_BLOCKS_PER_Y_WALL,
    CONVERGENCE_CRITERIA,
    X_SOLVE,
    Y_SOLVE,
    X_CUT,
    Y_CUT,
    TEMPARATURE_MIN,
    TEMPARATURE_MAX,
    X_BOUND,
    Y_BOUND,
) = (
    input_data[_key]
    for _key in (
        'NUMBER_OF_TRAINING_DATA',
        'NUMBER_OF_BOUNDARY_BLOCKS_PER_X_WALL',
        'NUMBER_OF_BOUNDARY_BLOCKS_PER_Y_WALL',
        'CONVERGENCE_CRITERIA',
        'X_SOLVE',
        'Y_SOLVE',
        'X_CUT',
        'Y_CUT',
        'TEMPARATURE_MIN',
        'TEMPARATURE_MAX',
        'X_BOUND',
        'Y_BOUND',
    )
)

In [7]:
# ------------------------------------------
# Grid dimensions
# ------------------------------------------

# Full grid: the solved region plus a boundary layer on both sides of each axis.
x_size, y_size = X_SOLVE + 2 * X_BOUND, Y_SOLVE + 2 * Y_BOUND
print('area size: {} x {}'.format(x_size, y_size))

# ------------------------------------------
# Boundary-condition parameters
# ------------------------------------------

# Index range of the solved region, i.e. the grid without its boundary layer.
imin, jmin = X_BOUND, Y_BOUND
imax, jmax = x_size - X_BOUND, y_size - Y_BOUND

# Wall length, temperature range and block count handed to the random wall generator.
xarray_size, yarray_size = X_SOLVE, Y_SOLVE
tmin, tmax = TEMPARATURE_MIN, TEMPARATURE_MAX
num_xblock, num_yblock = (
    NUMBER_OF_BOUNDARY_BLOCKS_PER_X_WALL,
    NUMBER_OF_BOUNDARY_BLOCKS_PER_Y_WALL,
)

area size: 34 x 34


### 関数等定義

In [8]:
import numpy as np
import random
from enum import Enum

# Enum definition: identifies which wall of the grid a boundary condition applies to.
BoundType = Enum(
    'BoundType',
    [('TOP', 1), ('RIGHT', 2), ('BOTTOM', 3), ('LEFT', 4)],
)
    
# Class definitions
class RandomArray(object):
    """Piecewise-constant random 1-D profile, used as one wall's boundary values.

    ``num_block`` random integers are drawn and each one is repeated over an
    equally sized run of cells so that the profile has ``array_size`` entries.

    Attributes:
        values: ndarray holding one random integer per block.
        array: list of length ``array_size`` with every block value repeated
            ``array_size // num_block`` times.
    """

    # NOTE: ``values`` is a plain instance attribute. The class used to define
    # a ``values()`` method as well, but the attribute of the same name
    # shadowed it on every instance, so the method could never be called.

    def __init__(self, array_size, imin, imax, num_block):
        """Draw the block values and expand them to the full profile.

        Args:
            array_size: number of cells in the profile.
            imin: smallest value a block may take.
            imax: largest value a block may take (inclusive).
            num_block: number of blocks; must be positive and divide
                ``array_size`` evenly.

        Raises:
            ValueError: if ``num_block`` is not positive or does not divide
                ``array_size``.
        """
        # The original wrote to sys.stderr and called sys.exit() here, but
        # ``sys`` was never imported, so the check itself crashed with a
        # NameError. Raising keeps the message and lets the caller decide.
        if num_block <= 0 or array_size % num_block != 0:
            raise ValueError(
                '"array_size % number_of_blocks = 0" is required. '
                'Please change array_size or boundary block number'
            )

        # np.random.randint excludes its upper bound; +1 makes imax attainable,
        # matching the inclusive range the parameter name describes.
        self.values = np.random.randint(imin, imax + 1, num_block)

        # Integer block width (exact, because divisibility was checked above).
        block_size = array_size // num_block
        self.array = [self.values[i // block_size] for i in range(array_size)]

    def data(self):
        """Return the expanded profile (the same list object as ``array``)."""
        return self.array
    
    
class BoundaryConditions(object):
    """Writes fixed (Dirichlet) values onto the edges of a 2-D grid in place.

    The grid is indexed as ``area[y][x]``.
    """

    def __init__(self, area):
        """Keep a reference to *area* and record its extent along both axes."""
        self.area = area
        self.ylen = len(area)
        self.xlen = len(area[0])

    def __xdirichlet(self, xarray, xmin, yconst):
        """Copy *xarray* into row *yconst*, starting at column *xmin*."""
        for column, value in enumerate(xarray[:], start=xmin):
            self.area[yconst][column] = value

    def __ydirichlet(self, yarray, xconst, jmin):
        """Copy *yarray* into column *xconst*, starting at row *jmin*."""
        for row, value in enumerate(yarray[:], start=jmin):
            self.area[row][xconst] = value

    def setBoundary(self, bound, x, y, bound_type):
        """Apply *bound* to the wall selected by *bound_type*.

        For TOP/BOTTOM, *x* is the first column and *y* the fixed row.
        For RIGHT/LEFT, *x* is the fixed column and *y* the first row.
        Any other *bound_type* is ignored.
        """
        if bound_type in (BoundType.TOP, BoundType.BOTTOM):
            self.__xdirichlet(bound, x, y)
        elif bound_type in (BoundType.RIGHT, BoundType.LEFT):
            self.__ydirichlet(bound, x, y)


# Utility functions
def initialize_fields(x_size, y_size):
    """Return a zero-filled grid with ``y_size`` rows and ``x_size`` columns.

    The grid is indexed as ``area[y][x]`` everywhere else in this file, so the
    row count must be ``y_size``. The original allocated ``(x_size, y_size)``,
    which only works when the grid is square.
    """
    return np.zeros((y_size, x_size))

def getDataAndLable(area):
    """Extract one training sample (flattened field + label vector) from *area*.

    Reads the module-level settings X_SOLVE, Y_SOLVE, X_CUT, Y_CUT, X_BOUND,
    Y_BOUND, x_size, y_size and the current wall objects t_wall, b_wall,
    l_wall, r_wall.

    Returns:
        (data_1d, label_vector): the centered window of *area* flattened to
        1-D with every value rounded to one decimal, and the block values of
        the top, bottom, left and right walls concatenated in that order.
    """
    # Distance from the grid edge to the centered window (28 x 28, like MNIST,
    # with the default settings).
    x_offset = X_BOUND + int((X_SOLVE - X_CUT) / 2)
    y_offset = Y_BOUND + int((Y_SOLVE - Y_CUT) / 2)
    col_lo, col_hi = x_offset, x_size - x_offset
    row_lo, row_hi = y_offset, y_size - y_offset

    # Flatten the window for use as training data. The layout is column-major:
    # flat index = column_index * rows_per_column + row_index.
    rows_per_column = row_hi - row_lo
    data_1d = np.zeros((col_hi - col_lo) * rows_per_column)
    for c, col in enumerate(range(col_lo, col_hi)):
        base = c * rows_per_column
        for r, row in enumerate(range(row_lo, row_hi)):
            # Formatting to text and letting numpy parse it back rounds the
            # value to one decimal place.
            data_1d[base + r] = '%03.1f' % area[row][col]

    # Label: a 1-D vector of the per-block wall values (top, bottom, left, right).
    label_vector = np.r_[t_wall.values, b_wall.values, l_wall.values, r_wall.values]

    return data_1d, label_vector

### 拡散方程式を解く

In [9]:
def diffusion_equation(area, imin, jmin, imax, jmax, convergence_criteria):
    """Relax the interior of *area* in place toward the steady state.

    Every sweep replaces each cell in ``[jmin, jmax) x [imin, imax)`` with the
    mean of its four neighbours, using values already updated earlier in the
    same sweep. Sweeping stops once the relative change of the interior sum
    between two consecutive sweeps is no larger than *convergence_criteria*.

    Args:
        area: 2-D grid indexed as ``area[j][i]``; it must have one extra
            cell around the swept range on every side.
        imin, imax: half-open column range to update.
        jmin, jmax: half-open row range to update.
        convergence_criteria: threshold on the relative change of the sum.

    Returns:
        None. *area* is modified in place.
    """
    sum_old = 0.0
    residual_error = 1

    while residual_error > convergence_criteria:
        # Restart the sum on every sweep. The original kept accumulating
        # across sweeps, so the "residual" shrank like 1/sweep-count and the
        # loop ended after a fixed number of sweeps whatever the field did.
        sum_now = 0.0
        for j in range(jmin, jmax):
            for i in range(imin, imax):
                area[j][i] = 0.25 * (area[j][i - 1] + area[j][i + 1]
                                     + area[j + 1][i] + area[j - 1][i])
                sum_now = sum_now + area[j][i]

        change = abs(sum_now - sum_old)
        # With a zero sum (e.g. an all-zero field) a relative change is
        # undefined; fall back to the absolute change instead of dividing by 0.
        residual_error = change / abs(sum_now) if sum_now != 0 else change
        sum_old = sum_now

    return

### 学習用データの生成

In [10]:
from tqdm import tqdm

# Generate the requested number of training samples plus 25% extra for validation.
NUM_LOOP = int(NUMBER_OF_TRAINING_DATA*1.25)

# Accumulated samples: flattened fields and their label vectors.
data = []
labels = []

# Data generation. The loop variable was originally named ``iter``, which
# replaced the builtin of that name for the rest of the session.
for _sample_index in tqdm(range(NUM_LOOP)):

    area = initialize_fields(x_size, y_size)

    # Draw random boundary conditions for the four walls.
    # These names are read as module-level globals by getDataAndLable.
    t_wall = RandomArray(xarray_size, tmin, tmax, num_xblock)
    b_wall = RandomArray(xarray_size, tmin, tmax, num_xblock)
    l_wall = RandomArray(yarray_size, tmin, tmax, num_yblock)
    r_wall = RandomArray(yarray_size, tmin, tmax, num_yblock)

    # Write the boundary conditions onto the outermost rows and columns.
    bc = BoundaryConditions(area)
    bc.setBoundary(t_wall.array,       imin,          0, BoundType.TOP)
    bc.setBoundary(b_wall.array,       imin, (y_size-1), BoundType.BOTTOM)
    bc.setBoundary(r_wall.array, (x_size-1),       jmin, BoundType.RIGHT)
    bc.setBoundary(l_wall.array,          0,       jmin, BoundType.LEFT)

    # Run the solver on the interior.
    diffusion_equation(area, imin, jmin, imax, jmax, CONVERGENCE_CRITERIA)

    # Extract the data vector and label vector from the solved field.
    _data, _label = getDataAndLable(area)
    data.append(_data)
    labels.append(_label)

100%|██████████| 2000/2000 [30:48<00:00,  1.12it/s]


In [11]:
import pandas as pd

# Put each sample's flattened field and its label vector side by side, one row per sample.
data_and_labels = np.c_[data, labels]

# The first NUMBER_OF_TRAINING_DATA rows form the training set; the rest is validation.
_split = NUMBER_OF_TRAINING_DATA
df_train = pd.DataFrame(data_and_labels[:_split])
df_val = pd.DataFrame(data_and_labels[_split:])
_outputs = ((train_file, df_train), (val_file, df_val))

# Write both files (no header, no index, one decimal place).
for _path, _frame in _outputs:
    _frame.to_csv(_path, float_format='%.1f', index=False, header=False)

# Report the shape of what was written.
for _path, _frame in _outputs:
    print('output: {}, << {} cols, {} rows>>'.format(_path, _frame.shape[1], _frame.shape[0]))

output: 1506153849_04_001600_training_data.csv, << 800 cols, 1600 rows>>
output: 1506153849_04_001600_validation_data.csv, << 800 cols, 400 rows>>
