This notebook transforms the output of ParaView into a form that can be fed to ML models.

In [1]:
import pickle

import pandas as pd

import numpy as np

from tqdm.notebook import tqdm

In [2]:
from pathlib import PurePath

In [3]:
NUM_FRAMES = 999   # number of timesteps (frames) to load; earlier values noted by the author: 9, 344

Our simulation produced `NUM_FRAMES` frames, each saved as a CSV file.

In [4]:
# One CSV per frame: new_data/data_0.csv ... new_data/data_<NUM_FRAMES - 1>.csv
data_dir = PurePath("new_data")
data_paths = [data_dir / f"data_{frame}.csv" for frame in range(NUM_FRAMES)]

In [5]:
# Load the first frame to inspect the column layout of the exported CSV files
dfs_0 = pd.read_csv(str(data_paths[0]))
dfs_0

Unnamed: 0,Time,Points:0,Points:1,Points:2,U:0,U:1,U:2,k,nut,omega,p
0,0.002,0.024731,-0.001165,0.097489,-0.059273,0.062762,10.573,0.006028,0.000032,278.280,3517.600
1,0.002,0.004980,-0.016126,4.835800,-0.025099,0.018351,10.484,0.004322,0.000032,44.067,80.626
2,0.002,0.001506,-0.028538,4.913500,0.000000,0.000000,0.000,0.018508,0.000000,6961.300,42.379
3,0.002,0.026603,-0.010438,4.934800,0.000000,0.000000,0.000,0.018638,0.000000,6983.000,31.822
4,0.002,0.001855,0.024689,0.173850,0.040683,0.017779,10.479,0.006059,0.000025,287.620,3480.800
...,...,...,...,...,...,...,...,...,...,...,...
7577,0.002,-0.013326,0.025280,0.124640,0.000000,0.000000,0.000,0.018415,0.000000,6969.100,3504.500
7578,0.002,0.017788,0.021330,0.096825,-0.065950,0.078987,10.243,0.016613,0.000004,4931.500,3517.800
7579,0.002,0.005357,-0.015784,0.196900,0.017935,0.000499,10.545,0.004153,0.000026,24.108,3469.100
7580,0.002,0.024119,0.015329,0.657600,0.000000,0.000000,0.000,0.006105,0.000000,468.020,3139.400


There are five values to choose from:

| Term  | Meaning                                                                                         |
|-------|-------------------------------------------------------------------------------------------------|
| U     | velocity vector (components `U:0`, `U:1`, `U:2`)                                                |
| k     | turbulence kinetic energy                                                                       |
| omega | specific dissipation rate (rate at which turbulence kinetic energy is converted to heat)        |
| nut   | kinematic turbulent viscosity                                                                   |
| p     | pressure                                                                                        |

For each term, the data matrix is large and has the shape `(m=time, n=space)`: one row per timestep and one column per mesh point.
$$\begin{bmatrix} C_{1,1} & C_{1,2} & \cdots & C_{1,n} \\ C_{2,1} & C_{2,2} & \cdots & C_{2,n} \\ \vdots & \vdots & \ddots & \vdots \\ C_{m,1} & C_{m,2} & \cdots & C_{m,n} \end{bmatrix} \tag{1}$$

In [6]:
# Load the second frame for comparison with the first one
dfs_1 = pd.read_csv(str(data_paths[1]))
dfs_1

Unnamed: 0,Time,Points:0,Points:1,Points:2,U:0,U:1,U:2,k,nut,omega,p
0,0.004,0.024731,-0.001165,0.097489,-0.067505,0.026594,11.308,0.005933,4.079100e-05,279.510,485.5100
1,0.004,0.004980,-0.016126,4.835800,0.114840,0.041690,11.286,0.004493,3.888800e-05,73.945,5.6927
2,0.004,0.001506,-0.028538,4.913500,0.000000,0.000000,0.000,0.561540,3.605000e-07,10798.000,3.0672
3,0.004,0.026603,-0.010438,4.934800,0.000000,0.000000,0.000,0.552950,2.581800e-07,10772.000,2.3105
4,0.004,0.001855,0.024689,0.173850,-0.012177,-0.022595,11.120,0.005981,4.250800e-05,294.770,482.9800
...,...,...,...,...,...,...,...,...,...,...,...
7577,0.004,-0.013326,0.025280,0.124640,0.000000,0.000000,0.000,0.522590,3.248700e-08,10586.000,484.6300
7578,0.004,0.017788,0.021330,0.096825,-0.070005,0.075416,10.398,0.309730,3.451100e-05,6743.200,485.4800
7579,0.004,0.005357,-0.015784,0.196900,0.011529,-0.017054,11.499,0.004344,5.775900e-05,39.388,481.9700
7580,0.004,0.024119,0.015329,0.657600,0.000000,0.000000,0.000,0.014172,0.000000e+00,545.820,430.9400


### Start ETL

In [7]:
# Build one table per term with shape (points, frames).
#
# Every CSV is read exactly once and the columns of all chosen terms are
# collected in that single pass; each table is then assembled with a single
# `pd.concat`. (Re-reading all files for every term and growing a frame with
# `concat` inside the loop repeats the I/O per term and copies the data
# quadratically.)
chosen_columns = ['U:0', 'U:1', 'U:2', 'k', 'nut', 'omega', 'p']
point_columns = ["Points:0", "Points:1", "Points:2"]

reference_frame = pd.read_csv(str(data_paths[0]))
reference_points = reference_frame[point_columns]

# term -> list of per-frame columns, in frame order
columns_by_term = {term: [reference_frame[term].copy()] for term in chosen_columns}

for path in tqdm(data_paths[1:]):
    frame = pd.read_csv(str(path))
    # Make sure all point coordinates are the same across time
    pd.testing.assert_frame_equal(reference_points, frame[point_columns])
    for term in chosen_columns:
        # Copy so each stored column owns its data instead of referencing the whole frame
        columns_by_term[term].append(frame[term].copy())

tables = {}
for term in chosen_columns:
    print(f"Working with {term}")
    # pop() releases the per-frame columns as soon as the table is built
    p_data = pd.concat(columns_by_term.pop(term), axis=1)
    print(f"shape of p_data {p_data.shape}")
    tables[term] = p_data

Working with U:0


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)
Working with U:1


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)
Working with U:2


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)
Working with k


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)
Working with nut


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)
Working with omega


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)
Working with p


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (7582, 999)


---

In [8]:
# Display the assembled pressure table (at this stage: one row per point, one column per frame)
tables['p']

Unnamed: 0,p,p.1,p.2,p.3,p.4,p.5,p.6,p.7,p.8,p.9,...,p.10,p.11,p.12,p.13,p.14,p.15,p.16,p.17,p.18,p.19
0,3517.600,485.5100,457.1300,455.8700,476.9800,485.2100,500.2900,531.5700,554.6400,569.9300,...,85.62100,86.23400,86.66000,87.12900,86.75400,86.61700,86.42800,86.63000,86.42600,86.78100
1,80.626,5.6927,5.7872,5.8207,5.7874,5.7645,5.7888,5.7675,5.7865,5.8104,...,0.38813,0.39111,0.39430,0.39584,0.39526,0.39591,0.39827,0.39869,0.39721,0.39435
2,42.379,3.0672,3.1071,3.1237,3.1460,3.1646,3.1779,3.1361,3.1633,3.2942,...,0.29002,0.29027,0.29124,0.29230,0.29319,0.29408,0.29438,0.29383,0.29341,0.29383
3,31.822,2.3105,2.3243,2.3336,2.3522,2.3649,2.3764,2.3661,2.3219,2.3982,...,0.18927,0.18889,0.18915,0.18949,0.18966,0.18997,0.18998,0.18933,0.18839,0.18780
4,3480.800,482.9800,454.5700,453.3100,474.4200,482.6300,497.6700,528.9000,551.9200,567.1600,...,85.48600,86.10000,86.52500,86.99500,86.62000,86.48200,86.29300,86.49600,86.29200,86.64600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7577,3504.500,484.6300,456.2500,455.0100,476.1200,484.3200,499.3900,530.6500,553.7100,568.9900,...,85.60200,86.21600,86.64100,87.11100,86.73600,86.59800,86.40900,86.61200,86.40800,86.76200
7578,3517.800,485.4800,457.0900,455.8200,476.9200,485.1300,500.2100,531.4900,554.5500,569.8300,...,85.57500,86.18800,86.61400,87.08300,86.70900,86.57100,86.38200,86.58400,86.38100,86.73500
7579,3469.100,481.9700,453.5300,452.2400,473.3200,481.5000,496.5100,527.7100,550.6800,565.8800,...,85.28300,85.89600,86.32200,86.79200,86.41700,86.27900,86.09000,86.29300,86.08900,86.44300
7580,3139.400,430.9400,401.9700,397.1500,415.5700,421.9300,432.2900,458.3200,476.0200,485.8400,...,69.54500,70.15700,70.58300,71.05400,70.68100,70.54500,70.35700,70.56100,70.35700,70.71100


To make it consistent with (1), we need to transpose the tables.

In [9]:
# Shape before transposing: (points, frames)
tables['p'].values.shape

(7582, 999)

In [10]:
# Convert every table to a NumPy array of shape (frames, points).
# Only entries that are still DataFrames are converted, so re-running this
# cell is a no-op instead of failing on `.values` of an ndarray.
for key in tables:
    table = tables[key]
    if isinstance(table, pd.DataFrame):
        tables[key] = table.values.T

In [11]:
# Shape after transposing: (frames, points)
tables['p'].shape

(999, 7582)

In [12]:
# Boundaries of the chronological split along the time axis.
# The number of timesteps comes from NUM_FRAMES (one column per frame was
# loaded) rather than from a variable left over from the loading loop.
TRAIN_FRACTION = .5   # first half of the timesteps is used for training
VAL_FRACTION = .1     # the next 10% is used for validation; the remainder is the test set

train_index = int(NUM_FRAMES * TRAIN_FRACTION)
val_index = train_index + int(NUM_FRAMES * VAL_FRACTION)
print(f"train_index: {train_index}, val_index: {val_index}")

train_index: 499, val_index: 598


In [13]:
# Sanity check on the layout: rows are timesteps, columns are mesh cells
n_timesteps, n_cells = tables['p'].shape
assert n_timesteps < n_cells   # Time is smaller than number of cells in mesh

### Splitting into train, validation, and test sets

In [14]:
# Preview the chronological split on the pressure table:
# rows [0, train_index) -> train, [train_index, val_index) -> val, the rest -> test
train_data, val_data, test_data = np.split(tables['p'], [train_index, val_index])
print(train_data.shape, val_data.shape, test_data.shape)

(499, 7582) (99, 7582) (401, 7582)


In [15]:
# Split every term along the time axis and pickle each part to its own file
for key, table in tables.items():
    train_data = table[:train_index, :]
    val_data = table[train_index:val_index, :]
    test_data = table[val_index:, :]
    print(train_data.shape, val_data.shape, test_data.shape)

    # Output file name -> array to store, written in train / val / test order
    outputs = {
        f'train_{key}.pkl': train_data,
        f'val_{key}.pkl': val_data,
        f'test_{key}.pkl': test_data,
    }
    for filename, split in outputs.items():
        with open(filename, 'wb') as handle:
            pickle.dump(split, handle)

(499, 7582) (99, 7582) (401, 7582)
(499, 7582) (99, 7582) (401, 7582)
(499, 7582) (99, 7582) (401, 7582)
(499, 7582) (99, 7582) (401, 7582)
(499, 7582) (99, 7582) (401, 7582)
(499, 7582) (99, 7582) (401, 7582)
(499, 7582) (99, 7582) (401, 7582)
