This notebook will transform the output of ParaView into something that can be fed to an ML model.

In [50]:
import pickle

import pandas as pd

import numpy as np

from tqdm.notebook import tqdm

In [51]:
from pathlib import PurePath

In [52]:
NUM_FRAMES = 999   # Number of time steps, i.e. of exported CSV frames. Previous values: 9, 344.

Our simulation produced `NUM_FRAMES` frames (one per time step), each exported as a CSV file.

In [55]:
# One CSV path per exported frame: new_data/data_0.csv ... new_data/data_<NUM_FRAMES - 1>.csv
data_paths = [
    PurePath("new_data") / f"data_{frame_idx}.csv"
    for frame_idx in range(NUM_FRAMES)
]

In [56]:
# Load the first exported frame and display it to check which columns are available.
first_frame_csv = str(data_paths[0])
dfs_0 = pd.read_csv(first_frame_csv)
dfs_0

Unnamed: 0,Points:0,Points:1,Points:2,U:0,U:1,U:2,k,nut,omega,p
0,-0.013184,0.000209,0.001576,-0.071583,-0.039038,10.6240,0.026240,8.245900e-08,339940.0,35136.000
1,0.028577,0.000500,4.113900,0.000000,0.000000,0.0000,0.006365,1.227600e-07,10095.0,6403.800
2,0.004193,0.000439,0.002325,-0.064530,0.025779,9.2245,0.032725,3.664300e-08,923350.0,35120.000
3,0.002439,-0.000347,4.999400,0.040195,0.033054,10.0090,0.031677,4.682800e-08,705440.0,13.258
4,-0.007843,0.000186,0.002879,-0.148430,-0.263730,10.8810,0.027487,7.934700e-08,358030.0,35101.000
...,...,...,...,...,...,...,...,...,...,...
17178,0.016414,-0.000500,0.075661,0.000000,0.000000,0.0000,0.007894,0.000000e+00,17570.0,34392.000
17179,0.016387,-0.000500,0.000997,0.000000,0.000000,0.0000,0.032178,0.000000e+00,1171000.0,35155.000
17180,-0.017435,-0.000500,0.004380,0.000000,0.000000,0.0000,0.030933,0.000000e+00,633070.0,35084.000
17181,0.017914,-0.000439,5.000000,-0.040359,-0.038938,9.3870,0.033076,3.329900e-08,1015600.0,0.000


There are five values to choose from:

| Terms | Meaning                                               |   
|-------|-------------------------------------------------------|
| u     | velocity vector                                       |
| k     | turbulent kinetic energy                              |
| omega | specific rate of dissipation of turbulent kinetic energy into heat |
| nut   | kinematic turbulent viscosity                         |
| p     | Pressure                                              |

Each table is arranged as a matrix of shape `(m=time, n=space)`: one row per time step and one column per mesh cell.
$$\begin{bmatrix} C_{1,1} & C_{1,2} & \cdots & C_{1,n} \\ C_{2,1} & C_{2,2} & \cdots & C_{2,n} \\ \vdots & \vdots & \ddots & \vdots \\ C_{m,1} & C_{m,2} & \cdots & C_{m,n} \end{bmatrix} \tag{1}$$

In [19]:
# Load the second exported frame and display it for comparison with the first one.
second_frame_csv = str(data_paths[1])
dfs_1 = pd.read_csv(second_frame_csv)
dfs_1

Unnamed: 0,Points:0,Points:1,Points:2,k,nut,omega,p
0,-0.013184,0.000209,0.001576,0.013179,1.647500e-07,80294.0,12834.00000
1,0.028577,0.000500,4.113900,0.010939,8.956200e-07,10116.0,2245.60000
2,0.004193,0.000439,0.002325,0.110610,2.003100e-07,600240.0,12833.00000
3,0.002439,-0.000347,4.999400,0.023297,1.932800e-07,118310.0,0.38197
4,-0.007843,0.000186,0.002879,0.015415,1.704000e-07,90478.0,12832.00000
...,...,...,...,...,...,...,...
17178,0.016414,-0.000500,0.075661,0.016093,0.000000e+00,17607.0,12775.00000
17179,0.016387,-0.000500,0.000997,0.211070,0.000000e+00,1171200.0,12835.00000
17180,-0.017435,-0.000500,0.004380,0.204520,0.000000e+00,633320.0,12831.00000
17181,0.017914,-0.000439,5.000000,0.110350,1.890200e-07,654590.0,0.00000


### Start ETL

In [57]:
# Build one table per physical quantity, with one row per mesh point and one
# column per time frame.
#
# Every CSV file is read exactly once: its point coordinates are checked
# against the first frame and the chosen columns are collected in per-term
# lists. Each table is then assembled with a single `pd.concat`, instead of
# re-reading all files for every term and growing the table one column at a
# time.
chosen_columns = ['U:0', 'U:1', 'U:2', 'k', 'nut', 'omega', 'p']
point_columns = ["Points:0", "Points:1", "Points:2"]

dfs_0 = pd.read_csv(str(data_paths[0]))
reference_points = dfs_0[point_columns]

# `.copy()` keeps only the selected column alive, not the whole parsed frame.
columns_per_term = {term: [dfs_0[term].copy()] for term in chosen_columns}

for i in tqdm(range(1, NUM_FRAMES)):
    frame = pd.read_csv(str(data_paths[i]))
    # Make sure all point coordinates are the same across time.
    pd.testing.assert_frame_equal(reference_points, frame[point_columns])
    for term in chosen_columns:
        columns_per_term[term].append(frame[term].copy())

tables = {}
for term in chosen_columns:
    print(f"Working with {term}")
    # `pop` releases the per-frame columns of this term once they are merged.
    p_data = pd.concat(columns_per_term.pop(term), axis=1)
    print(f"shape of p_data {p_data.shape}")
    tables[term] = p_data

Working with U:0


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)
Working with U:1


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)
Working with U:2


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)
Working with k


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)
Working with nut


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)
Working with omega


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)
Working with p


  0%|          | 0/998 [00:00<?, ?it/s]

shape of p_data (17183, 999)


---

In [58]:
# Display the assembled pressure table (rows: mesh points, columns: time frames).
tables["p"]

Unnamed: 0,p,p.1,p.2,p.3,p.4,p.5,p.6,p.7,p.8,p.9,...,p.10,p.11,p.12,p.13,p.14,p.15,p.16,p.17,p.18,p.19
0,35136.000,12834.00000,5356.20000,2366.90000,1557.20000,1428.60000,1469.30000,1540.80000,1608.70000,1664.70000,...,509.03000,509.15000,509.16000,509.20000,509.17000,509.33000,509.38000,509.40000,509.16000,509.00000
1,6403.800,2245.60000,950.28000,408.05000,268.57000,251.02000,259.52000,274.47000,287.76000,300.22000,...,93.87500,93.91200,93.92100,93.93100,93.91400,93.91600,93.91300,93.93200,93.91400,93.88600
2,35120.000,12833.00000,5355.80000,2366.30000,1556.50000,1427.80000,1468.40000,1539.90000,1607.80000,1663.80000,...,508.48000,508.60000,508.61000,508.65000,508.62000,508.78000,508.83000,508.85000,508.61000,508.45000
3,13.258,0.38197,0.39249,0.33601,0.30548,0.30886,0.31336,0.30765,0.31152,0.31213,...,0.14933,0.14932,0.14927,0.14922,0.14916,0.14915,0.14914,0.14917,0.14916,0.14917
4,35101.000,12832.00000,5355.40000,2366.00000,1556.30000,1427.60000,1468.20000,1539.70000,1607.50000,1663.50000,...,508.36000,508.48000,508.49000,508.53000,508.50000,508.66000,508.71000,508.73000,508.49000,508.33000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17178,34392.000,12775.00000,5322.00000,2337.70000,1529.30000,1401.30000,1442.50000,1514.20000,1582.10000,1638.00000,...,498.27000,498.39000,498.40000,498.44000,498.41000,498.57000,498.62000,498.64000,498.40000,498.24000
17179,35155.000,12835.00000,5357.20000,2367.60000,1557.80000,1429.10000,1469.80000,1541.30000,1609.20000,1665.30000,...,509.36000,509.47000,509.48000,509.52000,509.49000,509.65000,509.70000,509.72000,509.48000,509.32000
17180,35084.000,12831.00000,5354.60000,2365.30000,1555.60000,1427.00000,1467.60000,1539.10000,1606.90000,1662.90000,...,508.09000,508.20000,508.22000,508.25000,508.22000,508.38000,508.43000,508.45000,508.21000,508.05000
17181,0.000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000


To make the tables consistent with (1), we need to transpose them.

In [59]:
# Shape of the underlying array before transposing.
tables["p"].values.shape

(17183, 999)

In [60]:
# Replace every table by its transposed NumPy array so that axis 0 is time and
# axis 1 is space.
# Tables that are already NumPy arrays are skipped, which makes this cell safe
# to re-run: without the guard a second run raises AttributeError because
# ndarrays have no `.values`.
for key, table in tables.items():
    if not isinstance(table, np.ndarray):
        tables[key] = table.values.T

In [61]:
# Shape after transposing.
tables["p"].shape

(999, 17183)

In [62]:
# Split boundaries along the time axis: the first 50% of the frames go to
# training and the next 10% to validation; everything from `val_index` onwards
# is left for testing.
# The frame count is read from axis 0 of the transposed 'p' table, i.e. the
# axis these indices are applied to, instead of from `p_data`, a leftover loop
# variable of the ETL cell.
TRAIN_FRACTION = .5
VAL_FRACTION = .1
num_frames = tables['p'].shape[0]
train_index = int(num_frames * TRAIN_FRACTION)
val_index = train_index + int(num_frames * VAL_FRACTION)
print(f"train_index: {train_index}, val_index: {val_index}")

train_index: 499, val_index: 598


In [63]:
# Orientation check: the time axis must be shorter than the number of cells in the mesh.
time_steps, mesh_cells = tables['p'].shape
assert time_steps < mesh_cells

### Splitting into train, validation and test sets

In [64]:
# Try the chronological split on the pressure table and show the resulting shapes.
p_table = tables['p']
train_data, val_data, test_data = (
    p_table[:train_index],
    p_table[train_index:val_index],
    p_table[val_index:],
)
print(train_data.shape, val_data.shape, test_data.shape)

(499, 17183) (99, 17183) (401, 17183)


In [65]:
# Split every table chronologically and pickle each part to
# train_<key>.pkl, val_<key>.pkl and test_<key>.pkl in the working directory.
for key, table in tables.items():
    train_data = table[:train_index]
    val_data = table[train_index:val_index]
    test_data = table[val_index:]
    print(train_data.shape, val_data.shape, test_data.shape)
    outputs = (
        (f'train_{key}.pkl', train_data),
        (f'val_{key}.pkl', val_data),
        (f'test_{key}.pkl', test_data),
    )
    for filename, split in outputs:
        with open(filename, 'wb') as f:
            pickle.dump(split, f)

(499, 17183) (99, 17183) (401, 17183)
(499, 17183) (99, 17183) (401, 17183)
(499, 17183) (99, 17183) (401, 17183)
(499, 17183) (99, 17183) (401, 17183)
(499, 17183) (99, 17183) (401, 17183)
(499, 17183) (99, 17183) (401, 17183)
(499, 17183) (99, 17183) (401, 17183)
