This notebook will transform the output of ParaView into a form that can be fed to ML models.

In [2]:
import pickle
import matplotlib.pyplot as plt
import pandas as pd

import numpy as np

from tqdm.notebook import tqdm

In [3]:
from pathlib import PurePath

In [4]:
# Number of time steps (one CSV frame per step) loaded by this notebook.
# Per the original note, earlier runs used 9 and then 344.
NUM_FRAMES = 999

Our simulation produced `NUM_FRAMES` frames, each exported as a CSV file.

In [5]:
# One CSV path per time step: new_data/data_0.csv ... new_data/data_{NUM_FRAMES-1}.csv
data_paths = [
    PurePath("new_data", f"data_{frame_idx}.csv")
    for frame_idx in range(NUM_FRAMES)
]

In [6]:
# Load the first frame to inspect the columns exported for each time step.
dfs_0 = pd.read_csv(filepath_or_buffer=str(data_paths[0]))
dfs_0

Unnamed: 0,Time,Points:0,Points:1,Points:2,U:0,U:1,U:2,k,nut,omega,p
0,0.002,0.024731,-0.001165,0.097489,-0.059273,0.062762,10.573,0.006028,0.000032,278.280,3517.600
1,0.002,0.004980,-0.016126,4.835800,-0.025099,0.018351,10.484,0.004322,0.000032,44.067,80.626
2,0.002,0.001506,-0.028538,4.913500,0.000000,0.000000,0.000,0.018508,0.000000,6961.300,42.379
3,0.002,0.026603,-0.010438,4.934800,0.000000,0.000000,0.000,0.018638,0.000000,6983.000,31.822
4,0.002,0.001855,0.024689,0.173850,0.040683,0.017779,10.479,0.006059,0.000025,287.620,3480.800
...,...,...,...,...,...,...,...,...,...,...,...
7577,0.002,-0.013326,0.025280,0.124640,0.000000,0.000000,0.000,0.018415,0.000000,6969.100,3504.500
7578,0.002,0.017788,0.021330,0.096825,-0.065950,0.078987,10.243,0.016613,0.000004,4931.500,3517.800
7579,0.002,0.005357,-0.015784,0.196900,0.017935,0.000499,10.545,0.004153,0.000026,24.108,3469.100
7580,0.002,0.024119,0.015329,0.657600,0.000000,0.000000,0.000,0.006105,0.000000,468.020,3139.400


There are five values to choose from:

| Terms | Meaning                                               |   
|-------|-------------------------------------------------------|
| u     | speed vector                                          |
| k     | turbulent kinetic energy                              |
| omega | specific rate of dissipation of k into heat           |
| nut   | kinematic turbulent viscosity                         |
| p     | Pressure                                              |

For each variable we build a matrix of shape `(m=time, n=space)`: one row per time step and one column per mesh point.
$$\begin{bmatrix} C_{1,1} & C_{1,2} & \cdots & C_{1,n} \\ C_{2,1} & C_{2,2} & \cdots & C_{2,n} \\ \vdots & \vdots & \vdots & \vdots \\ C_{m,1} & C_{m,2} & \cdots & C_{m,n} \end{bmatrix}  (1)$$

In [7]:
# Load the second frame to compare it against the first one.
dfs_1 = pd.read_csv(filepath_or_buffer=str(data_paths[1]))
dfs_1

Unnamed: 0,Time,Points:0,Points:1,Points:2,U:0,U:1,U:2,k,nut,omega,p
0,0.004,0.024731,-0.001165,0.097489,-0.067505,0.026594,11.308,0.005933,4.079100e-05,279.510,485.5100
1,0.004,0.004980,-0.016126,4.835800,0.114840,0.041690,11.286,0.004493,3.888800e-05,73.945,5.6927
2,0.004,0.001506,-0.028538,4.913500,0.000000,0.000000,0.000,0.561540,3.605000e-07,10798.000,3.0672
3,0.004,0.026603,-0.010438,4.934800,0.000000,0.000000,0.000,0.552950,2.581800e-07,10772.000,2.3105
4,0.004,0.001855,0.024689,0.173850,-0.012177,-0.022595,11.120,0.005981,4.250800e-05,294.770,482.9800
...,...,...,...,...,...,...,...,...,...,...,...
7577,0.004,-0.013326,0.025280,0.124640,0.000000,0.000000,0.000,0.522590,3.248700e-08,10586.000,484.6300
7578,0.004,0.017788,0.021330,0.096825,-0.070005,0.075416,10.398,0.309730,3.451100e-05,6743.200,485.4800
7579,0.004,0.005357,-0.015784,0.196900,0.011529,-0.017054,11.499,0.004344,5.775900e-05,39.388,481.9700
7580,0.004,0.024119,0.015329,0.657600,0.000000,0.000000,0.000,0.014172,0.000000e+00,545.820,430.9400


### Start ETL

In [13]:
chosen_columns = ['U:0', 'U:1', 'U:2', 'k', 'nut', 'omega', 'p']
coord_columns = ["Points:0", "Points:1", "Points:2"]

# Pass 1: read every frame exactly once and collect, for each variable, one
# Series per time step. Reading each CSV a single time (instead of once per
# variable) and concatenating once at the end avoids redundant I/O and the
# repeated copying caused by growing a DataFrame inside a loop.
reference_coords = None
series_by_term = {term: [] for term in chosen_columns}
for frame_path in tqdm(data_paths[:NUM_FRAMES], desc="Reading frames"):
    frame = pd.read_csv(str(frame_path))
    if reference_coords is None:
        reference_coords = frame[coord_columns]
    else:
        # Make sure all point coordinates are the same across time
        pd.testing.assert_frame_equal(reference_coords, frame[coord_columns])
    for term in chosen_columns:
        # .copy() detaches the column so the rest of the frame can be freed.
        series_by_term[term].append(frame[term].copy())

# Pass 2: build one (space, time) table per variable and report basic stats.
tables = {}
for term in chosen_columns:
    print(f"Working with {term}")
    # pop() releases the per-frame Series as soon as they have been merged.
    # p_data is kept as a name because later cells read it; after the loop it
    # holds the table of the last variable ('p').
    p_data = pd.concat(series_by_term.pop(term), axis=1)
    print(f"    shape of {term} {p_data.shape}")
    flat_values = p_data.values.flatten()
    print(f"    stats: {np.max(flat_values)}, {np.min(flat_values)}, {np.median(flat_values)}, {np.std(flat_values)}")
    tables[term] = p_data

# Sanity check: the largest velocity component is expected along U:2.
max_u0, max_u1, max_u2 = (np.max(tables[component].values) for component in ('U:0', 'U:1', 'U:2'))
assert max_u2 > max_u1 and max_u2 > max_u0, "U:2 is expected to have the largest maximum velocity"

Working with U:0


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of U:0 (7582, 999)
    stats: 6.237, -5.4987, 0.0, 0.48821991224659234
Working with U:1


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of U:1 (7582, 999)
    stats: 4.6267, -4.7924, 0.0, 0.43915575060251183
Working with U:2


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of U:2 (7582, 999)
    stats: 39.224, -0.771, 10.494, 6.971926740615387
Working with k


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of k (7582, 999)
    stats: 8.549, 0.00375, 0.78422, 0.9294222341152129
Working with nut


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of nut (7582, 999)
    stats: 0.001675, 0.0, 0.00010224, 0.00029039667686375916
Working with omega


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of omega (7582, 999)
    stats: 23856.0, 3.375, 3013.1, 4774.403157469514
Working with p


  0%|          | 0/998 [00:00<?, ?it/s]

    shape of p (7582, 999)
    stats: 3566.0, -320.15, 0.44396, 209.10706385625687


---

In [None]:
# Preview the pressure table built by the ETL step.
tables["p"]

To make it consistent with (1), we need to transpose

In [None]:
# Shape of the pressure table before transposing.
tables["p"].values.shape

In [None]:
# Transpose each table to match (1), replacing the DataFrame with a NumPy array.
# Only DataFrames are converted: once an entry is already an array (i.e. this
# cell has run before) it is left untouched, so re-running the cell neither
# fails on `.values` nor flips the axes back.
for key, table in tables.items():
    if isinstance(table, pd.DataFrame):
        tables[key] = table.values.T

In [None]:
# Shape of the pressure table after transposing.
tables["p"].shape

In [None]:
# Fractions of the time steps used for training and validation; the remainder is the test set.
TRAIN_FRACTION = .5
VAL_FRACTION = .1

# Take the number of time steps from the axis that is actually sliced below
# (axis 0 of the transposed tables) instead of from `p_data`, a temporary
# variable left over from the ETL loop.
num_time_steps = tables['p'].shape[0]
train_index = int(num_time_steps * TRAIN_FRACTION)
val_index = train_index + int(num_time_steps * VAL_FRACTION)
print(f"train_index: {train_index}, val_index: {val_index}")

In [None]:
# Time is smaller than number of cells in mesh
pressure_shape = tables['p'].shape
assert pressure_shape[0] < pressure_shape[1]

### Splitting to train val test

In [None]:
# Preview the split on the pressure table: consecutive blocks along the first axis.
pressure = tables['p']
train_rows = slice(None, train_index)
val_rows = slice(train_index, val_index)
test_rows = slice(val_index, None)

train_data = pressure[train_rows, :]
val_data = pressure[val_rows, :]
test_data = pressure[test_rows, :]
print(train_data.shape, val_data.shape, test_data.shape)

In [None]:
# Split every variable into train/val/test along the first axis and pickle each
# part to `<split>_<key>.pkl` in the working directory.
# The leftover `pdb.set_trace()` was removed: it stopped every iteration and
# prevented the notebook from running end to end.
for key in tables:
    print(key)
    train_data = tables[key][:train_index, :]
    val_data = tables[key][train_index:val_index, :]
    test_data = tables[key][val_index:, :]
    print(train_data.shape, val_data.shape, test_data.shape)
    for split_name, split_data in (("train", train_data), ("val", val_data), ("test", test_data)):
        with open(f'{split_name}_{key}.pkl', 'wb') as f:
            pickle.dump(split_data, f)