In [1]:
import numpy as np
import pandas as pd
import os
import re
import pathlib
import h5py

import src.SimData  as sim_data

In [2]:

def read_data_buffer(data_file):
    """Read every top-level entry of an HDF5 file into a plain dictionary.

    Parameters
    ----------
    data_file : str or path-like
        Path of the HDF5 file. It is opened read-only and closed again before
        this function returns.

    Returns
    -------
    dict
        Maps each top-level key of the file to the result of reading that
        entry in full with ``[:]``. No key is filtered out here, so callers
        remove entries they do not need (e.g. 'File_Data') themselves.
    """
    with h5py.File(data_file, "r") as file:
        # Each entry is read out with `[:]` while the file is still open; the
        # resulting dictionary is what callers keep using after it is closed.
        return {key: file[key][:] for key in file.keys()}


In [3]:

### Jorge dataset
data_file_jorge = "Buffer_Data/Experimental_data_CO_Jorge.hdf5"
data_dict_h5_jorge = read_data_buffer(data_file_jorge)

# Drop the 'File_Data' entry in place so only the remaining entries become columns.
del data_dict_h5_jorge['File_Data']
pd_jorge = pd.DataFrame(data=data_dict_h5_jorge)

In [4]:
# Plain-text preview of the first five rows of the Jorge table.
print(pd_jorge.iloc[:5])

     C  CO  CO2                 O                 O2            O3  \
0  0.0   0    0  8157238605327579  16385293162655330  5.460660e+11   
1  0.0   0    0  8157238605327579  16385293162655330  5.460660e+11   
2  0.0   0    0  7734088067177275  16748044168456480  5.432798e+11   
3  0.0   0    0  7734088067177275  16748044168456480  5.432798e+11   
4  0.0   0    0  9699256414667572  25296339610481060  8.603773e+11   

         Tgas         Tnw  Tw  current  frac_CO2  gamma_exp  pressure  
0  394.848150  332.243200  50       40       0.0   0.000775       1.0  
1  394.848150  332.243200  50       40       0.0   0.000843       1.0  
2  395.333494  332.346773  50       40       0.0   0.000775       1.0  
3  395.333494  332.346773  50       40       0.0   0.000843       1.0  
4  419.001166  335.174979  50       40       0.0   0.000827       1.5  


In [5]:

### LoKI dataset
data_file_loki = "Buffer_Data/Experimental_data_CO_TD_LoKIV2.hdf5"
data_dict_h5_loki = read_data_buffer(data_file_loki)

# Drop the 'File_Data' entry in place so only the remaining entries become columns.
del data_dict_h5_loki['File_Data']
pd_loki = pd.DataFrame(data=data_dict_h5_loki)


In [6]:
# Show which entries the LoKI dictionary holds, then its pressure values.
print(data_dict_h5_loki.keys(), data_dict_h5_loki['pressure'], sep="\n")

dict_keys(['C', 'CO', 'CO2', 'O', 'O2', 'O3', 'Tgas', 'Tnw', 'Tw', 'current', 'frac_CO2', 'gamma_exp', 'pressure'])
[1.5 2.  3.  5.  0.8 1.  1.5 2.  3.  5.  1.5 2.  3.  5.  1.  0.8 1.5 2.
 3.  5.  0.4 0.6 0.8 1.  1.5 0.4 0.6 0.8 0.4 1.  1.5 0.4 0.6 0.8 1.  0.4
 0.4 0.6 0.8 1.  1.5 2.  3.  5.  0.4 0.6 0.6 0.8 1.  1.5 2.  3.  5.  0.4
 0.6 0.8 1.  1.5 2.  0.4 0.6 0.8 1.  2.  3.  0.4 3.  5.  0.4 0.4 0.6 1.
 1.  1. ]


In [7]:

### TD dataset (read from Experimental_data_Paper.hdf5)
data_file_paper = "Buffer_Data/Experimental_data_Paper.hdf5"
data_dict_h5_paper = read_data_buffer(data_file_paper)

# Drop the 'File_Data' entry in place so only the remaining entries become columns.
del data_dict_h5_paper['File_Data']
pd_paper = pd.DataFrame(data=data_dict_h5_paper)

In [8]:
# Rich display of the first five rows of the Jorge table (last expression of the cell).
pd_jorge.head(n=5)

Unnamed: 0,C,CO,CO2,O,O2,O3,Tgas,Tnw,Tw,current,frac_CO2,gamma_exp,pressure
0,0.0,0,0,8157238605327579,16385293162655330,546066000000.0,394.84815,332.2432,50,40,0.0,0.000775,1.0
1,0.0,0,0,8157238605327579,16385293162655330,546066000000.0,394.84815,332.2432,50,40,0.0,0.000843,1.0
2,0.0,0,0,7734088067177275,16748044168456480,543279800000.0,395.333494,332.346773,50,40,0.0,0.000775,1.0
3,0.0,0,0,7734088067177275,16748044168456480,543279800000.0,395.333494,332.346773,50,40,0.0,0.000843,1.0
4,0.0,0,0,9699256414667572,25296339610481060,860377300000.0,419.001166,335.174979,50,40,0.0,0.000827,1.5


In [9]:
# Inspect the paper dictionary: its keys, its pressure values and the shape of 'CO'.
print(
    data_dict_h5_paper.keys(),
    data_dict_h5_paper['pressure'],
    data_dict_h5_paper['CO'].shape,
    sep="\n",
)

# Relabel the 'N' column as 'O2' so it shares a column name with the other tables.
rename_fields = dict(N='O2')
pd_paper = pd_paper.rename(rename_fields, axis='columns')


dict_keys(['CO', 'N', 'O', 'Tnw', 'Tw', 'current', 'gamma_exp', 'pressure'])
[0.4 0.6 0.8 1.5 2.  3.  0.4 0.6 0.8 1.  1.5 2.  3.  5.  7.5 0.4 0.6 0.8
 1.  1.5 2.  3.  5.  0.4 0.6 0.8 1.  1.5 2.  3.  5.  7.5 0.4 0.6 0.8 1.
 1.5 2.  3.  5.  0.4 0.6 0.8 1.  1.5 2.  3.  5.  7.5 0.4 0.6 0.8 1.  1.5
 2.  3.  5.  0.4 0.6 0.8 1.  1.5 2.  3.  5.  7.5]
(66,)


In [10]:

# Stack the three tables row-wise under a fresh 0..n-1 index.
combined_df = pd.concat(objs=[pd_jorge, pd_loki, pd_paper], axis=0, ignore_index=True)
# NOTE(review): columns missing from one of the source tables come out of the
# concat as NaN and are zero-filled here — confirm 0 is the intended placeholder.
filled_df = combined_df.fillna(value=0)
# Keep the first occurrence of fully identical rows; the index is not renumbered afterwards.
final_df = filled_df.drop_duplicates(keep='first')

In [11]:
# Preview the merged table, followed by its (rows, columns) shape.
print(final_df.head(), final_df.shape, sep="\n")

     C   CO  CO2             O            O2            O3        Tgas  \
0  0.0  0.0  0.0  8.157239e+15  1.638529e+16  5.460660e+11  394.848150   
1  0.0  0.0  0.0  8.157239e+15  1.638529e+16  5.460660e+11  394.848150   
2  0.0  0.0  0.0  7.734088e+15  1.674804e+16  5.432798e+11  395.333494   
3  0.0  0.0  0.0  7.734088e+15  1.674804e+16  5.432798e+11  395.333494   
4  0.0  0.0  0.0  9.699256e+15  2.529634e+16  8.603773e+11  419.001166   

          Tnw    Tw  current  frac_CO2  gamma_exp  pressure  
0  332.243200  50.0     40.0       0.0   0.000775       1.0  
1  332.243200  50.0     40.0       0.0   0.000843       1.0  
2  332.346773  50.0     40.0       0.0   0.000775       1.0  
3  332.346773  50.0     40.0       0.0   0.000843       1.0  
4  335.174979  50.0     40.0       0.0   0.000827       1.5  
(225, 13)


In [12]:
# Count the rows on each side of the pressure threshold of 1.0.
low_nb = (final_df['pressure'] < 1.0).sum()
high_nb = (final_df['pressure'] >= 1.0).sum()

# Report both counts and the fraction of rows that fall below the threshold.
print(low_nb, high_nb, low_nb/(low_nb + high_nb), sep="\n")


54
171
0.24


In [15]:
# Positional rows 100-199 of the 'CO' column; the printed labels are the original
# index values, which have gaps after drop_duplicates.
print(final_df['CO'].iloc[100:200])

109    1.131152e+16
110    2.084902e+16
111    2.735803e+16
112    3.952033e+16
113    6.310036e+16
           ...     
204    0.000000e+00
205    0.000000e+00
206    0.000000e+00
207    0.000000e+00
208    0.000000e+00
Name: CO, Length: 100, dtype: float64


In [13]:
# NOTE(review): `ping` is not defined anywhere in the visible notebook, and the
# recorded output of this cell is a NameError. Presumably a leftover, or a
# deliberate "stop here" barrier so Run All halts before the export cells —
# confirm, then either define it or remove the cell.
ping()

NameError: name 'ping' is not defined

In [None]:
### convert to excel

path_excel = "Experimental_data_CO/Experimental_data_CO_O_merged.xlsx"

# Create the target folder first: to_excel does not create missing parent
# directories and would otherwise fail when the folder is absent.
pathlib.Path(path_excel).parent.mkdir(parents=True, exist_ok=True)

# index=True writes the DataFrame index as the first column of the sheet.
final_df.to_excel(path_excel, index=True)

In [None]:

# Schema handed to the DataLoader: an identity mapping, each field name maps to itself.
schema_buffer = {
    field: field
    for field in (
        "pressure", "current", "frac_CO2",
        "Tgas", "Tnw", "Tw",
        "O", "O2", "O3",
        "C", "CO", "CO2",
        "gamma_exp",
    )
}

output_file = "Experimental_data_CO_O_merged.hdf5"
# The Excel file written earlier in the notebook is the only input file.
file_path = [path_excel]

data_loader = sim_data.DataLoader(
    schema_buffer,
    output_file=output_file,
    files_path=file_path,
)
# NOTE(review): force_update=True presumably rebuilds the output even if it already
# exists — confirm against the DataLoader implementation.
data_loader.load_data(force_update=True)