In [1]:
import duckdb
import pandas as pd
import tapgdc
# note: pyarrow is also required for saving to parquet in pandas

# for reporting size difference
import os

# Data storage path: root directory handed to the tapgdc conversion calls
# below; the resulting parquet files are read back from
# <save_path>/timeseries/<ID>.parquet
save_path = '../../../tap_data'

Create the metadata first

In [2]:
# Create the metadata record via tapgdc and fill in the fields that describe
# this experiment (catalyst, author, publication, acquisition settings).
meta_data = tapgdc.init_metadata()
meta_data['catalyst'] = 'Pt'
meta_data['catalyst_amt_mg'] = 15.6  # catalyst loading in mg (matches the preparation notes)
meta_data['creator'] = 'Zongtang Fang'
meta_data['name'] = 'irreversibleO2'
meta_data['paper_DOI'] = 'https://doi.org/10.1016/j.cattod.2022.02.010'
meta_data['date_created'] = '2021-12-16'
meta_data['time_delta_s'] = 0.001  # presumably the sampling interval in seconds -- TODO confirm against tapgdc


# One pulse-iteration record per recorded gas. The argon record is left
# exactly as init_pulse_iteration() returns it; only the oxygen record is
# customised.
argon_meta = tapgdc.init_pulse_iteration()
oxygen_meta = tapgdc.init_pulse_iteration()
oxygen_meta['amu'] = 32  # oxygen is followed at AMU 32 (see the preparation notes)
oxygen_meta['rtype'] = 'reactant'
# NOTE(review): the list order (argon, oxygen) is the same as the order of the
# CSV paths passed to csv2table; presumably tapgdc pairs them by position -- confirm.
meta_data['pulse_iteration'] = [argon_meta, oxygen_meta]
# Free-text description of the catalyst synthesis, pretreatment and pulsing
# protocol. The text has two paragraphs; a single-quoted Python literal may
# not contain a raw line break, so the paragraphs are written as two adjacent
# literals joined by an explicit '\n'. The stored text is otherwise unchanged.
meta_data['preparation_notes'] = (
    'The Strong Electrostatic Adsorption (SEA) method was used for the synthesis of 1.0 wt% Pt/SiO2 catalyst.  A commercial silica (AEROSIL OX50, $50 m2/g) from EVONIK was chosen as the support and a precursor of tetraamine platinum (II) hydroxide (Pt(NH3)4(OH)2, 99%, from Aldrich) was employed to deposit the metal. The precursor was dissolved in deionized (DI) water and the initial pH was adjusted to 11.5 with NaOH. Silica was added to the solution and the contents were shaken for one hour.  The resulting mixture was washed with DI water, filtered, and dried overnight under vacuum.  The material was pressed and sieved, retaining the 250 - 300  mu m fraction.  Next, the catalyst was pretreated ex-situ in 50% oxygen and argon flow (30 mL/min) at 400C for $30 min$ followed by reduction in 4% hydrogen and argon flow (50 mL/min) at 400C for one hour. The ex-situ oxidation and reduction was performed with three cycles. Approximately 15.6 mg of pretreated catalyst with the particle size of 250 - 300 mu m was loaded between two zones of the same particle size quartz sand (Sigma Aldrich).  The total length of the reactor was 0.0564m, with a catalyst zone of 0.002 m, and a cross sectional area of 1.256X10-5  m2. The TAP reactor was evacuated at 300C to a pressure of 1X10-7 torr and the catalyst was subjected to at least three cycles of alternating pulses of 200 pulses of carbon monoxide and argon and 200 pulses of oxygen and argon to activate the platinum and reach a reproducible starting point for pulsing experiments. Prior to oxygen adsorption, the catalyst was again reduced at 300C by introducing a sequence of 50% carbon monoxide and argon pulses until no carbon dioxide formation was detected. The TAP reactor was subsequently heated to 500C and kept for 30 min to remove adsorbed carbon monoxide and then cooled to the desired temperature for testing oxidation. \n'
    'The adsorption of oxygen on the catalyst was recorded in separate experiments by pulsing a 1:1 oxygen and argon mixture at 300C with different pulsing intervals of 2.0, 2.5, 3.0, 3.5, and 4.0 s. The time evolution of three mass fragments was followed, namely argon (AMU 40), oxygen (AMU 32), and carbon dioxide (AMU 44). There was no carbon dioxide production detected at the beginning of each oxidation experiment.'
)

In [4]:
# Locate the two source CSV files (argon first, then oxygen) and pass them,
# together with the metadata and the output directory, to tapgdc.csv2table.
argon_path, oxygen_path = (
    '../../../TAP/zongtang_oxygen_data/argon.csv',
    '../../../TAP/zongtang_oxygen_data/oxygen.csv',
)
csv_paths = [argon_path, oxygen_path]
tapgdc.csv2table(meta_data, csv_paths, save_path)

File size ratio of the parquet file to the source CSVs (parquet size / total CSV size). Lower means better compression.

In [5]:
# Size of the parquet file written for this dataset, relative to the combined
# size of the two source CSV files.
tmp_path = f"{save_path}/timeseries/{meta_data['ID']}.parquet"
parquet_bytes = os.path.getsize(tmp_path)
csv_bytes = os.path.getsize(argon_path) + os.path.getsize(oxygen_path)
parquet_bytes / csv_bytes

0.14734534203840377

Example for a TDMS file

In [6]:
# Fresh metadata record for the TDMS example. This rebinds meta_data, so the
# record built for the CSV example is no longer reachable through that name.
meta_data = tapgdc.init_metadata()
meta_data['ID'] = '0002'  # set explicitly; used below to locate the parquet file
meta_data['name'] = '0.5Pt_CO-25C-set1'
meta_data['time_delta_s'] = 0.001  # presumably the sampling interval in seconds -- TODO confirm against tapgdc
# Source TDMS file to convert
file_path = '../../../TAP/random_walk/data/0.5Pt_CO-25C-set1.tdms'

In [7]:
# Hand the TDMS file, its metadata and the output directory to tapgdc; the
# parquet file is expected under save_path/timeseries/ afterwards.
tapgdc.tdms2table(meta_data, file_path, save_path)

In [7]:
# Size of the parquet file written for the TDMS dataset, relative to the size
# of the source TDMS file.
tmp_path = f"{save_path}/timeseries/{meta_data['ID']}.parquet"
parquet_bytes = os.path.getsize(tmp_path)
tdms_bytes = os.path.getsize(file_path)
parquet_bytes / tdms_bytes

0.37835401975357685

Pulling the data

In [8]:
# Pull one pulse (pulse_index 199 of pulse_iteration 1) back out of the first
# dataset's parquet file and preview it.
# The path is built from save_path -- the directory the conversion cells wrote
# to -- instead of a hard-coded home-directory location, so the query keeps
# working when save_path changes or the notebook runs on another machine.
# The ID '0001' is spelled out because meta_data now describes the TDMS
# example; it is the ID the original query assumed for the CSV example.
parquet_path = save_path + '/timeseries/0001.parquet'
test = duckdb.execute(
    f"SELECT * FROM '{parquet_path}' "
    "WHERE pulse_index = 199 AND pulse_iteration = 1"
).df()
test.head()

Unnamed: 0,pulse_iteration,pulse_index,time_index,flux
0,1,199,0,-0.0447
1,1,199,1,-0.0467
2,1,199,2,-0.0531
3,1,199,3,-0.0489
4,1,199,4,-0.0622


In [9]:
# (rows, columns) of the pulse selected by the previous query
test.shape

(4000, 4)

In [10]:
# Pull one pulse (pulse_index 199 of pulse_iteration 1) back out of the TDMS
# dataset's parquet file (ID '0002') and preview it.
# The path is built from save_path -- the directory the conversion cells wrote
# to -- instead of a hard-coded home-directory location, so the query keeps
# working when save_path changes or the notebook runs on another machine.
parquet_path = save_path + '/timeseries/0002.parquet'
test = duckdb.execute(
    f"SELECT * FROM '{parquet_path}' "
    "WHERE pulse_index = 199 AND pulse_iteration = 1"
).df()
test.head()

Unnamed: 0,pulse_iteration,pulse_index,time_index,flux
0,1,199,0,0.0622
1,1,199,1,0.0684
2,1,199,2,0.0635
3,1,199,3,0.0645
4,1,199,4,0.0645


In [11]:
# (rows, columns) of the pulse selected by the previous query
test.shape

(8100, 4)