In [1]:
import re
import numpy as np
import pandas as pd

In [3]:
# Load the plain-text sensor log into a DataFrame for a quick look at its contents.
# NOTE(review): read_table is called with its defaults (tab separator, header
# inferred from the first line), so the first log record presumably ends up as
# the column name rather than as a data row — confirm, or pass header=None.
p = pd.read_table("data/scd30Log.log")

In [4]:
# Show the last record of the first column. Both axes are addressed by
# position in a single .iloc call; indexing the row Series with a bare `[0]`
# relies on the deprecated positional fallback for integer keys on a
# label-indexed Series.
p.iloc[-1, 0]

"INFO:root:CO2: 698.30ppm, temp: 14.51'C, rh: 84.14%"

In [6]:
# p.iloc[:30, :][0]

def get_met_values(line):
    """Pull the CO2, relative-humidity and temperature readings out of one log line.

    The "INFO:root:CO2:" prefix is removed, the remainder is split on commas,
    and every piece is matched (case-insensitively) against the keywords
    "ppm", "rh" and "temp". A matching piece is reduced to the characters
    digits, space, dot and hyphen, then stripped.

    Returns
    -------
    tuple
        (co2, rh, temp) as strings; an entry is None when no piece of the
        line contained the corresponding keyword.
    """
    payload = line.replace("INFO:root:CO2:", "").strip()
    readings = {"ppm": None, "rh": None, "temp": None}
    for chunk in payload.split(','):
        lowered = chunk.lower()
        for keyword in readings:
            if keyword in lowered:
                # Keep only the numeric-looking characters of the piece.
                readings[keyword] = re.sub(r'[^0-9 .-]', '', chunk).strip()
    return readings["ppm"], readings["rh"], readings["temp"]
    

In [3]:
import gzip

def get_variable_vals(line: list) -> dict:
    ''' Extract the co2, temp, and rh values by string matching
      and export row as dictionary

    Parameters
    ----------
    line : list of str
        The comma-separated pieces of one log record.

    Returns
    -------
    dict
        Keys "ppm", "temp" and "rh". Each value is the first piece containing
        that key, reduced to digits, spaces, dots and hyphens and stripped;
        it is None when no piece contains the key.
    '''
    markers = ("ppm", "temp", "rh")
    var_dict = {}
    for marker in markers:
        # Search the argument itself, so the function does not depend on a
        # global that happens to share a name with the caller's variable.
        match = next((field for field in line if marker in field), None)
        if match is None:
            # A missing reading is reported as None instead of raising.
            var_dict[marker] = None
        else:
            var_dict[marker] = re.sub(r'[^0-9 .-]', '', match).strip()
    return var_dict

# Walk the gzipped log line by line and collect the three readings of every
# CO2 record into parallel lists (values stay as strings at this stage).
co2_list, rh_list, temp_list = [], [], []
with gzip.open("data/scd30Log.log.gz", mode='rt', encoding='UTF-8') as f:
    for line in f:
        # Lines that are not CO2 records are ignored.
        if not line.startswith("INFO:root:CO2"):
            continue
        fields = line.replace("INFO:root:CO2:", "").strip().split(',')
        var_dict = get_variable_vals(fields)

        for target, key in (
            (co2_list, 'ppm'),
            (temp_list, 'temp'),
            (rh_list, 'rh'),
        ):
            target.append(var_dict.get(key))

In [4]:
# Build the readings frame in one chain: the default row index is turned into
# a column, renamed to "timestamp", and every column is cast to float.
df = (
    pd.DataFrame({"temp": temp_list, "rh": rh_list, "co2": co2_list})
    .reset_index()
    .rename(columns={'index': 'timestamp'})
    .astype(float)
)
df

Unnamed: 0,timestamp,temp,rh,co2
0,0.0,20.17,73.97,642.83
1,1.0,17.25,88.36,0.00
2,2.0,17.30,88.46,383.45
3,3.0,17.35,87.78,360.78
4,4.0,17.41,86.97,374.45
...,...,...,...,...
2690643,2690643.0,14.51,84.16,697.80
2690644,2690644.0,14.48,84.12,697.79
2690645,2690645.0,14.51,84.12,698.04
2690646,2690646.0,14.51,84.13,698.18


In [44]:
# Replace each timestamp by its 3600-wide bucket number, then show the
# per-bucket means of the three readings.
# NOTE(review): the bucket is an hour only if there is one row per second —
# TODO confirm the logging interval.
# NOTE(review): the averaged frame is displayed but not assigned, so `out_df`
# still holds one row per sample; the later `out_df.to_csv(...)` cell writes
# those rows, not the means shown here — confirm that is intended.
out_df = df.assign(timestamp=df['timestamp'] // 3600)
bucket_means = out_df.groupby('timestamp')[['temp', 'rh', 'co2']].mean()
bucket_means.reset_index()

Unnamed: 0,timestamp,temp,rh,co2
0,0.0,18.456725,83.334869,621.125253
1,1.0,18.973992,82.239675,666.810117
2,2.0,18.179183,84.914275,653.115156
3,3.0,17.883064,84.890925,670.771253
4,4.0,20.083831,78.134033,757.317186
...,...,...,...,...
743,743.0,13.489028,83.170308,541.985758
744,744.0,13.501786,83.363517,550.353603
745,745.0,13.650853,83.724092,575.527733
746,746.0,13.924997,84.096200,594.260331


In [45]:
# Write `out_df` to disk as CSV (default options, so the frame's index is
# written as the first, unnamed column).
csv_path = 'data/clean_data.csv'
out_df.to_csv(csv_path)