# Convert HOBO sensor Excel files to parquet

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz

In [2]:
# HOBO logger exports to convert; each name is relative to `srcdir`.
hobo_files = [
    '21852517 2024-07-22 10_25_20 EDT (Data EDT).xlsx',
    'FM-Temp-21695007-CB 2024-07-22 10_20_59 EDT (Data EDT).xlsx',
    'FM-Temp-21852518-LB 2024-07-22 10_18_03 EDT (Data EDT).xlsx',
    'FM-Temp-21852519 2024-07-22 10_21_36 EDT (Data EDT).xlsx',
    'FM-Temp-21852521-FD 2024-07-22 10_22_54 EDT (Data EDT).xlsx',
    'FM-Temp-21852524-CT 2024-07-22 10_26_09 EDT (Data EDT).xlsx',
]

In [4]:
# Input folder holding the HOBO Excel exports, and output folder for the parquet files.
srcdir, dstdir = 'data/src/hobo', 'data/dst'

In [5]:
def read_hobo(fn, srcdir='.', tz='America/New_York', freq='15min'):
    """Read one HOBO Excel export and return it on a regular, UTC-indexed grid.

    The sheet is loaded with its columns renamed to ``DateTimeEDT`` and
    ``Temp``. The naive timestamps are localized to ``tz``, used as the index
    (named ``DateTime``), conformed to a fixed ``freq`` grid, and the index is
    finally converted to UTC. The localized ``DateTimeEDT`` column is kept in
    the returned frame alongside the UTC index.

    Parameters
    ----------
    fn : str
        File name of the Excel workbook.
    srcdir : str, default '.'
        Directory containing ``fn``.
    tz : str or tzinfo, default 'America/New_York'
        Time zone the logger's wall-clock timestamps are interpreted in.
    freq : str, default '15min'
        Frequency of the output grid; grid points with no reading become
        all-NaN rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``DateTimeEDT`` (tz-aware, in ``tz``) and ``Temp``, indexed by
        a UTC ``DatetimeIndex`` named ``DateTime``.

    Notes
    -----
    ``tz_localize`` is called with its defaults, so ambiguous or non-existent
    wall-clock times (DST transitions) raise.
    """
    # NOTE(review): parse_dates=True only asks pandas to parse the *index*;
    # the `.dt` access below relies on the timestamp column already arriving
    # as datetimes from the workbook -- confirm for new exports.
    df = pd.read_excel(
        f'{srcdir}/{fn}',
        parse_dates=True,
        date_format='%Y-%m-%d %H:%M:%S',
        names=['DateTimeEDT', 'Temp'],
    )

    # Attach the logger's time zone to the naive timestamps.
    # NOTE(review): the file names say "EDT", which is a fixed UTC-4 offset,
    # while the default zone observes DST. If a logger records fixed EDT
    # year-round, pass tz='Etc/GMT+4' instead -- confirm against the device.
    df['DateTimeEDT'] = df['DateTimeEDT'].dt.tz_localize(tz)

    # Index by the timestamps (passing the Series keeps the column as well)
    # and name the index 'DateTime' in the same step, without mutating in place.
    df = df.set_index(df['DateTimeEDT'].rename('DateTime'))

    # Conform to a regular grid; missing slots are inserted as NaN rows.
    df = df.asfreq(freq=freq)

    # Express the index in UTC.
    df.index = df.index.tz_convert('UTC')

    return df

In [6]:
# Make sure the output folder exists; to_parquet does not create it.
os.makedirs(dstdir, exist_ok=True)

for file in hobo_files:
    # `df` is left bound to the last converted frame after the loop.
    df = read_hobo(file, srcdir=srcdir)

    # Output name: input name without its extension, with every 'EDT'
    # relabelled 'UTC' because the index has been converted to UTC.
    basename = os.path.splitext(file)[0].replace('EDT', 'UTC')
    print(basename)

    df.to_parquet(os.path.join(dstdir, f'{basename}.parquet'), index=True)

21852517 2024-07-22 10_25_20 UTC (Data UTC)
FM-Temp-21695007-CB 2024-07-22 10_20_59 UTC (Data UTC)
FM-Temp-21852518-LB 2024-07-22 10_18_03 UTC (Data UTC)
FM-Temp-21852519 2024-07-22 10_21_36 UTC (Data UTC)
FM-Temp-21852521-FD 2024-07-22 10_22_54 UTC (Data UTC)
FM-Temp-21852524-CT 2024-07-22 10_26_09 UTC (Data UTC)


In [None]:
# Display the index of `df`, i.e. the last frame converted by the loop above
# (read_hobo returns it with the index converted to UTC).
df.index