# THIES Binary Processor

THIES DL16 exports two types of data in two different directories: 
* `ARCH_AV1` for avarage values of a 10 min span. Consists of 19 parameters of DataType Float.
* `ARCH_EX1` for both min and max values measured within the 10 min span. Consists of 32 parameters of DataType FloatExtrem.

There is a file for each day measured. File names are in `YYYYMMDD.BIN` format.

In [6]:
import numpy as np
from bitarray import bitarray
import pandas as pd
import configparser
import struct

# Size of row (bytes)
AV_ROW_SIZE = 99
EX_ROW_SIZE = 292
# Size (n of parameters)
AV_DESC_SIZE = 19
EX_DESC_SIZE = 32
# IGNORE first 4 bytes of each row (timestamp?) PENDIENTE !!!
OFFSET = 4 
STATUS_CHAR = {0b00000000 : 0,  # Status OK
              0b10000000 : '-', # Sensor is deactivated in the sensor configuration
              0b01000000 : '-', # Datalogger is in maintenance mode
              0b00100000 : '%', # Timeout (for ex. digitalization takes too long)
              0b00010000 : '!', # Value is out of valid range
              0b00001000 : '@', # Difference between 2 consecutive values is too far
              0b00000100 : '#', # Filling level of the averaging buffer is too low
              0b00000010 : '?', # Error depending on measurement type (for ex. ADC overflow)
              0b00000001 : '?'} # Error depending on measurement type (for ex. cable break)

In [47]:
def read_descfile(path: str) -> dict:
    ''' 
    Input: path DESCFILE.INI
    Returns: dict 
        key is index [i]
        value is dict with parameters from .ini
    '''
    config = configparser.ConfigParser()
    config.read(path)
    data_dict = {}
    for section in config.sections():
        section_dict = dict(config.items(section))
        data_dict[int(section)] = section_dict

    return data_dict

# DataType: DataTime
def bytes2datetime(b: bytes, full_date=False):
    # b is size 4
    bits = bitarray()
    bits.frombytes(b)
    sec = int(bits[0:6][::-1].to01(),2)
    min = int(bits[6:12][::-1].to01(),2)
    hr = int(bits[12:17][::-1].to01(),2)
    time = f'{str(hr).zfill(2)}:{str(min).zfill(2)}'
    if full_date is False:
        return time
    day = int(bits[17:22][::-1].to01(),2)
    mon = int(bits[22:26][::-1].to01(),2)
    yr = int(bits[26:][::-1].to01(),2)
    date = f'20{yr}/{str(mon).zfill(2)}/{str(day).zfill(2)}'
    return date + ' ' + time + f':{str(sec).zfill(2)}'

In [8]:
path_bin_av = './BINFILES/ARCH_AV1/20240531.BIN'
path_bin_ex = './BINFILES/ARCH_EX1/20240531.BIN'
path_ini_av = './BINFILES/ARCH_AV1/DESCFILE.INI'
path_ini_ex = './BINFILES/ARCH_EX1/DESCFILE.INI'

descfile_av = read_descfile(path_ini_av)
descfile_ex = read_descfile(path_ini_ex)

## AV Files
**[Casi listo. Falta leer los 4 primeros bytes y agregar hora-fecha a cada fila.]**

Example file: 31 mayo, 2024
* File: 14.256 bytes
* Rows: 144
* Parameters: 19
* Bytes per row: 99 bytes

In [9]:
with open(path_bin_av, "rb") as bin_file:
    binfile = bin_file.read()

size = len(binfile)
rows = size // AV_ROW_SIZE
print(f'{size} bytes in {rows} rows')


14256 bytes in 144 rows


In [None]:
# Makes array with 144 rows of 95 bytes each
byterows = [binfile[i*AV_ROW_SIZE + OFFSET : (i+1)*AV_ROW_SIZE ] for i in range(0, rows)]

In [None]:
data_arr = np.zeros((rows,AV_DESC_SIZE))
status_arr = np.zeros((rows,AV_DESC_SIZE))
for i, row in enumerate(byterows):
    for j in range(0,19):
        # Status = byte 1
        status = row[j*5]
        status_arr[i][j] = status

        # Value = bytes 2-5, float
        value = struct.unpack('<f', row[j*5+1 : j*5+5])[0]
        data_arr[i][j] = round(value,1)    

In [None]:
data_df = pd.DataFrame(data_arr)
status_df = pd.DataFrame(status_arr)
data_df = data_df.rename(columns={i: descfile_av[i+1]['name'] for i in range(19)})
status_df = status_df.rename(columns={i: descfile_av[i+1]['name'] for i in range(19)})
data_df


Unnamed: 0,UBat,Pressure,PrTemp,Precipitation,WS,WD,WS std. dev.,WD std. dev.,Humidity,AirTemperature,Radiation,PicoMoisture 1,PicoSoilTemp 1,PicoMoisture 2,PicoSoilTemp 2,CO2,UVA Radiation,UVB Radiation,PAR Radiation
0,11.7,1013.7,8.0,0.0,0.8,24.0,0.1,24.3,100.0,5.0,-0.1,0.0,0.0,0.0,0.0,515.8,-0.1,0.0,-0.8
1,11.7,1013.5,8.0,0.0,0.6,329.0,0.2,82.1,100.0,5.0,-0.2,0.0,0.0,0.0,0.0,506.4,-0.1,0.0,-0.8
2,11.7,1013.4,8.0,0.0,0.6,12.3,0.2,27.6,100.0,4.8,-0.2,0.0,0.0,0.0,0.0,518.1,-0.1,0.0,-0.7
3,11.7,1013.2,7.9,0.0,0.8,24.6,0.3,25.1,100.0,4.7,-0.2,0.0,0.0,0.0,0.0,515.2,-0.1,0.0,-0.7
4,11.7,1013.1,7.8,0.0,0.8,17.5,0.1,45.2,100.0,4.7,-0.2,0.0,0.0,0.0,0.0,504.1,-0.1,0.0,-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,1009.7,5.4,0.0,0.7,13.8,0.3,44.2,100.0,3.0,-0.6,0.0,0.0,0.0,0.0,453.8,-0.1,0.0,-0.7
140,11.6,1009.9,5.4,0.0,0.5,69.3,0.3,35.9,100.0,3.0,-0.2,0.0,0.0,0.0,0.0,451.1,-0.1,0.0,-0.8
141,11.6,1009.8,5.4,0.0,0.8,69.6,0.4,34.1,100.0,3.2,-0.3,0.0,0.0,0.0,0.0,453.7,-0.1,0.0,-0.7
142,11.6,1009.9,5.5,0.0,1.0,55.2,0.4,51.5,100.0,3.2,-0.2,0.0,0.0,0.0,0.0,451.1,-0.1,0.0,-0.7


In [None]:
data_df = data_df.where(status_df == 0.0, other=None)
data_df

Unnamed: 0,UBat,Pressure,PrTemp,Precipitation,WS,WD,WS std. dev.,WD std. dev.,Humidity,AirTemperature,Radiation,PicoMoisture 1,PicoSoilTemp 1,PicoMoisture 2,PicoSoilTemp 2,CO2,UVA Radiation,UVB Radiation,PAR Radiation
0,11.7,1013.7,8.0,0,0.8,24.0,0.1,24.3,100,5.0,-0.1,,,,,515.8,-0.1,0,-0.8
1,11.7,1013.5,8.0,0,0.6,329.0,0.2,82.1,100,5.0,-0.2,,,,,506.4,-0.1,0,-0.8
2,11.7,1013.4,8.0,0,0.6,12.3,0.2,27.6,100,4.8,-0.2,,,,,518.1,-0.1,0,-0.7
3,11.7,1013.2,7.9,0,0.8,24.6,0.3,25.1,100,4.7,-0.2,,,,,515.2,-0.1,0,-0.7
4,11.7,1013.1,7.8,0,0.8,17.5,0.1,45.2,100,4.7,-0.2,,,,,504.1,-0.1,0,-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,1009.7,5.4,0,0.7,13.8,0.3,44.2,100,3.0,-0.6,,,,,453.8,-0.1,0,-0.7
140,11.6,1009.9,5.4,0,0.5,69.3,0.3,35.9,100,3.0,-0.2,,,,,451.1,-0.1,0,-0.8
141,11.6,1009.8,5.4,0,0.8,69.6,0.4,34.1,100,3.2,-0.3,,,,,453.7,-0.1,0,-0.7
142,11.6,1009.9,5.5,0,1.0,55.2,0.4,51.5,100,3.2,-0.2,,,,,451.1,-0.1,0,-0.7


## EX Files
**[Casi listo. Falta leer los 4 primeros bytes. Los resultados dan fechas extrañas...]**

In [22]:
with open(path_bin_ex, "rb") as bin_file:
    binfile = bin_file.read()

size = len(binfile)
rows = size // EX_ROW_SIZE
print(f'{size} bytes in {rows} rows')


42048 bytes in 144 rows


In [None]:
byterows = [binfile[i*EX_ROW_SIZE + OFFSET : (i+1)*EX_ROW_SIZE] for i in range(0, rows)]

data_arr = np.zeros((rows,EX_DESC_SIZE))
status_arr = np.zeros((rows,EX_DESC_SIZE))
dates_arr = np.empty((rows,EX_DESC_SIZE), dtype=object)

for i, row in enumerate(byterows):
    for j in range(EX_DESC_SIZE):
        # Status = byte 1
        status = row[j*9]
        status_arr[i, j] = status

        # Value = bytes 2-5, float
        value = struct.unpack('<f', row[j*9 + 1 : j*9 + 5])[0]
        data_arr[i, j] = round(value,1)

        # Datetime = bytes 6-9
        datetime = bytes2datetime(row[j*9 + 5 : j*9 + 9], full_date=False) 
        dates_arr[i, j] = datetime
data_df_ex = pd.DataFrame(data_arr)
status_df_ex = pd.DataFrame(status_arr)
dates_df_ex = pd.DataFrame(dates_arr)

data_df_ex = data_df_ex.rename(columns={i: descfile_ex[i+1]['name'] for i in range(EX_DESC_SIZE)})
status_df_ex = status_df_ex.rename(columns={i: descfile_ex[i+1]['name'] for i in range(EX_DESC_SIZE)})
dates_df_ex = dates_df_ex.rename(columns={i: descfile_ex[i+1]['name'] for i in range(EX_DESC_SIZE)})

In [44]:
data_df_ex = data_df_ex.where(status_df_ex == 0.0, other=None)
data_df_ex

Unnamed: 0,UBat MIN,UBat MAX,Pressure MIN,Pressure MAX,PrTemp MIN,PrTemp MAX,WS MIN,WS MAX gust,WD MIN,WD MAX gust,...,PicoSoilTemp 2 MIN,PicoSoilTemp 2 MAX,CO2 MIN,CO2 MAX,UVA Radiation MIN,UVA Radiation MAX,UVB Radiation MIN,UVB Radiation MAX,PAR Radiation MIN,PAR Radiation MAX
0,11.7,11.7,1013.6,1013.8,8.0,8.1,0.4,1.2,327.5,16.6,...,,,485.7,527.7,-0.1,-0.1,0,0,-0.8,-0.7
1,11.7,11.7,1013.4,1013.7,8.0,8.0,0.0,1.0,0.0,97.5,...,,,493.5,519.1,-0.1,-0.1,0,0,-0.8,-0.7
2,11.7,11.7,1013.1,1013.6,7.9,8.0,0.3,0.9,17.5,7.5,...,,,497.5,533.4,-0.1,-0.1,0,0,-0.8,-0.7
3,11.7,11.7,1013.0,1013.3,7.9,8.0,0.3,1.4,347.5,37.5,...,,,501.1,526.9,-0.1,-0.1,0,0,-0.8,-0.7
4,11.7,11.7,1013.0,1013.2,7.8,7.9,0.4,1.1,355.0,85.8,...,,,493.6,515.8,-0.1,-0.1,0,0,-0.8,-0.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,11.6,1009.6,1009.9,5.4,5.5,0.3,1.6,122.5,25.8,...,,,443.6,463.5,-0.1,-0.1,0,0,-0.8,-0.7
140,11.6,11.6,1009.7,1010.0,5.4,5.4,0.0,1.4,0.0,14.1,...,,,443.7,457.2,-0.1,-0.1,0,0,-0.8,-0.7
141,11.6,11.6,1009.7,1010.0,5.4,5.5,0.0,1.6,0.0,45.0,...,,,449.9,456.6,-0.1,-0.1,0,0,-0.8,-0.7
142,11.6,11.6,1009.7,1010.1,5.4,5.5,0.3,2.1,230.0,81.6,...,,,445.7,457.2,-0.1,-0.1,0,0,-0.8,-0.7


In [49]:
dates_df_ex = dates_df_ex.where(status_df_ex == 0.0, other=None)
dates_df_ex

Unnamed: 0,UBat MIN,UBat MAX,Pressure MIN,Pressure MAX,PrTemp MIN,PrTemp MAX,WS MIN,WS MAX gust,WD MIN,WD MAX gust,...,PicoSoilTemp 2 MIN,PicoSoilTemp 2 MAX,CO2 MIN,CO2 MAX,UVA Radiation MIN,UVA Radiation MAX,UVB Radiation MIN,UVB Radiation MAX,PAR Radiation MIN,PAR Radiation MAX
0,03:57,03:59,07:59,11:58,03:58,03:59,07:59,07:56,07:59,07:56,...,,,03:56,11:58,03:57,11:56,03:58,11:57,03:56,07:57
1,00:01,00:01,04:02,00:00,00:02,00:02,08:02,04:01,08:02,04:01,...,,,08:00,00:00,08:02,00:01,08:02,00:00,08:02,08:02
2,04:02,04:02,02:00,04:00,12:01,04:02,12:00,02:03,12:00,02:03,...,,,02:01,12:03,02:03,12:01,02:02,12:00,02:00,12:01
3,10:00,06:01,06:01,10:01,10:02,10:02,10:03,06:02,10:03,06:02,...,,,10:00,10:01,10:01,10:01,10:02,06:03,10:01,10:00
4,14:02,14:02,09:01,01:00,14:02,14:02,14:03,01:03,14:03,01:03,...,,,09:03,01:01,09:00,14:01,09:00,14:01,09:02,14:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,00:59,00:57,00:57,04:56,00:58,00:57,00:57,08:57,00:57,08:57,...,,,04:56,00:59,04:57,00:56,04:57,00:57,04:58,08:59
140,04:58,04:57,04:56,12:59,04:58,04:58,02:56,02:56,02:56,02:56,...,,,04:57,02:56,02:59,02:56,02:57,04:57,02:58,02:56
141,10:58,10:59,06:59,10:59,10:58,10:59,14:59,10:57,14:59,10:57,...,,,06:57,06:59,10:59,14:59,10:59,10:58,10:57,14:59
142,14:57,14:58,14:58,09:59,14:59,14:58,14:56,09:58,14:56,09:58,...,,,09:56,14:57,09:58,01:56,09:59,01:56,09:57,01:57


## Class format (en proceso)
**[Un resumen de todo lo anterior en formato clase]**
Por revisar:
* Resultados de fechas incorrectos ¿por qué? (datatype EX)
* Agregar timestamp a datatype AV
* 4 bytes iniciales?
* Función para guardar en .txt u otros
* Opción de procesar múltiples archivos .BIN para obtener un solo .txt


In [57]:
class THIESData:
    # Bytes per row
    ROW_SIZE = {'av': 99, 'ex': 292}
    # Parameters per row
    DESC_SIZE = {'av': 19, 'ex': 32}
    # IGNORE first 4 bytes of each row (timestamp?) PENDIENTE !!!
    OFFSET = 4 

    def __init__(self, binfile_path: str, inifile_path: str, datatype: str) -> None:
        d = datatype.lower().strip() 
        if d not in ['av', 'ex']:
            raise ValueError("Invalid datatype. Expected 'av' (average values) or 'ex' (minmax values).")
        else:
            self._bpr = THIESData.ROW_SIZE[d]
            
        self._datatype = datatype
        self._binfile = self._read_binfile(binfile_path)

        self.descfile = self._read_descfile(inifile_path)
        self.nparameters = len(self.descfile)
        self.nbytes = len(self._binfile)
        self.nrows = int(self.nbytes / self._bpr)
        self.statusDF = None
        self.dataDF = None
        self.datesDF = None  

        self._make_dataframe()

    def _bytes2datetime(self, b: bytes, full_date=False):
        # b is size 4
        bits = bitarray()
        bits.frombytes(b)
        sec = int(bits[0:6][::-1].to01(),2)
        min = int(bits[6:12][::-1].to01(),2)
        hr = int(bits[12:17][::-1].to01(),2)
        time = f'{str(hr).zfill(2)}:{str(min).zfill(2)}'
        if full_date is False:
            return time
        day = int(bits[17:22][::-1].to01(),2)
        mon = int(bits[22:26][::-1].to01(),2)
        yr = int(bits[26:][::-1].to01(),2)
        date = f'20{yr}/{str(mon).zfill(2)}/{str(day).zfill(2)}'
        return date + ' ' + time + f':{str(sec).zfill(2)}'
    
    def _read_binfile(self, path: str) -> bytes:
        with open(path, "rb") as bin_file:
            binfile = bin_file.read()
        return binfile
    
    def _read_descfile(self, path: str) -> dict:
        config = configparser.ConfigParser()
        config.read(path)
        data_dict = {}
        for section in config.sections():
            section_dict = dict(config.items(section))
            data_dict[int(section)] = section_dict
        return data_dict
    
    def _make_dataframe(self) -> None:
        byterows = [self._binfile[i*self._bpr + THIESData.OFFSET : (i+1)*self._bpr ] for i in range(0, self.nrows)]
        data_arr = np.zeros((self.nrows, self.nparameters))
        status_arr = np.zeros((self.nrows, self.nparameters))
        
        if self._datatype == 'av':    
            for i, row in enumerate(byterows):
                for j in range(self.nparameters):
                    # Status = byte 1
                    status = row[j*5]
                    status_arr[i, j] = status

                    # Value = bytes 2-5, float
                    value = struct.unpack('<f', row[j*5+1 : j*5+5])[0]
                    data_arr[i, j] = round(value,1)
            self.dataDF = pd.DataFrame(data_arr).rename(columns={i: self.descfile[i+1]['name'] for i in range(self.nparameters)})
            self.statusDF = pd.DataFrame(status_arr).rename(columns={i: self.descfile[i+1]['name'] for i in range(self.nparameters)})
            
            self.dataDF = self.dataDF.where(self.statusDF == 0.0, other=None)
            
        else:
            dates_arr = np.empty((self.nrows, self.nparameters), dtype=object)
            for i, row in enumerate(byterows):
                for j in range(self.nparameters):
                    # Status = byte 1
                    status = row[j*9]
                    status_arr[i, j] = status

                    # Value = bytes 2-5, float
                    value = struct.unpack('<f', row[j*9 + 1 : j*9 + 5])[0]
                    data_arr[i, j] = round(value,1)

                    # Datetime = bytes 6-9
                    datetime = self._bytes2datetime(row[j*9 + 5 : j*9 + 9], full_date=False) 
                    dates_arr[i, j] = datetime

            self.dataDF = pd.DataFrame(data_arr).rename(columns={i: self.descfile[i+1]['name'] for i in range(self.nparameters)})
            self.statusDF = pd.DataFrame(status_arr).rename(columns={i: self.descfile[i+1]['name'] for i in range(self.nparameters)})
            self.datesDF = pd.DataFrame(dates_arr).rename(columns={i: self.descfile[i+1]['name'] for i in range(self.nparameters)})

            self.dataDF = self.dataDF.where(self.statusDF == 0.0, other=None)
            self.datesDF = self.datesDF.where(self.statusDF == 0.0, other=None)
    
    def write_csv(self, path: str) -> None:
        with open(path, 'w') as outfile:
            outfile.write(self.dataDF.to_csv())
    
    def __repr__(self) -> str:
        return str(self.dataDF)
    
    def _repr_html_(self):
        return self.dataDF._repr_html_()

In [59]:
data1 = THIESData(path_bin_av, path_ini_av, 'av')
data1

Unnamed: 0,UBat,Pressure,PrTemp,Precipitation,WS,WD,WS std. dev.,WD std. dev.,Humidity,AirTemperature,Radiation,PicoMoisture 1,PicoSoilTemp 1,PicoMoisture 2,PicoSoilTemp 2,CO2,UVA Radiation,UVB Radiation,PAR Radiation
0,11.7,1013.7,8.0,0,0.8,24.0,0.1,24.3,100,5.0,-0.1,,,,,515.8,-0.1,0,-0.8
1,11.7,1013.5,8.0,0,0.6,329.0,0.2,82.1,100,5.0,-0.2,,,,,506.4,-0.1,0,-0.8
2,11.7,1013.4,8.0,0,0.6,12.3,0.2,27.6,100,4.8,-0.2,,,,,518.1,-0.1,0,-0.7
3,11.7,1013.2,7.9,0,0.8,24.6,0.3,25.1,100,4.7,-0.2,,,,,515.2,-0.1,0,-0.7
4,11.7,1013.1,7.8,0,0.8,17.5,0.1,45.2,100,4.7,-0.2,,,,,504.1,-0.1,0,-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,1009.7,5.4,0,0.7,13.8,0.3,44.2,100,3.0,-0.6,,,,,453.8,-0.1,0,-0.7
140,11.6,1009.9,5.4,0,0.5,69.3,0.3,35.9,100,3.0,-0.2,,,,,451.1,-0.1,0,-0.8
141,11.6,1009.8,5.4,0,0.8,69.6,0.4,34.1,100,3.2,-0.3,,,,,453.7,-0.1,0,-0.7
142,11.6,1009.9,5.5,0,1.0,55.2,0.4,51.5,100,3.2,-0.2,,,,,451.1,-0.1,0,-0.7


In [60]:
data2 = THIESData(path_bin_ex, path_ini_ex, 'ex')
data2

Unnamed: 0,UBat MIN,UBat MAX,Pressure MIN,Pressure MAX,PrTemp MIN,PrTemp MAX,WS MIN,WS MAX gust,WD MIN,WD MAX gust,...,PicoSoilTemp 2 MIN,PicoSoilTemp 2 MAX,CO2 MIN,CO2 MAX,UVA Radiation MIN,UVA Radiation MAX,UVB Radiation MIN,UVB Radiation MAX,PAR Radiation MIN,PAR Radiation MAX
0,11.7,11.7,1013.6,1013.8,8.0,8.1,0.4,1.2,327.5,16.6,...,,,485.7,527.7,-0.1,-0.1,0,0,-0.8,-0.7
1,11.7,11.7,1013.4,1013.7,8.0,8.0,0.0,1.0,0.0,97.5,...,,,493.5,519.1,-0.1,-0.1,0,0,-0.8,-0.7
2,11.7,11.7,1013.1,1013.6,7.9,8.0,0.3,0.9,17.5,7.5,...,,,497.5,533.4,-0.1,-0.1,0,0,-0.8,-0.7
3,11.7,11.7,1013.0,1013.3,7.9,8.0,0.3,1.4,347.5,37.5,...,,,501.1,526.9,-0.1,-0.1,0,0,-0.8,-0.7
4,11.7,11.7,1013.0,1013.2,7.8,7.9,0.4,1.1,355.0,85.8,...,,,493.6,515.8,-0.1,-0.1,0,0,-0.8,-0.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,11.6,1009.6,1009.9,5.4,5.5,0.3,1.6,122.5,25.8,...,,,443.6,463.5,-0.1,-0.1,0,0,-0.8,-0.7
140,11.6,11.6,1009.7,1010.0,5.4,5.4,0.0,1.4,0.0,14.1,...,,,443.7,457.2,-0.1,-0.1,0,0,-0.8,-0.7
141,11.6,11.6,1009.7,1010.0,5.4,5.5,0.0,1.6,0.0,45.0,...,,,449.9,456.6,-0.1,-0.1,0,0,-0.8,-0.7
142,11.6,11.6,1009.7,1010.1,5.4,5.5,0.3,2.1,230.0,81.6,...,,,445.7,457.2,-0.1,-0.1,0,0,-0.8,-0.7
