# THIES Binary Processor

THIES DL16 exports two types of data in two different directories: 
* `ARCH_AV1` for average values of a 10 min span. Consists of 19 parameters of DataType Float.
* `ARCH_EX1` for both min and max values measured within the 10 min span. Consists of 32 parameters of DataType FloatExtrem.

There is a file for each day measured. File names are in `YYYYMMDD.BIN` format.

In [118]:
import numpy as np
from bitarray import bitarray
import pandas as pd
import configparser
import struct

# --- Binary layout of the two archive types ---
# AV archive: bytes per row and number of parameters per row
AV_ROW_SIZE = 99
AV_DESC_SIZE = 19
# EX archive: bytes per row and number of parameters per row
EX_ROW_SIZE = 292
EX_DESC_SIZE = 32
# The first 4 bytes of every row are skipped for now (presumably a timestamp;
# decoding them is still pending).
OFFSET = 4

# Status byte -> marker. Every non-zero key is a single bit of the status byte.
STATUS_CHAR = {
    0x00: 0,    # Status OK
    0x80: '-',  # Sensor is deactivated in the sensor configuration
    0x40: '-',  # Datalogger is in maintenance mode
    0x20: '%',  # Timeout (for ex. digitalization takes too long)
    0x10: '!',  # Value is out of valid range
    0x08: '@',  # Difference between 2 consecutive values is too far
    0x04: '#',  # Filling level of the averaging buffer is too low
    0x02: '?',  # Error depending on measurement type (for ex. ADC overflow)
    0x01: '?',  # Error depending on measurement type (for ex. cable break)
}

In [122]:
def read_descfile(path: str) -> dict:
    '''
    Parse a DESCFILE.INI parameter description file.

    Input: path to DESCFILE.INI
    Returns: dict
        key is the section name converted to int (index [i])
        value is dict with the parameters of that section
        (option names are lower-cased by configparser)
    Raises:
        FileNotFoundError if the file cannot be read
        ValueError if a section name is not an integer
    '''
    # Interpolation is switched off so that a literal '%' inside a value is
    # returned verbatim instead of raising an interpolation error.
    config = configparser.ConfigParser(interpolation=None)

    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed; an empty list means nothing was read.
    if not config.read(path):
        raise FileNotFoundError(f'Could not read description file: {path}')

    return {int(section): dict(config.items(section))
            for section in config.sections()}

In [119]:
# Sample inputs: the 20240531 archive file and the description file of each
# archive directory (AV first, then EX).
path_bin_av, path_ini_av = './BINFILES/ARCH_AV1/20240531.BIN', './BINFILES/ARCH_AV1/DESCFILE.INI'
path_bin_ex, path_ini_ex = './BINFILES/ARCH_EX1/20240531.BIN', './BINFILES/ARCH_EX1/DESCFILE.INI'

# Parameter descriptions keyed by parameter index.
descfile_av, descfile_ex = read_descfile(path_ini_av), read_descfile(path_ini_ex)

## AV Files
**[Casi listo. Falta leer los 4 primeros bytes y agregar hora-fecha a cada fila.]**

Example file: 31 mayo, 2024
* File: 14.256 bytes
* Rows: 144
* Parameters: 19
* Bytes per row: 99 bytes

In [120]:
# Load the complete AV archive file into memory as raw bytes.
with open(path_bin_av, mode="rb") as bin_file:
    binfile = bin_file.read()

# Number of complete rows; floor division leaves out any trailing partial row.
size = len(binfile)
rows = len(binfile) // AV_ROW_SIZE
print(f'{size} bytes in {rows} rows')


14256 bytes in 144 rows


In [123]:
# Cut the file into one byte string per row, dropping the first OFFSET bytes
# of each row (for the sample file: 144 rows of 95 bytes each).
byterows = [
    binfile[start + OFFSET : start + AV_ROW_SIZE]
    for start in range(0, rows * AV_ROW_SIZE, AV_ROW_SIZE)
]

In [124]:
# Decode every AV row into two (rows x parameters) arrays: one with the
# measured values and one with the status byte of each value.
# Each parameter occupies 5 bytes: 1 status byte followed by a 4-byte float.
record_size = 5
data_arr = np.zeros((rows, AV_DESC_SIZE))
status_arr = np.zeros((rows, AV_DESC_SIZE))
for i, row in enumerate(byterows):
    # Loop bound comes from AV_DESC_SIZE so it always matches the array width.
    for j in range(AV_DESC_SIZE):
        start = j * record_size

        # Status = byte 1
        status = row[start]
        status_arr[i, j] = status

        # Value = bytes 2-5, little-endian float; unpack_from reads it in
        # place without slicing a temporary bytes object.
        value = struct.unpack_from('<f', row, start + 1)[0]
        data_arr[i, j] = round(value, 1)

In [134]:
# Column labels come from the 'name' entry of the description file, whose
# sections are numbered from 1. The mapping is built once, sized by the
# decoded array instead of a hard-coded parameter count, and shared by both
# frames so values and statuses always carry the same labels.
av_column_names = {i: descfile_av[i + 1]['name'] for i in range(data_arr.shape[1])}
data_df = pd.DataFrame(data_arr).rename(columns=av_column_names)
status_df = pd.DataFrame(status_arr).rename(columns=av_column_names)
data_df


Unnamed: 0,UBat,Pressure,PrTemp,Precipitation,WS,WD,WS std. dev.,WD std. dev.,Humidity,AirTemperature,Radiation,PicoMoisture 1,PicoSoilTemp 1,PicoMoisture 2,PicoSoilTemp 2,CO2,UVA Radiation,UVB Radiation,PAR Radiation
0,11.7,1013.7,8.0,0.0,0.8,24.0,0.1,24.3,100.0,5.0,-0.1,0.0,0.0,0.0,0.0,515.8,-0.1,0.0,-0.8
1,11.7,1013.5,8.0,0.0,0.6,329.0,0.2,82.1,100.0,5.0,-0.2,0.0,0.0,0.0,0.0,506.4,-0.1,0.0,-0.8
2,11.7,1013.4,8.0,0.0,0.6,12.3,0.2,27.6,100.0,4.8,-0.2,0.0,0.0,0.0,0.0,518.1,-0.1,0.0,-0.7
3,11.7,1013.2,7.9,0.0,0.8,24.6,0.3,25.1,100.0,4.7,-0.2,0.0,0.0,0.0,0.0,515.2,-0.1,0.0,-0.7
4,11.7,1013.1,7.8,0.0,0.8,17.5,0.1,45.2,100.0,4.7,-0.2,0.0,0.0,0.0,0.0,504.1,-0.1,0.0,-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,1009.7,5.4,0.0,0.7,13.8,0.3,44.2,100.0,3.0,-0.6,0.0,0.0,0.0,0.0,453.8,-0.1,0.0,-0.7
140,11.6,1009.9,5.4,0.0,0.5,69.3,0.3,35.9,100.0,3.0,-0.2,0.0,0.0,0.0,0.0,451.1,-0.1,0.0,-0.8
141,11.6,1009.8,5.4,0.0,0.8,69.6,0.4,34.1,100.0,3.2,-0.3,0.0,0.0,0.0,0.0,453.7,-0.1,0.0,-0.7
142,11.6,1009.9,5.5,0.0,1.0,55.2,0.4,51.5,100.0,3.2,-0.2,0.0,0.0,0.0,0.0,451.1,-0.1,0.0,-0.7


In [137]:
# Keep only the values whose status byte is 0 (status OK); every value with a
# non-zero status is replaced by the `other` fill value.
status_ok = status_df == 0.0
data_df = data_df.where(status_ok, other=None)
data_df

Unnamed: 0,UBat,Pressure,PrTemp,Precipitation,WS,WD,WS std. dev.,WD std. dev.,Humidity,AirTemperature,Radiation,PicoMoisture 1,PicoSoilTemp 1,PicoMoisture 2,PicoSoilTemp 2,CO2,UVA Radiation,UVB Radiation,PAR Radiation
0,11.7,1013.7,8.0,0,0.8,24.0,0.1,24.3,100,5.0,-0.1,,,,,515.8,-0.1,0,-0.8
1,11.7,1013.5,8.0,0,0.6,329.0,0.2,82.1,100,5.0,-0.2,,,,,506.4,-0.1,0,-0.8
2,11.7,1013.4,8.0,0,0.6,12.3,0.2,27.6,100,4.8,-0.2,,,,,518.1,-0.1,0,-0.7
3,11.7,1013.2,7.9,0,0.8,24.6,0.3,25.1,100,4.7,-0.2,,,,,515.2,-0.1,0,-0.7
4,11.7,1013.1,7.8,0,0.8,17.5,0.1,45.2,100,4.7,-0.2,,,,,504.1,-0.1,0,-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,1009.7,5.4,0,0.7,13.8,0.3,44.2,100,3.0,-0.6,,,,,453.8,-0.1,0,-0.7
140,11.6,1009.9,5.4,0,0.5,69.3,0.3,35.9,100,3.0,-0.2,,,,,451.1,-0.1,0,-0.8
141,11.6,1009.8,5.4,0,0.8,69.6,0.4,34.1,100,3.2,-0.3,,,,,453.7,-0.1,0,-0.7
142,11.6,1009.9,5.5,0,1.0,55.2,0.4,51.5,100,3.2,-0.2,,,,,451.1,-0.1,0,-0.7


## EX Files (en proceso)
**[De aquí para abajo todavía no está completo: usa una estructura de datos diferente, Data Type FloatExtrem. Cada medida de min o max viene con su propio timestamp]**

In [103]:
# Load the complete EX archive file into memory as raw bytes.
with open(path_bin_ex, mode="rb") as bin_file:
    binfile = bin_file.read()

# Number of complete rows; floor division leaves out any trailing partial row.
size = len(binfile)
rows = len(binfile) // EX_ROW_SIZE
print(f'{size} bytes in {rows} rows')


42048 bytes in 144 rows


In [106]:
# After the OFFSET-byte prefix each row holds EX_DESC_SIZE records of equal
# width; with the current constants that is (292 - 4) // 32 = 9 bytes each.
record_size = (EX_ROW_SIZE - OFFSET) // EX_DESC_SIZE
byterows = [binfile[i*EX_ROW_SIZE + OFFSET : (i+1)*EX_ROW_SIZE] for i in range(rows)]

# One column per parameter for both the values and the status bytes.
dfdata_ex = np.zeros((rows, EX_DESC_SIZE))
dfstatus_ex = np.zeros((rows, EX_DESC_SIZE))

for i, row in enumerate(byterows):
    for j in range(EX_DESC_SIZE):
        start = j * record_size

        # Status = byte 1
        status = row[start]
        dfstatus_ex[i, j] = status

        # Value = bytes 2-5, little-endian float. unpack_from reads exactly
        # 4 bytes; handing the remaining 8 bytes of the record to
        # struct.unpack('<f', ...) raises struct.error.
        # NOTE(review): the last 4 bytes of each record are not decoded yet;
        # presumably they are the per-value timestamp - TODO confirm.
        value = struct.unpack_from('<f', row, start + 1)[0]
        dfdata_ex[i, j] = round(value, 1)

# Label columns with the names from the description file (sections start at 1).
# Columns without a descriptor keep their numeric label.
ex_column_names = {i: descfile_ex[i + 1]['name']
                   for i in range(EX_DESC_SIZE) if (i + 1) in descfile_ex}
data_df_ex = pd.DataFrame(dfdata_ex).rename(columns=ex_column_names)
status_df_ex = pd.DataFrame(dfstatus_ex).rename(columns=ex_column_names)
data_df_ex

Unnamed: 0,UBat MIN,UBat MAX,Pressure MIN,Pressure MAX,PrTemp MIN,PrTemp MAX,WS MIN,WS MAX gust,WD MIN,WD MAX gust,...,22,23,24,25,26,27,28,29,30,31
0,11.7,0.0,-0.000000e+00,6.665913e+35,5.425494e+37,4.943875e+32,2.104479e+37,0.000000e+00,2.922499e+20,8.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,11.7,0.0,0.000000e+00,-0.000000e+00,0.000000e+00,3.215778e+26,4.255641e+37,0.000000e+00,2.928421e+20,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,11.7,0.0,-3.737634e+27,-6.509681e+32,0.000000e+00,1.852064e+22,4.339756e+37,-8.507092e+36,2.928598e+20,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,11.7,0.0,0.000000e+00,0.000000e+00,0.000000e+00,7.688055e+11,4.449054e+37,-8.507092e+36,2.928646e+20,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,11.7,0.0,-0.000000e+00,6.357111e+29,0.000000e+00,2.527084e+08,4.530833e+37,-9.969311e+34,2.928775e+20,7.9,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,11.6,0.0,0.000000e+00,0.000000e+00,2.503951e+29,2.466360e+35,3.217992e+38,-0.000000e+00,2.944994e+20,5.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
140,11.6,0.0,-5.839915e+25,-0.000000e+00,6.665763e+30,-0.000000e+00,3.235386e+38,-0.000000e+00,2.945107e+20,5.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
141,11.6,0.0,5.438844e+16,-0.000000e+00,1.213230e+33,2.002162e+37,3.256778e+38,-0.000000e+00,2.945219e+20,5.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
142,11.6,0.0,-8.702151e+17,1.445440e+17,6.217269e+33,-0.000000e+00,3.322824e+38,-0.000000e+00,2.945333e+20,5.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Class format (en proceso)
**[Un resumen de lo anterior en formato clase.]**

In [None]:
class THIESData:
    """Reader for one THIES binary archive file and its DESCFILE.INI.

    On construction the binary file and the description file are read and,
    for datatype 'av', decoded into two DataFrames:

    * ``dataDF``   - measured values rounded to one decimal; values whose
      status byte is not 0 are blanked out.
    * ``statusDF`` - the status byte of every value.

    Decoding of datatype 'ex' is not implemented yet; both frames stay
    ``None`` in that case.
    """

    # Size of row (bytes)
    ROW_SIZE = {'av': 99, 'ex': 292}
    # Size (n of parameters)
    DESC_SIZE = {'av': 19, 'ex': 32}
    # The first 4 bytes of each row are skipped (presumably a timestamp;
    # decoding them is still pending).
    OFFSET = 4

    def __init__(self, PATH: str, PATH_INI: str, datatype: str) -> None:
        """Read and decode one archive file.

        Args:
            PATH: path of the binary archive file.
            PATH_INI: path of the matching DESCFILE.INI.
            datatype: 'av' (average values) or 'ex' (minmax values);
                case and surrounding whitespace are ignored.

        Raises:
            ValueError: if ``datatype`` is neither 'av' nor 'ex', or if the
                description file has fewer entries than an 'av' row needs.
            FileNotFoundError: if one of the files cannot be read.
        """
        d = datatype.lower().strip()
        if d not in ('av', 'ex'):
            raise ValueError("Invalid datatype. Expected 'av' (average values) or 'ex' (minmax values).")

        self._bpr = THIESData.ROW_SIZE[d]
        # Store the normalised value: later comparisons use 'av' / 'ex', so
        # keeping the raw argument would skip decoding for e.g. 'AV'.
        self._datatype = d
        self._binfile = self._read_binfile(PATH)

        self.descfile = self._read_descfile(PATH_INI)
        self.nparameters = len(self.descfile)
        self.nbytes = len(self._binfile)
        # Complete rows only; a trailing partial row is left out.
        self.nrows = self.nbytes // self._bpr
        self.statusDF = None
        self.dataDF = None

        self._make_dataframe()

    def _read_binfile(self, path: str) -> bytes:
        """Return the raw content of the binary file at ``path``."""
        with open(path, "rb") as bin_file:
            return bin_file.read()

    def _read_descfile(self, path: str) -> dict:
        """Parse the description file into ``{section number: {option: value}}``.

        Raises FileNotFoundError if the file cannot be read.
        """
        # Interpolation is switched off so a literal '%' in a value is
        # returned verbatim instead of raising an interpolation error.
        config = configparser.ConfigParser(interpolation=None)
        # ConfigParser.read() silently skips unreadable files and returns the
        # list of files it parsed; an empty list means nothing was read.
        if not config.read(path):
            raise FileNotFoundError(f'Could not read description file: {path}')
        return {int(section): dict(config.items(section))
                for section in config.sections()}

    def _make_dataframe(self) -> None:
        """Decode the binary content into ``dataDF`` and ``statusDF``."""
        if self._datatype != 'av':
            # 'ex' rows use a different record layout that is not decoded yet.
            return

        n_values = THIESData.DESC_SIZE['av']
        if self.nparameters < n_values:
            raise ValueError(
                f'Description file has {self.nparameters} entries, '
                f'expected at least {n_values}.'
            )

        data_arr = np.zeros((self.nrows, self.nparameters))
        status_arr = np.zeros((self.nrows, self.nparameters))

        # One row after the skipped prefix: n_values records, each made of
        # 1 status byte followed by a 4-byte little-endian float.
        record = struct.Struct('<' + 'Bf' * n_values)
        for i in range(self.nrows):
            fields = record.unpack_from(self._binfile, i * self._bpr + THIESData.OFFSET)
            # Statuses and values alternate in the unpacked tuple.
            status_arr[i, :n_values] = fields[0::2]
            data_arr[i, :n_values] = [round(value, 1) for value in fields[1::2]]

        # Description file sections are numbered from 1.
        names = {i: self.descfile[i + 1]['name'] for i in range(n_values)}
        self.dataDF = pd.DataFrame(data_arr).rename(columns=names)
        self.statusDF = pd.DataFrame(status_arr).rename(columns=names)
        # Keep only values whose status byte is 0.
        self.dataDF = self.dataDF.where(self.statusDF == 0.0, other=None)

    def write_csv(self, path: str) -> None:
        """Write ``dataDF`` to ``path`` as CSV.

        Raises:
            ValueError: if no data has been decoded (``dataDF`` is None).
        """
        # Check before touching the target so a failure does not leave an
        # empty file behind.
        if self.dataDF is None:
            raise ValueError('No decoded data available to write.')
        self.dataDF.to_csv(path)
        
        
# Decode the sample AV archive through the class and display the result.
data = THIESData(PATH=path_bin_av, PATH_INI=path_ini_av, datatype='av')
data.dataDF
# data.write_csv('./test.csv')

In [None]:
# DataType: DataTime
def bytes2date(b: bytes) -> bytes:
    """Split a 4-byte date/time value into its bit fields (work in progress).

    The fields are sliced but not yet converted or returned, so the function
    currently returns None despite its annotation.

    NOTE(review): how ``bitarray`` interprets a ``bytes`` initializer, and the
    bit order of the fields, should be confirmed against the bitarray
    documentation and the datalogger format before relying on these slices.
    """
    # Input is expected to be 4 bytes long.
    bits = bitarray(b)

    # Consecutive fields of 6, 6, 5, 5 and 4 bits; the year takes the rest.
    seconds = bits[:6]
    minutes = bits[6:12]
    hours = bits[12:17]
    day = bits[17:22]
    month = bits[22:26]
    year = bits[26:]

    # TODO: turn each field into an integer and return the assembled date.
    # Earlier experiments with struct.unpack('<i', ...) / struct.unpack('>i', ...)
    # on the seconds field were left disabled.

# len(test)
# sec, min, hrs, day

# struct.unpack('<i', test)[0]
# bytes2date(binfile[6:10])
# i = 0
# s = binfile[i:i+4]
# bytes2date(s)