In [None]:
# default_exp parser

# Parser
> Este módulo processa o arquivo bin e extrai os metadados e dados do espectro dos blocos, além de criar estatísticas das medições.
  en: This module processes the bin file, extracting the metadata and spectrum levels of its blocks, and also computes useful statistics about the measurements.
  fr: Ce module traite le fichier bin et extrait les métadonnées et les données spectrales des blocs, en plus de créer des statistiques de mesure.

In [None]:
#export
import sys, os
from pathlib import Path

# Make the parent of the current working directory importable (highest priority)
sys.path.insert(0, str(Path.cwd().parent))

In [None]:
%load_ext autoreload
%load_ext line_profiler
%load_ext cython
%autoreload 2 

In [None]:
#exporti
import os
import gc
from pathlib import Path
from typing import *
from dataclasses import make_dataclass
from fastcore.basics import partialler
from fastcore.utils import parallel
from fastcore.foundation import L, GetAttr
from rfpye.constants import *
from rfpye.blocks import MAIN_BLOCKS, BaseBlock
from rfpye.utils import get_files, getattrs, bin2int, bin2str, cached
from rfpye.cyparser import cy_extract_compressed
from loguru import logger
import pandas as pd
import numpy as np
from rich import print

# For scripts: a single file handler, handed to the logger in one call
config = dict(
    handlers=[
        dict(
            sink="parser.log",
            serialize=True,
            rotation="1 month",
            compression="zip",
            backtrace=True,
            diagnose=True,
        ),
    ],
)
logger.configure(**config)

  warn("Couldn't import ipywidgets properly, progress bar will use console behavior")


[1]

In [None]:
#exports
class CrfsGPS:
    """Holds the GPS blocks read from a CRFS bin file.

    Blocks are accumulated in ``_data``. The underscore-prefixed attributes
    expose one value per stored block, while the public properties summarise
    each of them by its median.
    """

    def __init__(self) -> None:
        self._data: L = L()

    def __len__(self):
        """Number of GPS blocks stored."""
        return len(self._data)

    def __getitem__(self, key):
        """Return ``(latitude, longitude, altitude, num_satellites)`` at ``key``."""
        lat = self._latitude[key]
        lon = self._longitude[key]
        alt = self._altitude[key]
        sats = self._num_satellites[key]
        return lat, lon, alt, sats

    def __iter__(self):
        """Iterate over ``(latitude, longitude, altitude, num_satellites)`` tuples."""
        columns = (
            self._latitude,
            self._longitude,
            self._altitude,
            self._num_satellites,
        )
        return zip(*columns)

    @staticmethod
    def _median_or(values, fallback):
        """Median of ``values``, or ``fallback`` when ``values`` is falsy (e.g. empty)."""
        return np.median(values) if values else fallback

    @cached
    def _gps_datetime(self):
        return self._data.attrgot("gps_datetime")

    @cached
    def _latitude(self):
        return self._data.attrgot("latitude")

    @cached
    def _longitude(self):
        return self._data.attrgot("longitude")

    @cached
    def _altitude(self):
        return self._data.attrgot("altitude")

    @cached
    def _num_satellites(self):
        return self._data.attrgot("num_satellites")

    @property
    def latitude(self) -> float:
        """Median latitude, or -1 when there is no latitude data."""
        return self._median_or(self._latitude, -1)

    @property
    def longitude(self) -> float:
        """Median longitude, or -1 when there is no longitude data."""
        return self._median_or(self._longitude, -1)

    @property
    def altitude(self) -> float:
        """Median altitude, or -1 when there is no altitude data."""
        return self._median_or(self._altitude, -1)

    @property
    def num_satellites(self) -> float:
        """Median number of satellites, or 0 when there is no data."""
        return self._median_or(self._num_satellites, 0)

    def __repr__(self):
        return f"GPS Data - Median of Coordinates: {self.latitude:.5f}:{self.longitude:.5f} Altitude: {self.altitude:.2f} #Satellites: {self.num_satellites:.1f}"


class CrfsSpectrum(GetAttr):
    """Class with the metadata and levels of a spectrum block from a CRFS Bin File

    The metadata object is stored in ``default``; the methods below read fields
    such as ``type``, ``thread_id``, ``ndata``, ``start_mega`` and ``stop_mega``
    through ``self``, relying on ``GetAttr`` to delegate those lookups to it.
    The parsed blocks themselves are accumulated in ``_data``.

    Args:
        metadata: object carrying the fields that identify this spectrum.
        precision: dtype used for the levels array (default ``np.float32``).
    """

    def __init__(self, metadata, precision=np.float32):
        self.default = metadata
        self._data: L = L()
        self.precision = precision

    def __getitem__(self, key):
        """Return the ``(timestamp, levels)`` pair at position ``key``."""
        return self.timestamp[key], self.levels[key]

    def __iter__(self):
        """Iterate over ``(timestamp, levels)`` pairs."""
        return zip(self.timestamp, self.levels)

    def __len__(self):
        """Number of blocks stored for this spectrum."""
        return len(self._data)

    def __repr__(self):
        return repr(self.default)

    def __str__(self):
        return f"""Blocks of Type: {self.type}, Thread_id: {self.thread_id}, Start: {self.start_mega} MHz, Stop: {self.stop_mega} MHz"""

    @cached
    def timestamp(self):
        """The ``wallclock_datetime`` of every stored block."""
        return self._data.attrgot('wallclock_datetime')

    @cached
    def start_dateidx(self):
        """``wallclock_datetime`` of the first stored block, as a Python scalar."""
        return getattr(self._data[0], 'wallclock_datetime').item()

    @cached
    def stop_dateidx(self):
        """``wallclock_datetime`` of the last stored block, as a Python scalar."""
        return getattr(self._data[-1], 'wallclock_datetime').item()

    @cached
    def levels(self):
        """Return the spectrum levels

        Uncompressed block types are copied row by row into a
        ``(len(self._data), self.ndata)`` array; compressed block types are
        decoded by ``cy_extract_compressed``.

        Raises:
            ValueError: if the block type is neither in UNCOMPRESSED nor COMPRESSED.
        """
        if self.type in UNCOMPRESSED:
            levels = np.empty((len(self._data), self.ndata), dtype=self.precision)
            for i, level in enumerate(self._data.attrgot('levels')):
                levels[i, :] = level
        elif self.type in COMPRESSED:
            levels = cy_extract_compressed(
                list(self._data.attrgot('levels')),
                len(self._data),
                int(self.ndata),
                int(self.thresh),
                float(self.minimum),
            )
        else:
            raise ValueError(
                "The current block is not of type spectrum or it's not implemented yet"
            )
        if self.precision != np.float32:
            # copy=False: the uncompressed branch already allocated the array with
            # the requested dtype, so a second full copy of the matrix is avoided;
            # a conversion still happens whenever the dtype actually differs.
            levels = levels.astype(self.precision, copy=False)
        return levels

    @cached
    def frequencies(self) -> np.ndarray:
        """``ndata`` evenly spaced values from ``start_mega`` to ``stop_mega`` (inclusive)."""
        return np.linspace(self.start_mega, self.stop_mega, num=self.ndata)

    def matrix(self):
        """Returns the matrix formed from the spectrum levels and timestamp

        Rows are indexed by timestamp (when there is one per block) and columns
        by frequency.
        """
        index = self.timestamp if len(self.timestamp) == len(self) else None
        data = pd.DataFrame(self.levels, index=index, columns=self.frequencies)
        data.columns.name = "Frequencies"
        data.index.name = "Time"
        return data

In [None]:
#exporti
def append_spec_data(block_type, fluxos, block, precision=np.float32) -> None:
    """Append the spectrum data to the fluxos dict

    The attributes listed in ``KEY_ATTRS`` for ``block_type`` are read from
    ``block``; their values form the dictionary key, so blocks sharing the same
    values are accumulated in the same ``CrfsSpectrum``.

    Args:
        block_type: block type code, used to look up the key attributes.
        fluxos: dict mapping a tuple of key values to its ``CrfsSpectrum``; updated in place.
        block: parsed spectrum block to store.
        precision: dtype passed to a newly created ``CrfsSpectrum``.
    """
    keys, vals = getattrs(block, KEY_ATTRS.get(block_type), as_tuple=True)
    if vals not in fluxos:
        # Annotate each field with the type of its value; annotating with the
        # type of the field *name* would make every field `str`.
        fields = [(key, type(val)) for key, val in zip(keys, vals)]
        metadata = make_dataclass('Spectrum', fields=fields, eq=True, frozen=True)
        fluxos[vals] = CrfsSpectrum(metadata(*vals), precision)
    fluxos[vals]._data.append(block)

## Processamento do Arquivo `.bin` e criação dos diferentes tipos de blocos

In [None]:
#exporti
def evaluate_checksum(file, next_block, data_size) -> int:
    """Check the checksum stored in the file against the one computed from the block bytes.

    Reads 4 bytes at the current position as the stored checksum, rewinds to
    offset `next_block`, sums the following ``12 + data_size`` bytes and then
    skips the 4 checksum bytes again.

    Returns the stored checksum when both values match. Returns None (after
    logging an error) when the checksum cannot be read or the values differ.
    """
    try:
        stored = np.frombuffer(file.read(4), np.uint32).item()
    except ValueError:
        logger.error(f"Erro na leitura do checksum, posição: {file.tell()}")
        return None
    # Distance from the current position back to the first byte of the block
    rewind = file.tell() - next_block
    file.seek(-rewind, 1)
    covered = np.frombuffer(file.read(12 + data_size), dtype=np.uint8)
    computed = covered.sum().astype(np.uint32).item()
    file.seek(4, 1)  # step over the stored checksum
    if stored == computed:
        return stored
    logger.error(f"Checksum diferente: {stored} != {computed}. Posicao: {file.tell()}")
    return None

In [None]:
#exporti
def buffer2base_block(file, next_block: int) -> Tuple[Optional[int], Optional[BaseBlock]]:
    """Read one block from the opened bin file at its current position.

    A block is read as a 12-byte header (three int32 values: thread_id, size
    and type), followed by `size` bytes of data, a 4-byte checksum and the
    4-byte marker b'UUUU'.

    Args:
        file: bin file opened in binary mode, positioned at the start of a block.
        next_block: offset of the start of this block, used to verify the checksum.

    Returns:
        ``(block_type, BaseBlock)`` on success, or ``(None, None)`` when the
        header is truncated, the checksum check fails or the end marker is wrong.
    """
    header_bytes = 12  # thread_id, size and type, 4 bytes each
    start = file.tell()
    header = file.read(header_bytes)
    if len(header) < header_bytes:
        # Truncated file: the short read already consumed the remaining bytes,
        # so report the problem instead of failing while decoding the integers.
        logger.error(
            f"Truncated block header at position {start}: expected {header_bytes} bytes, got {len(header)}"
        )
        return None, None
    thread_id, block_size, block_type = np.frombuffer(header, np.int32).tolist()
    data_block = file.read(block_size)
    if (checksum := evaluate_checksum(file, next_block, block_size)) is None:
        # Resynchronise: go back to the start of the bad block and skip forward
        # in 4-byte steps until the end marker or the end of the file is reached.
        file.seek(start, 0)
        while file.read(4) not in (b'', b'UUUU'):
            pass
        return None, None
    if (eof := file.read(4)) != b'UUUU':
        logger.error(f"EOF diferente de UUUU: {eof}, posicao: {file.tell()}")
        return None, None
    return block_type, BaseBlock(thread_id, block_size, block_type, data_block, checksum)

A função a seguir recebe os bytes lidos do arquivo `.bin` e mapeia esses bytes em diferentes classes de acordo com o tipo de bloco

In [None]:
#exporti
def create_block(file, next_block) -> Tuple:
    """Read the next block from the file and convert it into one of the main block classes.

    Args:
        file: bin file opened in binary mode, positioned at the start of a block.
        next_block: offset of the start of that block.

    Returns:
        ``(block_type, block)`` on success, or ``(None, None)`` when the block
        could not be read, its type has no constructor in ``MAIN_BLOCKS``, or the
        block reports an error (``gerror`` different from -1, or ``gps_status`` equal to 0).
    """
    block_type, base_block = buffer2base_block(file, next_block)
    if block_type is None:
        return None, None
    constructor = MAIN_BLOCKS.get(block_type)
    if not constructor:
        logger.warning(f"This block type constructor is not implemented: {block_type}")
        return None, None
    block = constructor(base_block)
    if getattr(block, "gerror", -1) != -1 or getattr(block, "gps_status", -1) == 0:
        # The message must be the first argument: extra positional arguments are
        # only used as format arguments, so a leading "INFO" would be all that is logged.
        logger.error(f"Block with error: {block_type}")
        return None, None  # spectral or gps blocks with error
    return block_type, block

A função a seguir recebe o caminho de um arquivo `.bin`, percorre todos os seus blocos e retorna um dicionário com os metadados do arquivo, os dados de GPS e os blocos de espectro.

In [None]:
#exports
def parse_bin(bin_file: Union[str, Path], precision=np.float32) -> dict:
    """Receives a CRFS binfile and returns a dictionary with the file metadata, a GPS Class and a list with the different Spectrum Classes
    A block is a piece of the .bin file with a known start and end and that contains different types of information.
    It has several fields: file_type, header, data and footer.
    Each field has lengths and information defined in the documentation.
    Args:
        bin_file (Union[str, Path]): path to the bin file
        precision: dtype passed on to the spectrum objects for their levels (default np.float32)

    Returns:
        Dictionary with the file metadata, file_version, string info, gps and spectrum blocks.
    """
    bin_file = Path(bin_file)
    meta = {}
    fluxos = {}
    gps = CrfsGPS()
    with open(bin_file, mode="rb") as file:
        # The first block of the file is the header, BYTES_HEADER bytes long.
        header = file.read(BYTES_HEADER)
        meta["filename"] = bin_file.name
        meta["file_version"] = bin2int(header[:4])
        meta["string"] = bin2str(header[4:])
        file_size = file.seek(0, 2)
        # Return to the first byte after the header, using the same constant as the read above
        file.seek(BYTES_HEADER, 0)
        while (next_block := file.tell()) < file_size:
            block_type, block = create_block(file, next_block)
            if block is None:
                continue  # unreadable, unsupported or faulty block: skip it
            if block_type in (2, 40):
                # Block types 2 and 40 are collected as GPS data
                gps._data.append(block)
            elif block_type in VECTOR_BLOCKS:
                append_spec_data(block_type, fluxos, block, precision)
            else:
                # Any other block contributes its key attributes to the file metadata
                meta.update(getattrs(block, KEY_ATTRS.get(block_type)))
    meta["gps"] = gps
    meta["spectrum"] = L(fluxos.values())
    # Upper-case the first two characters of the hostname; a file whose blocks
    # never supplied a hostname is still returned instead of raising KeyError.
    hostname = meta.get("hostname")
    if hostname:
        meta["hostname"] = hostname[:2].upper() + hostname[2:]
    return meta


In [None]:
# List the files that get_files finds under 'binfiles/Occ' and keep the first one for the cells below
files = get_files('binfiles/Occ')
file = files[0]

In [None]:
# Parse the selected file and display the spectrum objects found in it
dados = parse_bin(file)
dados['spectrum']

(#1) [Spectrum(type=65, thread_id=121, start_mega=80, stop_mega=110, dtype='dBm', ndata=1536, processing='peak', antuid=0)]

In [None]:
# Show the thread_id of each spectrum and how many blocks each spectrum holds
print(dados['spectrum'].attrgot('thread_id'))
print(dados['spectrum'].map(len))

In [None]:
# Transpose the first spectrum's matrix so frequencies become the rows, then show rows 379 to 389
dados['spectrum'][0].matrix().transpose().iloc[379:390,]

Time,2019-12-21 09:00:01.367337,2019-12-21 09:15:01.357259,2019-12-21 09:30:01.357357,2019-12-21 09:45:01.357273,2019-12-21 10:00:01.419225,2019-12-21 10:15:01.457292,2019-12-21 10:30:01.457319,2019-12-21 10:45:01.457347,2019-12-21 11:00:01.557361,2019-12-21 11:15:01.657314,...,2020-03-17 05:00:00.717407,2020-03-17 05:15:00.727240,2020-03-17 05:30:00.817241,2020-03-17 05:45:01.165917,2020-03-17 06:00:01.117366,2020-03-17 06:15:01.117384,2020-03-17 06:30:01.117365,2020-03-17 06:45:01.356074,2020-03-17 07:00:01.417270,2020-03-17 07:15:01.437418
Frequencies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
87.407166,5.0,3.5,3.5,2.5,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,4.0,1.5,2.0,3.0,2.0,6.0,2.0,2.0,4.0
87.42671,26.0,25.0,23.5,14.0,0.0,0.0,0.0,0.0,0.0,0.0,...,24.0,29.0,24.0,25.5,28.5,23.5,27.5,26.5,25.5,27.0
87.446254,68.0,66.5,64.0,57.5,48.5,49.5,48.5,47.0,45.5,52.0,...,66.5,67.5,69.0,71.0,69.0,68.5,73.0,73.5,72.0,70.5
87.465798,97.5,93.5,94.5,94.0,100.0,100.0,100.0,100.0,100.0,100.0,...,96.5,95.5,96.5,96.5,97.5,96.5,97.0,97.0,96.0,95.5
87.485342,98.5,98.5,99.5,99.0,100.0,100.0,100.0,100.0,100.0,100.0,...,99.0,98.5,99.5,100.0,99.5,99.5,99.0,100.0,99.0,99.0
87.504886,94.5,93.5,93.0,95.0,100.0,100.0,100.0,100.0,100.0,100.0,...,93.5,94.0,95.0,95.5,96.5,96.5,96.0,96.0,96.0,94.5
87.52443,72.5,69.0,67.5,55.0,42.0,40.5,41.5,42.0,42.0,46.0,...,68.5,69.5,68.0,72.0,73.0,69.5,74.5,71.0,70.5,67.5
87.543974,21.5,26.5,20.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.5,25.5,20.0,25.5,26.5,23.0,29.5,21.5,24.0,24.0
87.563518,2.0,1.5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.5,2.5,1.0,1.0,2.5,2.5,5.5,0.0,3.0,2.5
87.583062,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0


In [None]:
%%cython --annotate
# TODO(review): this cell is incomplete. The two decorators at the end are not
# followed by a function or class, which is why the compilation fails with
# "Decorators can only be followed by functions or classes".
cimport cython

ctypedef object CrfsGPS

@cython.boundscheck(False)
@cython.wraparound(False)



Error compiling Cython file:
------------------------------------------------------------
...

ctypedef object CrfsGPS

@cython.boundscheck(False)
@cython.wraparound(False)
^
------------------------------------------------------------

C:\Users\rsilva\.ipython\cython\_cython_magic_98c655412a46cff311afb5f5b8b31186.pyx:7:0: Decorators can only be followed by functions or classes
