In [None]:
# default_exp parser

# Parser
>  Este módulo lida com o processamento e análise dos metadados e dados do espectro dos blocos

In [None]:
#hide
import sys, os
from pathlib import Path

# Insert in Path Project Directory
sys.path.insert(0, str(Path().cwd().parent))

%load_ext autoreload
%load_ext line_profiler
%autoreload 2 

In [None]:
#exporti
import os
from pathlib import Path
from typing import *
from collections import defaultdict, namedtuple
from fastcore.basics import partialler, listify
from fastcore.utils import parallel
from fastcore.foundation import L
from rfpye.constants import BYTES_HEADER, ENDMARKER, KEY_ATTRS
from rfpye.blocks import MAIN_BLOCKS
from rfpye.utils import get_files, getattrs, bin2int, bin2str
from rfpye.cyparser import cy_extract_compressed
from loguru import logger
import pandas as pd
import numpy as np

  warn("Couldn't import ipywidgets properly, progress bar will use console behavior")


In [None]:
#exporti
# File sink for the parser: rotated every month, rotated files are zipped,
# and extended tracebacks (backtrace + variable values) are enabled.
logger.add(
    "parser.log",
    rotation="1 month",
    compression='zip',
    backtrace=True,
    diagnose=True,
)

1

## Processamento do Arquivo `.bin` e criação dos diferentes tipos de blocos
A função seguinte `parse_bin` recebe um arquivo `.bin` e mapeia os blocos contidos nele retornando um dicionário:
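 * `filename`: Nome do arquivo `.bin`
 * `file_version`: Versão do arquivo `.bin`
 * `string`: Texto contido no cabeçalho do arquivo, logo após os 4 bytes da versão
 * `blocks`: Dicionário com os blocos do arquivo `.bin`. Cada tipo de bloco tem sua Classe Própria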
 
 O dicionário `blocks` retornado tem como chave uma tupla (tipo de bloco, `thread_id`) e como valor uma lista com os blocos (classes) extraídos sequencialmente.

O tipo de bloco é a natureza do dado contido, por exemplo: 40 - GPS, 67 - Dado Espectral. O `thread_id` discrimina em geral diferentes "faixas" do mesmo tipo de dado. Para dados espectrais, por exemplo, diferentes thread_id representam varreduras de faixas de frequência distintas

In [None]:
# Folder holding the sample `.bin` files. The default is the original author's local
# path; set the RFPYE_SAMPLE_DIR environment variable to run this cell elsewhere.
files = get_files(
    os.environ.get('RFPYE_SAMPLE_DIR', r'D:\OneDrive - ANATEL\Sensores'),
    extensions=['.bin'],
)

In [None]:
#export
def parse_bin(
    bin_file: Union[str, Path],
    slice_: slice = None,
) -> dict:
    """Read a CRFS bin file and return its header information and classified blocks.

    The first `BYTES_HEADER` bytes of the file are the file header; everything after it
    is split on `ENDMARKER` and every resulting piece is handed to `classify_blocks`.

    Args:
        bin_file (Union[str, Path]): path to the bin file
        slice_ (slice, optional): Slice used to cut the bytes read after the header.
            Its `start` must be set and be >= BYTES_HEADER. Defaults to None (no cut).

    Returns:
        Dictionary with the keys:
            "filename": name of the file, without its directory
            "file_version": `bin2int` of the first 4 bytes of the header
            "string": `bin2str` of the remaining bytes of the header
            "blocks": the mapping returned by `classify_blocks`

    Raises:
        AssertionError: if `slice_` is given without a start or with a start
            smaller than BYTES_HEADER.
    """
    bin_file = Path(bin_file)
    with open(bin_file, mode="rb") as bfile:
        # The file header comes first; the remaining bytes hold the blocks.
        header = bfile.read(BYTES_HEADER)
        body = bfile.read()
    if slice_ is not None:
        # `slice(None, stop)` has no start: reject it with the same message instead of
        # failing with a TypeError on the `None >= int` comparison.
        assert (
            slice_.start is not None and slice_.start >= BYTES_HEADER
        ), f"The start of your slice has to be >= {BYTES_HEADER}, you passed {slice_.start} "
        # NOTE(review): the slice is applied to `body`, which already excludes the header,
        # while the check above reads like a whole-file offset - confirm which is intended.
        body = body[slice_]
    return {
        "filename" : bin_file.name,
        "file_version": bin2int(header[:4]),
        "string": bin2str(header[4:]),
        "blocks": classify_blocks(body.split(ENDMARKER)),
    }

In [None]:
# Take the first entry of `files.shuffle()` as the sample file for the cells below.
# NOTE(review): presumably a random pick, so results change between runs; this also
# raises if `files` is empty - confirm a fixed sample is not preferable.
file = files.shuffle()[0]

A função a seguir recebe os bytes lidos do arquivo `.bin` e mapeia esses bytes em diferentes classes de acordo com o tipo de bloco

In [None]:
#export
def evaluate_checksum(byte_block: bytes)->int:
    """Validate the checksum stored in the last 4 bytes of `byte_block`.

    The trailing 4 bytes are read as one unsigned 32-bit integer and compared with the
    sum of all preceding bytes, cast to unsigned 32-bit.

    Args:
        byte_block (bytes): raw block, payload followed by the 4-byte checksum

    Returns:
        The stored checksum when it matches the computed one, otherwise -1.
        -1 is also returned when numpy raises ValueError while decoding the block.
    """
    payload, trailer = byte_block[:-4], byte_block[-4:]
    try:
        stored = np.frombuffer(trailer, dtype=np.uint32).item()
        byte_sum = np.frombuffer(payload, dtype=np.uint8).sum()
        computed = byte_sum.astype(np.uint32).item()
    except ValueError:
        return -1
    if computed != stored:
        return -1
    return stored

In [None]:
#export
# Built once at import time: `byte2base_block` runs for every block of every file,
# and creating a namedtuple class on each call is needless repeated work.
_BaseBlock = namedtuple('base_block', ['thread_id', 'size', 'type', 'data', 'checksum'])


def byte2base_block(byte_block: bytes) -> tuple:
    """Split one raw block of the bin file into its base fields.

    Layout read from `byte_block`: bytes 0-3 are the thread_id, bytes 4-7 the size,
    bytes 8-11 the type, the last 4 bytes the checksum and everything between
    byte 12 and the checksum is the data.

    Args:
        byte_block (bytes): one block, as obtained by splitting the file body

    Returns:
        A `base_block` namedtuple with the fields 'thread_id', 'size', 'type', 'data'
        and 'checksum', or an empty tuple `()` when the block is empty, the checksum
        does not match, or the declared size differs from the length of the data.
    """
    if byte_block == b"":
        return ()
    checksum = evaluate_checksum(byte_block)
    size = bin2int(byte_block[4:8])
    data = byte_block[12:-4]
    # Discard the block on a checksum failure or when it is truncated
    if checksum == -1 or size != len(data):
        return ()
    return _BaseBlock(bin2int(byte_block[:4]), size, bin2int(byte_block[8:12]), data, checksum)

In [None]:
#export
def create_block(byte_block):
    """Build the block object that corresponds to one raw byte block.

    The bytes are first decoded with `byte2base_block`; the constructor is then looked
    up in `MAIN_BLOCKS` by the block type and called with the decoded base block.

    Returns:
        The constructed block, or None when:
            * `byte2base_block` returns an empty result,
            * `MAIN_BLOCKS` has no constructor for the block type,
            * the block has a `gerror` attribute different from -1 or a `gps_status`
              attribute equal to 0.
    """
    parsed = byte2base_block(byte_block)
    if not parsed:
        return None

    block_type = parsed.type
    constructor = MAIN_BLOCKS.get(block_type)
    if not constructor:
        logger.debug(f'This block type constructor is not implemented: {block_type}')
        return None

    block = constructor(parsed)
    # Missing attributes fall back to -1, which counts as "no error" in both checks.
    flagged = getattr(block, "gerror", -1) != -1 or getattr(block, 'gps_status', -1) == 0
    if flagged:
        # spectral or gps blocks with error
        logger.debug(f'Block with error: {block_type}')
        return None
    return block
    

In [None]:
#export
def classify_blocks(byte_blocks: Iterable) -> defaultdict:
    """Group the blocks built from `byte_blocks` by their key attributes.

    Each raw byte block goes through `create_block`; falsy results are skipped.
    The grouping key is what `getattrs` returns for the attribute names registered in
    `KEY_ATTRS` for the block type, falling back to ('type', 'thread_id').

    Args:
        byte_blocks (Iterable): raw byte blocks of a `.bin` file

    Returns:
        A defaultdict mapping each key to an `L` with the blocks that share it,
        in the order they were read.
    """
    grouped: Mapping[Tuple, L] = defaultdict(L)
    for block in map(create_block, byte_blocks):
        if block:
            key_names = KEY_ATTRS.get(block.type, ('type', 'thread_id'))
            key = getattrs(block, attrs=key_names)
            grouped[key].append(block)
    return grouped

In [None]:
#hide
from nbdev.export import notebook2script; notebook2script()

Converted 00_main.ipynb.
Converted 01_parser.ipynb.
Converted 02_utils.ipynb.
Converted 03_blocks.ipynb.
Converted 04_constants.ipynb.
Converted 05_stats.ipynb.
Converted 06_meta.ipynb.
No export destination, ignored:
#exporti
def _extract_uncompressed(
    blocks: Iterable, rows: int, cols: int, min_level: float, dtype=np.float16
):
    levels = np.full((rows, cols), min_level, dtype=dtype)
    block_data = "raw_data" if dtype == np.uint8 else "block_data"
    for b, block in enumerate(blocks):
        levels[b] = getattr(block, block_data)
    return levels
No export destination, ignored:
#export
def extract_level(spectrum_blocks: L, dtype=np.float32) -> pd.DataFrame:
    """Receives a mapping `spectrum_blocks` and returns the Matrix with the Levels as values, Frequencies as columns and Block Number as index.
    :param pivoted: If False, optionally returns an unpivoted version of the Matrix
    """
    assert len(spectrum_blocks), "The spectrum block list is empty"
    #     spectru

AttributeError: 'NoneType' object has no attribute 'start'