# lorapy symbol extraction

In [1]:
%load_ext autoreload
%autoreload 2

import pathlib

import os, sys
from loguru import logger
logger.remove(None)
logger.add(sys.stdout, colorize=True, enqueue=True)

import numpy as np
from six.moves import cPickle
from tqdm import tqdm_notebook

import matplotlib.pyplot as plt 
%matplotlib inline

In [2]:
import lorapy

import scipy as sp
import scipy.signal as spsig
import warnings
import multiprocessing
from functools import partial 
import time

from lorapy.symbols import utils as sym_utils

## setup 

In [3]:
# Root of the data tree; every directory below is resolved relative to it.
_BASE_DATA_DIR = pathlib.Path('../data')

# Reference-symbol directories and capture directories used by the loaders.
DOTP_20K_DATA_DIR = _BASE_DATA_DIR / 'symbol-ref/20k'
DOTP_1M_DATA_DIR = _BASE_DATA_DIR / 'symbol-ref/1M'
OTA_1M_DATA_DIR = _BASE_DATA_DIR / 'ota-data-1M'
OUTDOOR_DATA_DIR = _BASE_DATA_DIR / 'outdoor-to-process'
OUTDOOR_DATA_DIR = _BASE_DATA_DIR.joinpath('outdoor-to-process')

# load

## dat files

In [4]:
# Index the .dat capture files under OUTDOOR_DATA_DIR and display the list.
# NOTE(review): the effect of `autoload=True` is not visible in this notebook — confirm in lorapy.
loader = lorapy.load_dat(OUTDOOR_DATA_DIR, autoload=True)
loader.file_list

[32m2020-04-17 18:35:41.582[0m | [34m[1mDEBUG   [0m | [36mlorapy.io._base_loader[0m:[36m_validate_data_path[0m:[36m140[0m - [34m[1mset datafile directory: ../data/outdoor-to-process[0m
[32m2020-04-17 18:35:41.584[0m | [1mINFO    [0m | [36mlorapy.io._base_loader[0m:[36m_process_data_dir[0m:[36m153[0m - [1mfound 11 data file(s)[0m


[DatFile(id=0 | name='lora_BW9_SF12_915MHz_1Msps_L46.dat'),
 DatFile(id=1 | name='lora_BW7_SF12_915MHz_1Msps_L3.dat'),
 DatFile(id=2 | name='lora_BW7_SF11_915MHz_1Msps_L4.dat'),
 DatFile(id=3 | name='lora_BW8_SF11_915_25MHz_1Msps_L37.dat'),
 DatFile(id=4 | name='lora_BW2_SF11_914_75MHz_1Msps_L19.dat'),
 DatFile(id=5 | name='lora_BW2_SF11_915MHz_1Msps_L6.dat'),
 DatFile(id=6 | name='lora_BW7_SF10_915_25MHz_1Msps_L33.dat'),
 DatFile(id=7 | name='lora_BW2_SF10_915_25MHz_1Msps_L30.dat'),
 DatFile(id=8 | name='lora_BW7_SF11_914_75MHz_1Msps_L18.dat'),
 DatFile(id=9 | name='lora_BW9_SF12_915MHz_1Msps_L2.dat'),
 DatFile(id=10 | name='lora_BW9_SF10_915MHz_1Msps_L1.dat')]

## dotp files

In [5]:
# Index the dotp (.p) reference-symbol files under DOTP_1M_DATA_DIR and display the list.
# `ploader` is read as a global by `_load_matching_dotp`.
ploader = lorapy.load_dotp(DOTP_1M_DATA_DIR)
ploader.file_list

[32m2020-04-17 18:35:41.605[0m | [34m[1mDEBUG   [0m | [36mlorapy.io._base_loader[0m:[36m_validate_data_path[0m:[36m140[0m - [34m[1mset datafile directory: ../data/symbol-ref/1M[0m
[32m2020-04-17 18:35:41.607[0m | [1mINFO    [0m | [36mlorapy.io._base_loader[0m:[36m_process_data_dir[0m:[36m153[0m - [1mfound 17 data file(s)[0m


[DotPFile(id=0 | name='lora_symbols_BW1_SF7.p'),
 DotPFile(id=1 | name='lora_symbols_BW2_SF10.p'),
 DotPFile(id=2 | name='lora_symbols_BW8_SF11.p'),
 DotPFile(id=3 | name='lora_symbols_BW8_SF9.p'),
 DotPFile(id=4 | name='lora_symbols_BW1_SF10.p'),
 DotPFile(id=5 | name='lora_symbols_BW1_SF8.p'),
 DotPFile(id=6 | name='lora_symbols_BW1_SF11.p'),
 DotPFile(id=7 | name='lora_symbols_BW8_SF8.p'),
 DotPFile(id=8 | name='lora_symbols_BW9_SF12.p'),
 DotPFile(id=9 | name='lora_symbols_BW2_SF11.p'),
 DotPFile(id=10 | name='lora_symbols_BW8_SF7.p'),
 DotPFile(id=11 | name='lora_symbols_BW2_SF7.p'),
 DotPFile(id=12 | name='lora_symbols_BW7_SF12.p'),
 DotPFile(id=13 | name='lora_symbols_BW2_SF12.p'),
 DotPFile(id=14 | name='lora_symbols_BW9_SF10.p'),
 DotPFile(id=15 | name='lora_symbols_BW1_SF12.p'),
 DotPFile(id=16 | name='lora_symbols_BW1_SF9.p')]

# process 

## symbol correlation settings

In [6]:
# Shift step used when generating correlation shifts, keyed by bandwidth id
# (looked up with `packet.stats.bw` in `correlate_and_slice`).
# NOTE(review): a bandwidth id missing from this table raises KeyError there.
_step_dict = {
    1: 100,
    2: 100,
    7: 4,
    8: 2,
    9: 2,
}

## functions

In [7]:
def _load_matching_dotp(bw: int, sf: int):
    """Return the first dotp reference file matching the given bandwidth and spreading factor.

    Reads the module-level `ploader`.

    Args:
        bw: bandwidth id passed to `ploader.filter`.
        sf: spreading factor passed to `ploader.filter`.

    Raises:
        IndexError: if the filter yields no match. The message names the
            missing combination so a failed batch run is easy to diagnose.
    """
    matches = ploader.filter(bw=bw, sf=sf)

    try:
        return matches[0]
    except IndexError as exc:
        # Keep the original exception type, but say which combination is missing.
        raise IndexError(
            f'no dotp reference file found for bw={bw}, sf={sf}'
        ) from exc

def _convert_files(file, dotp_file):
    return file.to_signal(), dotp_file.to_signal()


def _load_and_convert(file):
    """Load a data file and convert it, with its matching reference, to signals.

    Calls `file.load()`, looks up the dotp reference for `file.bw` / `file.sf`,
    and converts both files.

    Returns:
        The `(signal, base_symbol)` tuple produced by `_convert_files`.
    """
    file.load()
    reference_file = _load_matching_dotp(file.bw, file.sf)
    return _convert_files(file, reference_file)


def _extract_and_manual_adjust(signal):
    signal.extract_packets(method='slide-mean', auto_adj=False, overlap=0.5)
    signal.adjust_packets(
        force_check=True, 
        adjust_type='biased-mean', 
        look_ahead=100, threshold=0.5,
    )
    
    return signal


def _format_output_path(base_dir, signal):
    filename = pathlib.Path(signal.stats.filename)
    
    out_path = base_dir.joinpath(
        'processed-symbols/outdoor'
    ).joinpath(
        filename.with_suffix('').with_suffix('.p')
    )
    
    return out_path


def _save_symbols(data, signal, base_dir):
    """Pickle `data` to the output path derived from `signal` and `base_dir`.

    Args:
        data: object to pickle.
        signal: object whose `stats.filename` names the output file
            (see `_format_output_path`).
        base_dir: `pathlib.Path` root under which the output is written.
    """
    out_path = _format_output_path(base_dir, signal)

    # Create the output directory on first use; otherwise the open below
    # fails with FileNotFoundError on a fresh checkout.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    with out_path.open('wb') as outfile:
        cPickle.dump(data, outfile)

        
def _extract_symbols(packet):
    packet.extract_preamble_window()
    return packet._preamble_window


def _extract_and_save_symbols(packets, base_dir=None):
    """Stack the preamble windows of all packets and pickle the result.

    Args:
        packets: indexable sequence of packets; each is passed to
            `_extract_symbols`.
        base_dir: root directory for the output file. Defaults to
            `_BASE_DATA_DIR`.

    Note:
        `np.vstack` raises ValueError when `packets` is empty.
    """
    if base_dir is None:
        base_dir = _BASE_DATA_DIR

    full_array = np.vstack([
        _extract_symbols(packet)
        for packet in packets
    ])

    # The first packet is only used to name the output file.
    # NOTE(review): assumes a packet exposes `stats.filename` like a signal does — confirm.
    # `_save_symbols` requires `base_dir`; omitting it always raised TypeError.
    _save_symbols(full_array, packets[0], base_dir)
        

def _get_correlation_values(base_symbol, preamble, samp_per_sym, shift_step):
    """Correlate a base symbol against a preamble over a set of shifts.

    Shifts come from `sym_utils.generate_shifts` (range factor 10, step
    `shift_step`), and `base_symbol.data` is correlated against `preamble`
    with `sym_utils.shift_and_correlate`.

    Returns:
        Whatever `sym_utils.shift_and_correlate` returns.
    """
    candidate_shifts = sym_utils.generate_shifts(
        samp_per_sym,
        range_factor=10,
        step=shift_step,
    )

    return sym_utils.shift_and_correlate(
        base_symbol.data,
        preamble,
        samp_per_sym,
        candidate_shifts,
    )

def _get_adjusted_distance(samp_per_sym, shift_step):
    distance = int(samp_per_sym // shift_step)
    distance *= 0.90 
    return distance


def _find_peaks(corr_vals, samp_per_sym, shift_step):
    """Locate peaks in `corr_vals` separated by at least the adjusted distance.

    Returns:
        The peak index array, i.e. the first element of the tuple returned
        by `scipy.signal.find_peaks`.
    """
    min_separation = _get_adjusted_distance(samp_per_sym, shift_step)
    peak_indices, _properties = spsig.find_peaks(corr_vals, distance=min_separation)
    return peak_indices


def _corr_sanity_plot(corr_vals, peaks):
    """Plot correlation values alone and with the detected peaks marked.

    The top panel shows `corr_vals`. The bottom panel overlays a strip that
    is 1.1 x max(corr_vals) at every index in `peaks` and 0 elsewhere.

    Args:
        corr_vals: sequence of correlation values.
        peaks: indices into `corr_vals` to mark.
    """
    n_vals = len(corr_vals)

    # The marker height does not depend on the index, so compute it once.
    # np.max raises on empty input, hence the guard.
    marker_height = np.max(corr_vals) * 1.1 if n_vals else 0

    # Vectorised membership test instead of `idx in peaks` for every sample.
    is_peak = np.isin(np.arange(n_vals), peaks)
    symbol_strips = np.where(is_peak, marker_height, 0)

    fig, axs = plt.subplots(2)
    axs[0].plot(corr_vals)
    axs[1].plot(corr_vals)
    axs[1].plot(symbol_strips)
    plt.show()

    
def _extract_symbols_from_peaks(packet_data: np.ndarray, peak_shifts: list, samp_per_sym: int) -> np.ndarray:
    symbols = np.vstack([
        packet_data[shift: shift+samp_per_sym]
        for shift in peak_shifts
    ])
    
    return symbols
        

def _sanity_plot(symbols):
    """Plot each row of `symbols` on its own stacked subplot.

    Args:
        symbols: 2-D array with one symbol per row. Nothing is drawn when
            it has zero rows.
    """
    n_symbols = symbols.shape[0]
    if n_symbols == 0:
        # plt.subplots(0) raises; there is nothing to plot anyway.
        return

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # squeeze=False always returns a 2-D axes array. Without it a single
        # symbol yields a bare Axes object and indexing it fails.
        fig, axes = plt.subplots(n_symbols, squeeze=False)

        for axis, sym in zip(axes[:, 0], symbols):
            axis.plot(sym)


def correlate_and_slice(packet, base_symbol, save_sanity=False):
    """Correlate a packet against a reference and cut out the symbols at the peaks.

    Args:
        packet: object exposing `data` and `stats` (`samp_per_sym`, `bw`).
        base_symbol: reference sample array handed to
            `sym_utils.shift_and_correlate`.
        save_sanity: when true, plot the extracted symbols with `_sanity_plot`.

    Returns:
        The array produced by `_extract_symbols_from_peaks`.
    """
    samp_per_sym = packet.stats.samp_per_sym
    shift_step = _step_dict[packet.stats.bw]

    shifts = sym_utils.generate_shifts(samp_per_sym, range_factor=10, step=shift_step)
    corr_vals = sym_utils.shift_and_correlate(base_symbol, packet.data, samp_per_sym, shifts)
    peaks = _find_peaks(corr_vals, samp_per_sym, shift_step)

    # Peaks index into the correlation output; map them back to sample shifts.
    # NOTE(review): assumes `shifts` can be iterated again after correlation — confirm it is not a one-shot iterator.
    shift_lookup = list(shifts)
    peak_shifts = [shift_lookup[peak] for peak in peaks]

    symbols = _extract_symbols_from_peaks(packet.data, peak_shifts, samp_per_sym)

    if save_sanity:
        _sanity_plot(symbols)

    return symbols



def slice_all_packets(packets, symbol_data):
    """Slice every packet into symbols in parallel and stack the results.

    The reference passed to `correlate_and_slice` is `symbol_data` repeated
    six times back to back. Each packet's result is truncated to the smallest
    per-packet symbol count so all packets contribute equally many rows.

    Args:
        packets: iterable of packets, mapped over a `multiprocessing.Pool`.
        symbol_data: 1-D reference symbol samples.

    Returns:
        2-D array of all retained symbols, stacked packet after packet.

    Raises:
        ValueError: if `packets` is empty.
    """
    six_symbol_data = np.concatenate([symbol_data] * 6)
    corr_slice = partial(correlate_and_slice, base_symbol=six_symbol_data)

    with multiprocessing.Pool() as pool:
        results = pool.map(corr_slice, packets)

    if not results:
        # np.vstack would raise an opaque ValueError here; be explicit instead.
        raise ValueError('no packets to slice')

    # This was a bare generator expression, which is not a valid slice bound
    # and raised TypeError on the next line.
    min_size = min(len(res) for res in results)
    results = [res[:min_size] for res in results]
    return np.vstack(results)

def process_and_save(file):
    """Run the full pipeline for one data file and pickle the sliced symbols.

    Steps: load and convert the file and its reference, extract and adjust
    the packets, slice all packets against `base_symbol.data[0]`, and save
    the result under `_BASE_DATA_DIR`.
    """
    signal, base_symbol = _load_and_convert(file)
    adjusted_signal = _extract_and_manual_adjust(signal)

    packets = adjusted_signal.packets
    reference_symbol = base_symbol.data[0]
    sliced_symbols = slice_all_packets(packets, reference_symbol)

    _save_symbols(sliced_symbols, adjusted_signal, _BASE_DATA_DIR)
    

# manual process

In [8]:
# Show the data files ordered by name, the same ordering the batch loop uses.
sorted(loader.file_list, key=lambda fl: fl.name)

[DatFile(id=0 | name='lora_BW9_SF12_915MHz_1Msps_L46.dat'),
 DatFile(id=1 | name='lora_BW7_SF12_915MHz_1Msps_L3.dat'),
 DatFile(id=2 | name='lora_BW7_SF11_915MHz_1Msps_L4.dat'),
 DatFile(id=3 | name='lora_BW8_SF11_915_25MHz_1Msps_L37.dat'),
 DatFile(id=4 | name='lora_BW2_SF11_914_75MHz_1Msps_L19.dat'),
 DatFile(id=5 | name='lora_BW2_SF11_915MHz_1Msps_L6.dat'),
 DatFile(id=6 | name='lora_BW7_SF10_915_25MHz_1Msps_L33.dat'),
 DatFile(id=7 | name='lora_BW2_SF10_915_25MHz_1Msps_L30.dat'),
 DatFile(id=8 | name='lora_BW7_SF11_914_75MHz_1Msps_L18.dat'),
 DatFile(id=9 | name='lora_BW9_SF12_915MHz_1Msps_L2.dat'),
 DatFile(id=10 | name='lora_BW9_SF10_915MHz_1Msps_L1.dat')]

In [9]:
# Pick individual files for manual runs.
# NOTE(review): presumably `select` takes the `id` shown in the file listing — confirm.
file0 = loader.select(9)
file1 = loader.select(1)
file2 = loader.select(0)

## single

In [None]:
# NOTE(review): MULTIPROC is not read anywhere in this notebook; presumably lorapy consumes it — confirm.
os.environ['MULTIPROC'] = 'true'

# Process one hand-picked file end to end.
logger.warning(f'working file: {file0}')
process_and_save(file0)

[32m2020-04-17 18:35:41.727[0m | [34m[1mDEBUG   [0m | [36mlorapy.datafile.encoding[0m:[36mcompute_params[0m:[36m15[0m - [34m[1mcomputed samples per symbol: 8192 and packet length: 247808[0m
[32m2020-04-17 18:35:41.831[0m | [1mINFO    [0m | [36mlorapy.datafile._base_file[0m:[36mload[0m:[36m61[0m - [1mloaded 15561810 samples from file[0m
[32m2020-04-17 18:35:41.834[0m | [34m[1mDEBUG   [0m | [36mlorapy.datafile.encoding[0m:[36mcompute_params[0m:[36m15[0m - [34m[1mcomputed samples per symbol: 8192 and packet length: 247808[0m
[32m2020-04-17 18:35:42.272[0m | [1mINFO    [0m | [36mlorapy.datafile._base_file[0m:[36mload[0m:[36m61[0m - [1mloaded 10354688 samples from file[0m
[32m2020-04-17 18:35:42.273[0m | [1mINFO    [0m | [36mlorapy.signals.signal[0m:[36m_process_signal[0m:[36m116[0m - [1mselected "slide-mean" processing method[0m
[32m2020-04-17 18:35:42.313[0m | [1mINFO    [0m | [36mlorapy.signals.processing.sliding_mean[

## loop

In [None]:
# Batch run: process every data file in name order.
# NOTE(review): `exceptioned_files` is only filled by the commented-out
# try/except below; with that guard disabled the first failure aborts the loop.
exceptioned_files = []
# NOTE(review): MULTIPROC is not read anywhere in this notebook; presumably lorapy consumes it — confirm.
os.environ['MULTIPROC'] = 'true'

for file in sorted(loader.file_list, key=lambda fl: fl.name):
    logger.warning(f'working file: {file}')
    process_and_save(file)
    
#     try:
#         process_and_save(file)
#     except Exception as exc:
#         logger.error(f'encountered exception for file {file}\n{exc}')
#         exceptioned_files.append(file.name) 
    
    


## sleep version

## process loop

In [None]:
# Names of files whose processing raised; each failure is also logged.
exceptioned_files = []

for file in tqdm_notebook(loader.file_list):
    file.load()
    dotp_file = _load_matching_dotp(file.bw, file.sf)

    signal, base_symbol = _convert_files(file, dotp_file)
    logger.info(f'\n{signal}')
    logger.info(f'\n{base_symbol}')
    
    try:
        # `_extract_and_manual_adjust` takes only the signal and returns it.
        # The old call passed `(base_symbol, _step_dict)`, which always raised
        # TypeError and was silently swallowed below.
        signal = _extract_and_manual_adjust(signal)
        _extract_and_save_symbols(signal.packets)
    except Exception as exc:
        # Record the failure, but say why, so it is not hidden.
        logger.error(f'encountered exception for file {file}\n{exc}')
        exceptioned_files.append(file.name) 
    
    # NOTE(review): stops after the first file; remove to process the whole list.
    break

In [None]:
# File names of 20 ksps captures; not referenced by any other cell visible in this notebook.
# NOTE(review): some entries have no underscore before `Att` (e.g. `20kspsAtt120`);
# presumably this mirrors the names on disk — confirm before normalizing.
new_filenames = [
    'lora_BW1_SF10_915MHz_20ksps_Att0_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att100_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att120_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att140_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att20_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att40_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att60_v1.dat', 
    'lora_BW1_SF10_915MHz_20ksps_Att80_v1.dat', 
    'lora_BW1_SF11_915MHz_20kspsAtt120_v1.dat', 
    'lora_BW1_SF11_915MHz_20kspsAtt140_v1.dat', 
    'lora_BW1_SF11_915MHz_20ksps_Att0_v1.dat', 
    'lora_BW1_SF11_915MHz_20ksps_Att100_v1.dat', 
    'lora_BW1_SF11_915MHz_20ksps_Att20_v1.dat', 
    'lora_BW1_SF11_915MHz_20ksps_Att40_v1.dat', 
    'lora_BW1_SF11_915MHz_20ksps_Att60_v1.dat', 
    'lora_BW1_SF11_915MHz_20ksps_Att80_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt0_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt0_v2.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt100_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt120_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt140_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt20_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt40_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt60_v1.dat', 
    'lora_BW1_SF12_915MHz_20kspsAtt80_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att0_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att100_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att120_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att140_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att20_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att40_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att60_v1.dat', 
    'lora_BW2_SF10_915MHz_20ksps_Att80_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att0_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att100_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att120_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att140_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att20_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att40_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att60_v1.dat', 
    'lora_BW2_SF11_915MHz_20ksps_Att80_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att0_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att100_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att120_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att140_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att20_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att40_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att60_v1.dat', 
    'lora_BW2_SF12_915MHz_20ksps_Att80_v1.dat', 
]