# Process LocalAir sensor data

This script will import data from the LocalAir e-scooter-based sensors.

Data is encrypted by the sensors, and this script decrypts it before converting it into a pandas DataFrame
and then visualising it.

Data can either be retrieved from a specified folder, from which all of the .bin files will be imported,
or it can be downloaded directly from the website to which the sensors upload. To avoid downloading
files unnecessarily, a time range can be specified.

Behaviour is controlled through the "source" flag.

In [None]:
from pathlib import Path
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import toml

from lib.decryption import LocalAirDecryptor
from lib.preprocess import convert_to_dataframe

secrets = toml.load('secrets.toml')

%reload_ext autoreload
%autoreload 2

pd.options.display.max_columns = None

In [None]:
# Build the decryptor from the key stored under 'key' in secrets.toml
# (loaded into `secrets` in the first cell).
#key = random.getrandbits(256)
key = secrets['key']
decryptor = LocalAirDecryptor(key, block_size=128)

# Collect the input files to process. The commented-out line reads the .bin
# files under ../data/from_server instead of the .txt files in ../data/240330.
#datafiles = list(Path('../data/from_server').glob('*.bin'))
datafiles = list(Path('../data/240330').glob('*.txt'))

In [None]:
def decrypt_datafile(datafile):
    """Lazily decrypt every non-empty line of a single data file.

    Each line is passed, without its trailing newline, to the module-level
    ``decryptor``. A line that fails with ``ValueError`` is reported through
    ``warnings.warn`` and skipped, so one bad record does not abort the
    whole file.

    Args:
        datafile: Object whose ``open()`` returns a text-mode file handle
            (for example a ``pathlib.Path``).

    Yields:
        The value returned by ``decryptor.decrypt_data`` for each line that
        decrypts successfully, in file order.
    """
    with datafile.open() as f:
        # Count from 1 so the reported line number matches what a text editor shows.
        for line_no, line in enumerate(f, start=1):
            line = line.rstrip('\n')
            if not line:
                continue

            # Keep the try body minimal: only the decryption itself may be skipped.
            try:
                decrypted = decryptor.decrypt_data(line)
            except ValueError as e:
                warnings.warn(f'Failed to process line {line_no} in {datafile}: {e}', stacklevel=2)
                continue

            yield decrypted

def decrypt_datafiles(datafiles):
    """Stream the decrypted records of several data files as one sequence.

    Files are handled one after another in the order given; the records of
    each file are produced by ``decrypt_datafile``.

    Args:
        datafiles: Iterable of data files accepted by ``decrypt_datafile``.

    Yields:
        Every record yielded by ``decrypt_datafile`` for each file in turn.
    """
    for path in datafiles:
        for record in decrypt_datafile(path):
            yield record

In [None]:
# Decrypt every line of every data file up front, then hand the full list to
# convert_to_dataframe to build the table used by the rest of the notebook.
message = list(decrypt_datafiles(datafiles))
data = convert_to_dataframe(message)
# Last expression of the cell: displays the first rows as a quick sanity check.
data.head()

## Plotting

### Points

In [None]:
data.plot(markersize=0.1)

In [None]:
# Boolean mask selecting rows whose 'datetime' lies strictly between the two
# timestamps below; it is used to filter `data` before plotting.
during_experiment = (data['datetime'] > '2023-10-19T00:00:00Z') & (data['datetime'] < '2024-11-15T00:00:00Z')

def explore(data, *args, save_as=None, **kwargs):
    """Render ``data`` as an interactive map using this notebook's default styling.

    Thin wrapper around ``data.explore`` that fills in default tiles, colour
    map and marker size, optionally saves the map as HTML, and disables
    scroll-wheel zoom on the returned map.

    Args:
        data: Object providing ``explore`` (and ``astype`` when it contains a
            'datetime' column).
        *args: Positional arguments forwarded unchanged to ``data.explore``.
        save_as: Optional file name. When given, the map is saved as HTML
            under ``../outputs/`` (the directory is created if missing).
        **kwargs: Keyword arguments forwarded to ``data.explore``; they
            override the defaults set here.

    Returns:
        The map object returned by ``data.explore``.
    """
    if 'datetime' in data:
        # If present, convert Python datetimes to strings so they can be serialised as JSON
        data = data.astype({'datetime': str})

    # Caller-supplied kwargs come last so they take precedence over the defaults.
    explore_kwargs = {
        'tiles': 'CartoDB positron',
        'cmap': 'magma_r',
        'marker_kwds': {'radius': 5},
        # This is set below: 'map_kwds': {'scrollWheelZoom': False},
        **kwargs,
    }

    interactive_map = data.explore(*args, **explore_kwargs)

    # If a filename is passed as save_as, then save an HTML version of the map.
    # Saving happens before the option below is changed, so that change applies
    # only to the returned map object.
    if save_as is not None:
        output_path = Path('../outputs') / save_as
        # Create the output directory on first use rather than failing on save.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Pass a plain string so the save call does not depend on Path support.
        interactive_map.save(str(output_path))

    # Show the map but disable zooming with the mouse wheel as this makes navigating
    # the Jupyter notebook painful
    interactive_map.options['scrollWheelZoom'] = False
    return interactive_map

explore(
    data[during_experiment],
    save_as='map_points.html',
)

### CO

In [None]:
explore(
    data[during_experiment],
    'MultiGas.co',
    save_as='map_CO.html',
)

In [None]:
# Boolean mask of rows whose geometry is still non-empty after clipping to the
# rectangle (xmin, ymin, xmax, ymax) = (-10, 50, 0, 52), i.e. rows that fall
# inside that box. The commented-out line is a much tighter alternative box.
# NOTE(review): the bounds look like longitude/latitude in degrees - confirm the CRS.
#clipped = ~data.clip_by_rect(-2.604, 51.445, -2.567, 51.475).is_empty
clipped = ~data.clip_by_rect(-10, 50, 0, 52).is_empty
explore(
    data[during_experiment & clipped],
    'MultiGas.co',
    save_as='map_CO_transparent.html',
    # No outline and a low fill opacity, so overlapping markers accumulate colour
    style_kwds={'stroke': False, 'fillOpacity': 0.2},
    #style_kwds={'stroke': False, 'fillOpacity': 1},
    # Overrides the default marker radius of 5 set inside explore()
    marker_kwds={'radius': 20},
)

In [None]:
# NOTE(review): h3pandas is never referenced by name; it appears to be imported
# only for its side effect of providing the `.h3` accessor used below - confirm.
import h3pandas
explore(
    data[during_experiment & clipped]
        .set_index('datetime')
        # Aggregate the rows into H3 cells at resolution 10 using 'max' as the operation
        .h3.geo_to_h3_aggregate(operation='max', resolution=10),
    'MultiGas.co',
    save_as='map_CO_hex.html',
)

### NO₂

In [None]:
explore(
    data[during_experiment],
    'MultiGas.no2',
    save_as='map_NOx.html',
)

### PM₁₀

In [None]:
explore(
    data[during_experiment],
    'PM_Sensor.atmos_enviro.AE_10.0',
    scheme='quantiles',
    k=10,
    save_as='map_PM10.html',
)

In [None]:
def log_column(df, column):
    """Return a copy of ``df`` with ``column`` replaced by ``log(1 + value)``.

    The input frame is not modified; ``DataFrame.assign`` builds a new frame
    in which only ``column`` is replaced.

    Args:
        df: DataFrame containing ``column``.
        column: Name of the numeric column to transform.

    Returns:
        A new DataFrame with the same columns, where ``column`` holds the
        natural logarithm of one plus the original value.
    """
    # log1p keeps full precision for values close to zero, where computing
    # 1 + x first would round the sum to exactly 1 and give log(1) == 0.
    return df.assign(**{column: np.log1p(df[column])})

explore(
    data[during_experiment]
        .pipe(log_column, 'PM_Sensor.atmos_enviro.AE_10.0'),
    'PM_Sensor.atmos_enviro.AE_10.0',
    save_as='map_PM10_log.html',
)

## Correlations

In [None]:
(
    data.select_dtypes(['Int64', 'Float64'])
    .corr()
    .style.background_gradient(cmap='RdBu', vmin=-1, vmax=1)
)

In [None]:
sns.heatmap(
    data.select_dtypes(['Int64', 'Float64'])
        .corr(),
    cmap='RdBu',
    vmin=-1,
    vmax=1,
)

In [None]:
sns.heatmap(
    data.select_dtypes(['Int64', 'Float64'])
        .loc[:, :'PM_Sensor.atmos_enviro.AE_10.0']
        .corr(),
    cmap='RdBu',
    vmin=-1,
    vmax=1,
)

In [None]:
data.plot.scatter(x='FFT.258Hz', y='PM_Sensor.atmos_enviro.AE_10.0', logx=True, logy=True, alpha=0.1)

## Spectrogram

In [None]:
# Keep only the columns whose name contains 'FFT.', indexed by 'datetime' and cast to float
fft_data = data.set_index('datetime').filter(like='FFT.').astype(float)

# Transposed so each FFT column becomes one image row (y axis) and each record
# one image column (x axis); colours are mapped on a log scale.
image = plt.imshow(fft_data.T, aspect='auto', origin='lower', norm='log')
# One tick per FFT column, labelled with the column name minus its first 4 and
# last 2 characters (e.g. 'FFT.258Hz' -> '258')
image.axes.yaxis.set_ticks(range(len(fft_data.columns)), [freq[4:-2] for freq in fft_data.columns])
image.axes.set_ylabel('Hz')

plt.colorbar()

In [None]:
data.plot.scatter(x='GPS.speed', y='FFT.387Hz', alpha=0.02)