# Data exploration

In [1]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio

from libs.chapter5.pipeline.raw_data_loading import load_labelled_data, count_samples, DATASET_PATH
from libs.chapter5.pipeline.filters import dbscan_filtering, wavelet_filtering
from plotly.subplots import make_subplots

# Select "notebook" as the default Plotly renderer for this session.
pio.renderers.default = "notebook"
# Subcarrier indexes -28..-1 followed by 1..28 (index 0 is left out);
# used as the y axis of the amplitude heatmaps.
SUBCARRIERS_INDEXES = np.concatenate((np.arange(-28, 0), np.arange(1, 29)))


In [2]:
# Load the four datasets in order; every call is unpacked into an
# (executions, labels) pair bound to the same names as before.
(
    (d1_executions, d1_labels),
    (d2_executions, d2_labels),
    (d3_executions, d3_labels),
    (d4_executions, d4_labels),
) = [load_labelled_data(DATASET_PATH.format(name)) for name in ('D1', 'D2', 'D3', 'D4')]

In [3]:
#| label: tbl-csi_collected_samples
#| tbl-cap: Collected CSI samples for each activity and dataset

# Gather the labels of D1-D4 (in that order) and display their sample counts.
labels_per_dataset = [d1_labels, d2_labels, d3_labels, d4_labels]
count_samples(labels_per_dataset)

Unnamed: 0,D1,D2,D3,D4
SEATED_RX,2864.0,614.0,593.0,569.0
STANDING_UP_RX,1305.0,293.0,276.0,269.0
WALKING_TX,2285.0,455.0,466.0,469.0
TURN_TX,1133.0,222.0,238.0,208.0
SITTING_DOWN_TX,1538.0,351.0,301.0,315.0
SEATED_TX,2890.0,415.0,504.0,499.0
STANDING_UP_TX,1289.0,291.0,271.0,267.0
WALKING_RX,2470.0,503.0,504.0,510.0
TURN_RX,997.0,194.0,228.0,175.0
SITTING_DOWN_RX,1524.0,353.0,304.0,301.0


In [4]:
def _segment_boundaries(sequence_labels):
    """Return the sample indexes that delimit runs of equal consecutive labels.

    The result starts with 0, contains every index whose label differs from
    the label right before it, and ends with the last index of the sequence.
    An empty label sequence yields an empty array.

    Unlike a comparison against ``np.roll(labels, 1)``, the start boundary does
    not depend on the first and last labels being different.

    NOTE(review): assumes ``sequence_labels`` is a 1-D sequence -- confirm.
    """
    labels = np.asarray(sequence_labels)
    if labels.size == 0:
        return np.array([], dtype=int)
    # Index i is a change point when labels[i] != labels[i - 1], for i >= 1.
    changes = np.flatnonzero(labels[1:] != labels[:-1]) + 1
    return np.concatenate(([0], changes, [len(labels) - 1]))


def plot_amplitudes_heatmap(data, labels, sequences, apply_preprocessing=False):
    """Build a figure with one amplitude heatmap per sequence, annotated by activity.

    Each sequence gets its own subplot row. Dashed green vertical lines mark
    the indexes where the label changes, and the span between two consecutive
    lines is annotated with an activity name.

    Parameters
    ----------
    data : mapping
        Indexed by sequence name; each entry is the matrix drawn as heatmap
        ``z``. NOTE(review): its orientation (time vs. subcarrier axis) is not
        visible here -- confirm against ``load_labelled_data``.
    labels : mapping
        Indexed by sequence name; each entry holds the labels used to locate
        the activity boundaries.
    sequences : list of str
        Sequence names such as ``'e01_rx_tx'``. Everything after the first
        ``_``-separated token is taken as the direction (``'rx_tx'`` here).
    apply_preprocessing : bool, default False
        When True, ``wavelet_filtering(dbscan_filtering(...))`` is applied
        along axis 1 of each sequence matrix before plotting.

    Returns
    -------
    The figure created by ``make_subplots``, with all traces, lines and
    annotations added. It is not shown; the caller decides how to display it.

    Notes
    -----
    Activity names are assigned by segment position from a fixed list of six
    names per direction; they are not read from the label values. A sequence
    with more than six segments raises ``IndexError``.
    """
    directions = ['_'.join(sequence.split('_')[1:]) for sequence in sequences]
    fig = make_subplots(
        rows=len(sequences), cols=1, shared_xaxes=False, vertical_spacing=0.25,
        y_title='<b>Subcarrier index</b>',
        subplot_titles=['\u2192'.join(direction.split('_')).upper() for direction in directions],
    )
    # Must run before the segment annotations are added so that only the
    # annotations created by make_subplots (e.g. y_title) get this font size.
    fig.update_annotations(font_size=20)

    # Expected order of activities for each direction of the sequence.
    actions_rx_tx = ["SEATED_RX", "STANDING_UP_RX", "WALKING_TX", "TURNING_TX", "SITTING_DOWN_TX", "SEATED_TX"]
    actions_tx_rx = ["SEATED_TX", "STANDING_UP_TX", "WALKING_RX", "TURNING_RX", "SITTING_DOWN_RX", "SEATED_RX"]

    for i, (sequence, direction) in enumerate(zip(sequences, directions)):
        actions = actions_rx_tx if direction == 'rx_tx' else actions_tx_rx

        sequence_data = data[sequence]
        sequence_labels = labels[sequence]

        x = np.arange(len(sequence_data))
        y = SUBCARRIERS_INDEXES
        if apply_preprocessing:
            # Filter every 1-D slice taken along axis 1: DBSCAN first, then wavelet.
            sequence_data = np.apply_along_axis(
                lambda series: wavelet_filtering(dbscan_filtering(series)), 1, sequence_data
            )

        fig.add_trace(
            go.Heatmap(
                z=sequence_data,
                x=x,
                y=y,
                colorscale='Blues',
                # Linked to the shared coloraxis, which is styled at the end
                # of this function.
                coloraxis="coloraxis"
            ),
            row=i+1, col=1
        )

        boundaries = _segment_boundaries(sequence_labels)

        for j, boundary in enumerate(boundaries):
            fig.add_vline(
                x=boundary, line_width=3, line_dash="dash", line_color="green",
                row=i+1, col=1
            )

            # The last boundary only closes the final segment; it starts none.
            if j != len(boundaries) - 1:
                # Alternate labels below/above the plot so neighbours do not overlap.
                arrangement = {"position": 'bottom', "yoffset": -25} if j % 2 == 0 else {"position": 'top', "yoffset": 25}

                # Fully transparent rectangle, used only as an anchor for the
                # activity label centred over the segment.
                fig.add_vrect(
                    x0=boundary, x1=boundaries[j+1],
                    fillcolor='white', opacity=0,
                    layer="below", line_width=0,
                    annotation_text=f'<b>{actions[j]}</b>', annotation_font_size=16, annotation_font_family='Courier',
                    annotation_position=arrangement['position'], annotation_xanchor='center',
                    annotation_yshift=arrangement['yoffset'], annotation_bordercolor='black',
                    row=i+1, col=1
                )

    fig.update_layout(height=300*len(sequences))
    fig.update_xaxes(visible=False, showticklabels=False)
    fig.layout.coloraxis.colorbar.title = {'text': "<b>Amplitude</b>", 'font': {'size': 16}}
    # Single horizontal colorbar shared by all heatmaps, placed above the figure.
    fig.update_coloraxes(
        autocolorscale=False,
        colorscale='RdBu_r',
        colorbar={
            'orientation': 'h',
            'thickness': 20,
            'y': 1.2
        }
    )
    return fig

In [5]:
#| label: fig-csi_data
#| fig-cap: "First two executions of the D1 dataset before and after applying the data processing techniques."
#| fig-subcap:
#|   - "Before preprocessing"
#|   - "After preprocessing"
#| column: page-right

# Sequences to draw; each entry becomes one subplot row of the figure.
sequences = ['e01_rx_tx', 'e01_tx_rx']

# Show the figure twice: first without preprocessing, then with it.
for apply_preprocessing in (False, True):
    plot_amplitudes_heatmap(
        d1_executions, d1_labels, sequences, apply_preprocessing=apply_preprocessing
    ).show()