In [2]:
import numpy as np, pandas as pd, plotly.express as px, plotly.graph_objects as go
from scipy.signal import savgol_filter as sgf
from pathlib import Path
import scipy.optimize as opt

from SJ_Haar_CNV import base_search
from SJ_Haar_CNV import decomposition
from SJ_Haar_CNV import report
from tests import test_data

# Generate Toy Data
We simulate data with 5 segments. Three of them share the same mean, and the other two are alterations: the first is a focal (short), drastic change in mean, and the second is a long segment with only a slight change in mean.
We add noise drawn from a normal distribution whose standard deviation equals the magnitude of the second alteration (segment `mean_3`, a mean shift of 1). This makes that segment difficult to detect.

In [3]:
# Seeded generator so that Restart & Run All reproduces the same noisy signal.
SEED = 0
rng = np.random.default_rng(SEED)

# Five constant segments: baseline (5), a short drastic drop (-6), baseline,
# a long slight shift (6), and baseline again.
base_dist = [np.repeat(5, 1800), np.repeat(-6, 210), np.repeat(5, 1900), np.repeat(6, 1900), np.repeat(5, 1000)]
true_signal = np.concatenate(base_dist)
# Unit-variance Gaussian noise; its standard deviation matches the 5 -> 6 shift.
noisy_signal = true_signal + rng.normal(0, 1, true_signal.size)
# One label per sample ('mean_0' ... 'mean_4') identifying its segment.
types = [np.repeat(f'mean_{i}', j.size) for i, j in enumerate(base_dist)]

In [4]:
# Collect the clean signal, the noisy observation and the per-sample segment
# label into one frame; the bare `df` at the end renders it.
columns = {
    'true_signal': true_signal,
    'raw': noisy_signal,
    'segment': np.concatenate(types),
}
df = pd.DataFrame(columns)
df

Unnamed: 0,true_signal,raw,segment
0,5,4.218694,mean_0
1,5,5.503853,mean_0
2,5,3.909286,mean_0
3,5,6.791368,mean_0
4,5,6.584695,mean_0
...,...,...,...
6805,5,4.777396,mean_4
6806,5,5.561940,mean_4
6807,5,5.673921,mean_4
6808,5,6.141050,mean_4


In [5]:
# Scatter of the noisy samples, coloured by the segment each sample belongs to.
fig = px.scatter(
    df,
    x=df.index,
    y='raw',
    color='segment',
    title='Simulated Data with Means marked',
)
# Point the reader at the subtle shift, which is hard to see by eye.
fig.add_annotation(x=4800, y=-2, text="Slight mean shift ^", showarrow=False)
# Noise-free signal as a line; negative zorder requests drawing order below the other traces.
true_signal_trace = go.Scatter(
    x=df.index,
    y=true_signal,
    mode='lines',
    name='True Signal',
    zorder=-2,
)
fig.add_trace(true_signal_trace)
fig.show()

# With the data in hand, here are the steps we need to create our results

In [6]:
# Search for the Haar basis on the noisy signal.
# NOTE(review): the meaning of p0 and length is defined by
# base_search.generate_haar_basis — confirm against its documentation.
basis = base_search.generate_haar_basis(
    df['raw'],
    p0=.8,
    length=21,
    debug=False,
)

In [19]:
# Decompose the raw values onto the basis found above; the call returns five
# results that are unpacked by position.
raw_values = df['raw'].values
(
    basis_matrix,
    filtered_coefficients,
    all_coefficients,
    threshold,
    rle_y,
) = decomposition.decompose(raw_values, basis)

# Expand the run-length-encoded result into a per-sample column.
df['transformed'] = decomposition.rle_to_array(rle_y)

In [21]:
# Overlay every wavelet with a truthy entry in filtered_coefficients on the true signal.
fig = go.Figure()
fig.add_trace(go.Scatter(y=df['true_signal'], name='true signal'))
# TODO: put the wavelets on a secondary y-axis (like matplotlib's twinx) so the scales are separate?
for idx in np.nonzero(filtered_coefficients)[0]:
    wavelet = basis[idx]
    # presumably wavelet[2] is the start position and wavelet[3][1] / wavelet[4][1]
    # are the lengths of the two halves — verify against base_search.
    start = wavelet[2]
    stop = start + wavelet[3][1] + wavelet[4][1]
    fig.add_trace(
        go.Scatter(
            x=np.arange(start, stop),
            # Scaled by 10, presumably so the wavelet is visible next to the signal.
            y=10 * decomposition.generate_wavelet_function(wavelet),
            name='wavelet ' + str(idx),
        )
    )

fig.show()

The "power spectrum" of the wavelets

In [27]:
# Sorted absolute coefficients against evenly spaced fractions in [0, 1],
# i.e. an empirical cumulative distribution of coefficient magnitudes.
sorted_magnitudes = np.sort(np.abs(all_coefficients))
cumulative_fraction = np.linspace(0, 1, len(all_coefficients))

fig = go.Figure()
fig.add_trace(go.Scatter(x=sorted_magnitudes, y=cumulative_fraction, mode='markers'))

In [17]:
# Build the report figure from the DataFrame and keep a handle on it so it can be edited later.
fig = report.visualize_data(df)
# Bare last expression so the notebook renders the figure.
fig

This visualization is returned as a basic plotly figure object, so it can be edited afterwards too.

In [18]:
# Overlay the noise-free signal as a black line on the report figure.
original_signal_trace = go.Scatter(
    x=df.index,
    y=df['true_signal'],
    mode='lines',
    name='Original Signal',
    line=dict(color='black'),
)
fig.add_trace(original_signal_trace)

We also have special support for inhomogeneous data. You may provide the relative positions of segments to the visualization step.

In [13]:
# Simulate unevenly spaced data: every sample from index 3500 onwards is
# pushed 500 positions to the right, leaving a gap in the coordinates.
gap_start, gap_size = 3500, 500
locations = np.array(df.index)
locations[gap_start:] += gap_size
df['location'] = locations

In [14]:
# Re-run the visualization now that df carries a 'location' column.
# NOTE(review): presumably visualize_data picks that column up for the x positions — confirm.
report.visualize_data(df)

Currently the visualization just extends the noise estimation and segment call across the barriers, but in the future that could be remedied.