# Multi-scale Intensity Characterisation ($\mu_s$IC)

Denoising, feature identification, error estimation, and signal/background/noise separation

# 1. Init

## System setup

In [None]:
%matplotlib ipympl
from matplotlib import pyplot as plt
from matplotlib import colors
from matplotlib.ticker import AutoMinorLocator

import ipywidgets as widgets
from IPython.display import display

import numpy as np
from time import time
from scipy import ndimage, special

from astropy import stats
#import pywt
#import pynkowski as mf   # For Minkowski Functionals

In [None]:
import importlib  # will not be needed in production
import scripts

## Function definitions

In [None]:
# Disabled code: the definition below sits inside a string literal, so it is
# never executed and `uniform_filter` is not defined by this cell.
# NOTE(review): this variant has no NaN handling; `nanfilter` looks like its
# replacement -- confirm before deleting this cell.
"""
def uniform_filter(cumulant, h):
    '''Uniform filter, based on cumulant'''
    scale = 2*h + 1
    if scale > cumulant.size:
        print(f'ERROR: The requested scale, 2*{h} = {scale}, is larger than size = {cumulant.size}')
        return cumulant*np.nan
    
    f = np.empty_like(cumulant)
    f[:h+1] = cumulant[h:scale] / (np.arange(h+1) + 1)
    f[h+1:f.size-h] = (cumulant[scale:] - cumulant[:-scale]) / scale
    f[f.size-h:] = (cumulant[-1] - cumulant[-scale:-h-1]) / (h - np.arange(h))
    return f
"""


In [None]:
def nanfilter(cumulant, cumulant_n, h):
    '''Uniform filter of half-width `h`, taking NaN into account (faster).

    The filter works on running sums, so each output sample costs O(1)
    regardless of the window width.

    Parameters
    ----------
    cumulant : 1-D array
        Running sum of the signal (e.g. `np.nancumsum(x)`).
    cumulant_n : 1-D array
        Running count of valid samples (e.g. `np.nancumsum(np.isfinite(x))`).
    h : int
        Half-width of the window; the full width is `2*h + 1` samples,
        truncated at both ends of the array.

    Returns
    -------
    Array with the shape of `cumulant` holding the mean of the valid samples
    in each window, or 0 where a window contains no valid sample.
    If the window is wider than the array, a warning is printed and every
    element is set to the global mean (NaN when there is no valid sample).
    '''
    cumulant = np.asarray(cumulant)
    cumulant_n = np.asarray(cumulant_n)
    size = cumulant.size
    # Integer running sums would otherwise truncate the means on assignment.
    dtype = cumulant.dtype if np.issubdtype(cumulant.dtype, np.inexact) else float

    def window_mean(total, count):
        '''Return total / count, leaving 0 wherever count is not positive.'''
        total, count = np.broadcast_arrays(total, count)
        mean = np.zeros(total.shape, dtype=dtype)
        # `where=` skips the empty windows instead of evaluating 0/0.
        np.divide(total, count, out=mean, where=count > 0)
        return mean

    if size == 0:
        return np.zeros(cumulant.shape, dtype=dtype)

    scale = 2*h + 1
    if scale > size:
        # Use the input itself (not any global array) for the size check and
        # for the fallback value: the last running sum over the last count.
        print(f'WARNING: The requested scale, 2*{h} + 1 = {scale}, is larger than size = {size}')
        n_valid = cumulant_n[-1]
        global_mean = cumulant[-1] / n_valid if n_valid > 0 else np.nan
        return np.full(cumulant.shape, global_mean, dtype=dtype)

    f = np.empty(cumulant.shape, dtype=dtype)
    # Left edge: windows start at sample 0 and grow up to the full width.
    f[:h+1] = window_mean(cumulant[h:scale], cumulant_n[h:scale])
    # Interior: full windows, from differences of running sums `scale` apart.
    f[h+1:size-h] = window_mean(cumulant[scale:] - cumulant[:-scale],
                                cumulant_n[scale:] - cumulant_n[:-scale])
    # Right edge: windows end at the last sample and shrink.
    f[size-h:] = window_mean(cumulant[-1] - cumulant[-scale:-h-1],
                             cumulant_n[-1] - cumulant_n[-scale:-h-1])
    return f


def nan_uniform_filter(x, h):
    '''Apply `nanfilter` with half-width `h` directly to the signal `x`.

    Builds the running sum of `x` (NaN counted as zero) and the running count
    of finite samples, and passes both to `nanfilter`.
    '''
    running_sum = np.nancumsum(x)
    running_count = np.nancumsum(np.isfinite(x))
    return nanfilter(running_sum, running_count, h)

In [None]:
def mexican_top_hat(x, h):
    '''Difference of two NaN-aware uniform filters of `x`.

    Returns the `nanfilter` output for half-width `h` minus the output for
    half-width `3*h + 1`, both computed from the same running sums.
    '''
    running_sum = np.nancumsum(x)
    running_count = np.nancumsum(np.isfinite(x))
    narrow = nanfilter(running_sum, running_count, h)
    wide = nanfilter(running_sum, running_count, 3*h + 1)
    # A sqrt(1.5) normalisation of the difference is currently disabled.
    return narrow - wide

## Read data

In [None]:
# Reload the reader module so that edits to it are picked up without
# restarting the kernel.
importlib.reload(scripts.read_data)
# Load the data set to analyse.
# NOTE(review): the meaning of the arguments `11` and `(0, 0, 1)` is defined
# inside scripts.read_data, which is not visible here -- presumably a data set
# index and a selection; confirm against that module.
object_name, data, beam_FWHM_pix, true_signal = scripts.read_data.run(11, (0, 0, 1))

In [None]:
# Synthetic test case: three top-hat features (+1, -1, +1), each 42 samples
# wide, on a zero background, plus Gaussian noise of standard deviation `sigma`.
# Running this cell overwrites `data` and `true_signal`.
sigma = 1
data = np.zeros(666)
#data[142:169] = 1
for first, last, amplitude in ((42, 84, 1), (142, 184, -1), (242, 284, 1)):
    data[first:last] = amplitude
true_signal = data.copy()  # noiseless reference, kept for the plots
data += np.random.normal(0, sigma, data.size)
'''
'''


# 2. Feature extraction / variance estimation

Actually, I'm not sure whether this might be overkill,
unless it is able to correctly account for correlated noise!

In [None]:
# Running count of finite samples in `data`.
cumulative_n = np.nancumsum(np.isfinite(data))

# Window widths s = 1, 3, 9, ...: keep tripling until the largest width
# reaches at least one third of the data size.
s = [1]
while s[-1] < data.size/3:
    s += [3 * s[-1]]
s = np.asarray(s)

# Half-widths h such that s = 2*h + 1, and the number of scales.
h_list = (s - 1) // 2
n_h = len(h_list)

In [None]:
def get_features(x):
    '''Multi-scale feature extraction

    For every half-width in the module-level `h_list`, computes the
    NaN-aware windowed mean of `x` and a variance term, then turns the stack
    of means into differences between consecutive scales.

    Parameters: `x` -- signal to decompose (used as a 1-D array by `nanfilter`).

    Returns `(mth, mth_var)`, both of shape `h_list.shape + x.shape`.
    '''
    
    # Running sums of x, x**2 and of the number of finite samples.
    cumulative_data = np.nancumsum(x)
    cumulative_data2 = np.nancumsum(x**2)
    cumulative_n = np.nancumsum(np.isfinite(x))

    mth = np.empty(h_list.shape + x.shape)
    mth_var = np.empty_like(mth)
    
    # Windowed mean of x and of x**2 at every scale.
    for i in range(h_list.size):
        mth[i] = nanfilter(cumulative_data, cumulative_n, h_list[i])
        mth_var[i] = nanfilter(cumulative_data2, cumulative_n, h_list[i])
    # <x**2> - <x>**2: variance of the samples inside each window.
    mth_var -= mth**2
    # Divide by 2*h for every scale but the first (the notebook's first
    # half-width is 0, which would divide by zero).
    # NOTE(review): 2*h is the window size minus one, so this looks like the
    # variance of the window mean -- confirm.
    mth_var[1:, :] /= 2*h_list[1:, np.newaxis]

    #for i in range(h_list.size-1):
    #    mth[i] -= mth[i+1]
    #    mth_var[i] += mth_var[i+1]
    # WARNING: race condition?
    # NOTE(review): both operands are overlapping views of the same array.
    # NumPy (>= 1.13, per its release notes) evaluates ufuncs as if inputs and
    # outputs did not overlap, so this should match the loop above -- confirm
    # for the NumPy version in use.
    mth[:-1] -= mth[1:]
    mth_var[:-1] += mth_var[1:]

    # The largest scale is referred to the global mean of x.
    mth[-1] -= np.nanmean(x)
    # Row 0 additionally receives the sum over all rows (itself included).
    # NOTE(review): callers that sum the returned variance over axis 0 will
    # count these terms again -- confirm this is intended.
    mth_var[0] += np.sum(mth_var, axis=0)
    
    return mth, mth_var

In [None]:
# Only used to compute data variance
# Only used to compute data variance
mu_s_data, var_s_data = get_features(data)
# Total variance per sample: sum of the per-scale terms.
data_var = var_s_data.sum(axis=0)
# Print the 16th, 50th and 84th percentiles of the variance as standard deviations.
noise_percentiles = np.nanpercentile(data_var, [16, 50, 84])
print(np.sqrt(noise_percentiles))

In [None]:
#if true_signal is not None:
#    mu_s_true, var_s_true = get_features(true_signal)

# 3. Reconstruction

In [None]:
# Iterative multi-scale reconstruction of `data`.
#
# `mu_s[i]` holds the coefficients of scale i; the model of the data is the sum
# over scales of `nan_uniform_filter(mu_s[i], h_list[i] + 1)`.  Every iteration
# filters the current residual at each scale, rescales each filtered residual
# (and then their sum) by a least-squares amplitude, and adds the result to
# `mu_s`.
mu_s = np.zeros(h_list.shape + data.shape)
mu_var = np.zeros_like(mu_s)
#mu_estimate = np.zeros_like(mu_s)
data_estimate = np.zeros_like(data)
# NOTE(review): `estimate_var` is never updated below, so it stays all-zero.
# The `evidence` expression then divides by zero and evaluates to NaN, the
# comparison `evidence < old_evidence` is always False, and the rollback branch
# cannot trigger.  A real variance estimate is needed here.
estimate_var = np.zeros_like(data)

iteration = 0
max_iter = 30
rms = np.inf
evidence = 0
# Stop when the normalised residual rms drops to 1 or after `max_iter` steps.
while rms > 1 and iteration < max_iter:
    iteration += 1
    old_rms = rms
    old_evidence = evidence
    old_data_estimate = data_estimate  # kept so that a rejected step can be undone

    residual = data - data_estimate
    
    delta_s = np.empty_like(mu_s)
    delta = np.zeros_like(data)
    for i, h in enumerate(h_list):
        estimate = nan_uniform_filter(residual, h+1)
        # Least-squares amplitude of this scale's estimate against the residual.
        estimate *= np.nansum(residual * estimate) / np.sum(estimate**2)
        #estimate *= np.sqrt(2*h+1) #* np.exp(-.5*np.nansum((residual-estimate)**2/data_var))
        delta_s[i] = estimate
        delta += delta_s[i]
    # Same least-squares rescaling, applied to the sum over scales.
    step = delta_s * np.nansum(residual * delta) / np.sum(delta**2)
    mu_s += step
    
    data_estimate = np.zeros_like(data)
    for i, h in enumerate(h_list):
        data_estimate += nan_uniform_filter(mu_s[i], h+1)

    rms = np.sqrt(np.nanmean((data-data_estimate)**2/data_var))
    evidence = np.exp(-.5 * np.nanmean((data - data_estimate)**2 / estimate_var)) / np.nanmean(estimate_var)
    print(iteration, rms, np.nanmean(residual**2), np.nanmean(estimate_var), np.nanmean(data_var), evidence)
    if evidence < old_evidence:
        # Reject the step: remove exactly the increment that was added above
        # (the former code subtracted `delta[i]`, a single sample of the 1-D
        # `delta`), and restore the matching model of the data.
        mu_s -= step
        data_estimate = old_data_estimate
        break

#w1 = nan_uniform_filter(np.exp(-.5*(mth - mth_smooth)**2/mth_var), h)
#w2 = nan_uniform_filter(np.exp(-.5*(mth - estimate)**2/mth_var), h)
#final = (w1*mth_smooth + w2*estimate) / (w1 + w2)

In [None]:
#mu_s_est, var_s_est = get_features(data_estimate)

In [None]:
# Per-scale signal: the coefficients `mu_s[i]` smoothed with the filter of
# their own scale.  Per-scale significance: square root of the equally
# smoothed squared coefficients.
signal_s = np.empty_like(mu_s)
power_s = np.empty_like(mu_s)
for i, h in enumerate(h_list):
    signal_s[i] = nan_uniform_filter(mu_s[i], h+1)
    power_s[i] = nan_uniform_filter(mu_s[i]**2, h+1)
significance_s = np.sqrt(power_s)

# 4. Results

In [None]:
# Overview figure: one colour map per row (coefficients, per-scale signal,
# per-scale significance), each overlaid with significance contours.
fig, axes = scripts.utils.new_figure(object_name, nrows=3, figsize=(12, 8))

# Raw strings: '\m' and '\S' are not valid escape sequences in ordinary string
# literals and trigger a warning in recent Python versions.
scripts.utils.colour_map(axes[0, 0], r'$\mu_s$', mu_s)
scripts.utils.colour_map(axes[1, 0], r'$S_s$', signal_s)
scripts.utils.colour_map(axes[2, 0], r'$\Sigma_s$', significance_s)

# 16 contour levels, each a factor 2 below the next, ending at the maximum
# significance; they are the same for every panel.
contour_levels = np.nanmax(significance_s) / 2**(15-np.arange(16))
for ax in axes.ravel():
    ax.contour(significance_s, levels=contour_levels, colors='k')

# Disabled code, kept inside a (raw) string literal.
r'''
def plot_significance(ax, label, mu_s, **kwargs):
    estimate = np.empty_like(mu_s)
    significance = np.empty_like(mu_s)
    for i, h in enumerate(h_list):
        estimate[i] = nan_uniform_filter(mu_s[i], h+1)
        significance[i] = np.sqrt(nan_uniform_filter(mu_s[i]**2, h+1))
    #scripts.utils.colour_map(ax, label, significance, **kwargs)
    scripts.utils.colour_map(ax, label, estimate, **kwargs)
    ax.contour(significance, levels=np.nanmax(significance) / 2**(15-np.arange(16)), colors='k')

p0, p50, p100 = np.nanpercentile(mu_s_est**2, [0, 50, 100])
norm = colors.SymLogNorm(vmin=-p100, linthresh=p50, vmax=p100)
norm = None
cmap = 'rainbow_r'
plot_significance(axes[0, 0], '$\mu_s$ data', mu_s_data, norm=norm, cmap=cmap)
plot_significance(axes[1, 0], '$\mu_s$ estimate', mu_s, norm=norm, cmap=cmap)
#plot_significance(axes[2, 0], '$\mu_s$ true', mu_s_true, norm=norm, cmap=cmap)
'''

#axes[0, 0].set_xlim(950, 1250)
#axes[0, 0].set_xlim(3350, 3850)


In [None]:
# One panel per scale, comparing the data, the full reconstruction, the
# contribution of that scale, and the partial sums over smaller/larger scales.
fig, axes = scripts.utils.new_figure('filter', nrows=n_h, sharey=True)  #, figsize=(12, 8))

# Quantities shared by every panel.
pixels = np.arange(data.size)
data_noise = np.sqrt(data_var)
estimate_noise = np.sqrt(estimate_var)

for i, h in enumerate(h_list):
    ax = axes[i, 0]

    if true_signal is not None:
        ax.plot(true_signal, 'k:', label='true signal')
        #x = 1.5 * np.sqrt(np.clip(mexican_top_hat(mu_s_true[i]**2, h+1), 0, np.inf))
        #ax.plot(x, 'k--', label='true intensity')

    # Data with a +/- 1 sigma band.
    ax.plot(data, 'r-', alpha=.2, label='data')
    ax.fill_between(pixels, data - data_noise, data + data_noise, color='r', alpha=.1)

    # Reconstruction (sum over all scales) with its own band.
    ax.plot(data_estimate, 'k--', alpha=.5, label='estimate')
    ax.fill_between(pixels, data_estimate - estimate_noise, data_estimate + estimate_noise,
                    color='k', alpha=.1)

    # Contribution of this scale alone, with the data noise as band.
    #x = mu_s[i]
    #x = nan_uniform_filter(mu_s[i], h+1)
    this_scale = signal_s[i]
    ax.plot(this_scale, 'b-', alpha=.5, label=f'h={h}')
    ax.fill_between(pixels, this_scale - data_noise, this_scale + data_noise, color='b', alpha=.1)

    # Partial sums: scales up to and including this one, then the larger ones.
    ax.plot(signal_s[:i+1, :].sum(axis=0), 'r-', alpha=.5, label=f'h<={h}')
    if i < n_h-1:
        ax.plot(signal_s[i+1:, :].sum(axis=0), 'r:', alpha=.5, label=f'h>{h}')

    '''
    x = 1.5 * np.sqrt(np.clip(mexican_top_hat(mu_s_data[i]**2, h+1), 0, np.inf))
    noise = np.sqrt(nan_uniform_filter(var_s_data[i], h))
    ax.plot(x, 'k-', label='feature intensity')
    ax.fill_between(np.arange(data.size), x-noise, x+noise, color='g', alpha=.1)
    '''

    #ax.set_xlim(950, 1250); ax.set_ylim(-10, 250)
    #ax.set_xlim(3350, 3850); ax.set_ylim(-50, 450)
    ax.legend()

# --- STOP ---

In [None]:
# Deliberate stop marker: `-1` is not an exception, so this statement itself
# fails with a TypeError -- presumably to halt "Run all" at this point.
raise -1