## Imports

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads modules before each
# cell runs, so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import collections
import logging
import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import core.artificial_signal_generators as carsigen
import core.plotting as coplotti
import core.signal_processing as csigproc
import core.statistics as costatis
import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint

In [None]:
# Initialize logging through the project helper at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

_LOG = logging.getLogger(__name__)

# Log the first element of the value returned by `get_system_signature()`.
_LOG.info("%s", henv.get_system_signature()[0])

# NOTE(review): presumably applies the project's notebook display settings;
# the exact options are defined in `helpers.hprint`, not here.
hprint.config_notebook()

# Generate signal

In [None]:
# ARMA process built with empty AR and MA coefficient lists.
# NOTE(review): presumably this reduces to white noise -- confirm against
# `carsigen.ArmaProcess`.
arma00process = carsigen.ArmaProcess([], [])

In [None]:
# Draw a sample with a fixed seed over 4 * 252 periods at business-day
# frequency, starting 2000-01-01.
rets = arma00process.generate_sample(
    {"start": "2000-01-01", "periods": 4 * 252, "freq": "B"},
    scale=0.01,
    burnin=20,
    seed=42,
)

In [None]:
# Build a price path by exponentiating the cumulative sum, i.e., `rets` is
# treated as log returns.
price = np.exp(rets.cumsum())

In [None]:
# Suffix the series names so the two can be told apart in later output.
# NOTE: `+=` appends on every execution, so re-running only this cell keeps
# growing the names; re-run from the sample-generation cell instead.
rets.name += "_rets"
price.name += "_price"

## Price

In [None]:
# Plot the price series.
coplotti.plot_cols(price)

In [None]:
# Split the price into trend and residual parts (per the helper's name) using
# tau=16.
price_decomp = csigproc.get_trend_residual_decomp(price, tau=16)

In [None]:
# Display the first three rows of the decomposition.
price_decomp.head(3)

In [None]:
coplotti.plot_cols(price_decomp)

In [None]:
# Apply the ADF test helper to the decomposition via `.apply`.
# NOTE(review): assumes `price_decomp` is a DataFrame, so the helper runs once
# per column -- confirm.
price_decomp.apply(costatis.apply_adf_test)

### Price wavelet decomposition

In [None]:
# Haar wavelet decomposition of the price; the helper's return value is
# unpacked into a smooth part and a detail part.
# NOTE(review): `swt` presumably stands for stationary wavelet transform.
price_smooth, price_detail = csigproc.get_swt(price, wavelet="haar")

In [None]:
coplotti.plot_cols(price_detail)

In [None]:
coplotti.plot_cols(price_smooth)

In [None]:
# Correlation matrix of the detail part, drawn as a heatmap.
coplotti.plot_correlation_matrix(price_detail, mode="heatmap")

## Returns

In [None]:
# Plot the raw returns.
coplotti.plot_cols(rets)

In [None]:
# Normality test results for the returns, converted to a frame for display.
costatis.apply_normality_test(rets).to_frame()

In [None]:
coplotti.plot_autocorrelation(rets)

In [None]:
coplotti.plot_spectrum(rets)

### Returns wavelet decomposition

In [None]:
# Haar wavelet decomposition of the returns, unpacked into a smooth part and a
# detail part ("haar" is passed positionally here).
rets_smooth, rets_detail = csigproc.get_swt(rets, "haar")

In [None]:
coplotti.plot_cols(rets_detail)

In [None]:
# Same plot using the plotting helper's "renormalize" mode.
coplotti.plot_cols(rets_detail, mode="renormalize")

In [None]:
# Apply the normality test helper to the detail part via `.apply`.
rets_detail.apply(costatis.apply_normality_test)

In [None]:
coplotti.plot_autocorrelation(rets_detail, title_prefix="Wavelet level ")

In [None]:
coplotti.plot_spectrum(rets_detail, title_prefix="Wavelet level ")

In [None]:
# Correlation matrix of the detail part, drawn as a heatmap.
coplotti.plot_correlation_matrix(rets_detail, mode="heatmap")

### Z-scored returns

In [None]:
# Z-score the returns with the dyadic helper, passing `demean=False`.
# NOTE(review): the plot titles below suggest one output column per tau
# exponent -- confirm against `csigproc.get_dyadic_zscored`.
zscored_rets = csigproc.get_dyadic_zscored(rets, demean=False)

In [None]:
coplotti.plot_cols(zscored_rets)

In [None]:
# Apply the normality test helper to the z-scored output via `.apply`.
zscored_rets.apply(costatis.apply_normality_test)

In [None]:
coplotti.plot_autocorrelation(zscored_rets, title_prefix="tau exp = ")

In [None]:
coplotti.plot_spectrum(zscored_rets, title_prefix="tau exp = ")

# EMAs and Smooth Moving Averages

In [None]:
# Impulse test signal used by the EMA / moving-average / derivative cells
# below.
# NOTE(review): the positional arguments (-252, 3 * 252) look like start and
# end offsets around the impulse -- confirm against `carsigen.get_impulse`.
impulse = carsigen.get_impulse(-252, 3 * 252, tick=1)

In [None]:
impulse.plot()

## Dependence of EMA on depth

In [None]:
# Plot the EMA of the impulse for depth = 1, ..., 5 with tau and min_periods
# held fixed.
for depth in range(1, 6):
    ema = csigproc.compute_ema(impulse, tau=40, min_periods=20, depth=depth)
    ema.plot()

## Dependence of smooth moving average on max depth

In [None]:
# Sweep `max_depth` over 1, ..., 5 while `min_depth` stays at 1.
for max_depth in range(1, 6):
    sma = csigproc.compute_smooth_moving_average(
        impulse, tau=40, min_periods=20, min_depth=1, max_depth=max_depth
    )
    sma.plot()

## Dependence of smooth moving average on min depth

In [None]:
# Sweep `min_depth` over 1, ..., 5 while `max_depth` stays at 5.
for min_depth in range(1, 6):
    sma = csigproc.compute_smooth_moving_average(
        impulse, tau=40, min_periods=20, min_depth=min_depth, max_depth=5
    )
    sma.plot()

## Dependence of rolling norm on max depth

In [None]:
# Sweep `max_depth` over 1, ..., 5 for the rolling norm with p_moment = 1.
for max_depth in range(1, 6):
    norm = csigproc.compute_rolling_norm(
        impulse,
        tau=40,
        min_periods=20,
        min_depth=1,
        max_depth=max_depth,
        p_moment=1,
    )
    norm.plot()

## Dependence of rolling norm on moment

In [None]:
# Sweep `p_moment` over 0.5, 1.0, ..., 4.0 with the depth range fixed at 1..2.
for p_moment in np.arange(0.5, 4.5, 0.5):
    norm = csigproc.compute_rolling_norm(
        impulse,
        tau=40,
        min_periods=20,
        min_depth=1,
        max_depth=2,
        p_moment=p_moment,
    )
    norm.plot()

# Smooth Derivatives

## Dependence on tau

In [None]:
# Plot the order-1 smooth derivative of the impulse for tau = 100, ..., 500.
for multiplier in range(1, 6):
    derivative = csigproc.compute_smooth_derivative(
        impulse, tau=100 * multiplier, min_periods=0, scaling=0, order=1
    )
    derivative.plot()

## Dependence on order

In [None]:
# Plot the smooth derivative of the impulse for order = 1, ..., 5 at tau = 100.
for order in range(1, 6):
    derivative = csigproc.compute_smooth_derivative(
        impulse, tau=100, min_periods=0, scaling=0, order=order
    )
    derivative.plot()

## Application to slope 1 linear growth with varying tau, scaling = 1

In [None]:
# Ramp 0, 1, ..., len(price) - 1 on the price index, i.e., a slope-1 line per
# observation.
linear_growth = pd.Series(index=price.index, data=range(price.size))

In [None]:
# Plot the order-1 smooth derivative of the ramp for tau = 2, 4, ..., 32 with
# scaling = 1.
for exponent in range(1, 6):
    derivative = csigproc.compute_smooth_derivative(
        linear_growth, tau=2**exponent, min_periods=0, scaling=1, order=1
    )
    derivative.plot()

## Application to prices

In [None]:
# Frame on the price index; the raw returns go in first as a reference column
# next to the smooth derivatives added below.
dprice = pd.DataFrame(index=price.index)
dprice["rets"] = rets

In [None]:
# Add one smooth-derivative column per tau = 2**0, ..., 2**6; each column is
# keyed by the integer exponent.
for exponent in range(7):
    dprice[exponent] = csigproc.compute_smooth_derivative(
        price, tau=2**exponent, min_periods=0, scaling=1, order=1
    )

In [None]:
# Plot the returns together with the smooth derivatives of the price.
coplotti.plot_cols(dprice)

In [None]:
# Plot the cumulative sums of the same columns using the "renormalize" mode.
coplotti.plot_cols(dprice.cumsum(), mode="renormalize")

# Multivariate series

In [None]:
# Multivariate normal process whose covariance is set from an inverse-Wishart
# draw with dim=8; the draw and the sample both use seed 10.
mvn = carsigen.MultivariateNormalProcess()
mvn.set_cov_from_inv_wishart_draw(dim=8, seed=10)
# Same date specification as the univariate sample: 4 * 252 business-day
# periods starting 2000-01-01.
mvn_rets = mvn.generate_sample(
    {"start": "2000-01-01", "periods": 4 * 252, "freq": "B"}, seed=10
)

In [None]:
coplotti.plot_cols(mvn_rets)

## Z-score the time series

In [None]:
# Rolling z-score of the multivariate returns with tau=16 and `demean=False`.
# NOTE(review): presumably `demean=False` scales without subtracting a rolling
# mean -- confirm against `csigproc.compute_rolling_zscore`.
mvn_zrets = csigproc.compute_rolling_zscore(mvn_rets, tau=16, demean=False)

In [None]:
coplotti.plot_cols(mvn_zrets)

## Compute Incremental PCA

In [None]:
# Incremental PCA on the z-scored returns, requesting 3 principal components
# with tau=65; the result is unpacked into eigenvalues and eigenvectors.
eigenvalues, eigenvectors = csigproc.compute_ipca(mvn_zrets, num_pc=3, tau=65)

### Plot eigenvalue evolution over time

In [None]:
coplotti.plot_cols(eigenvalues)

### Plot eigenvector evolution over time

In [None]:
# `eigenvectors` is indexed by position; indices 0-2 match `num_pc=3`.
eigenvectors[0].plot()

In [None]:
eigenvectors[1].plot()

In [None]:
eigenvectors[2].plot()

### Plot eigenvector angular distance change over time

In [None]:
# Differences between eigenvectors over time, as computed by the helper (the
# section header describes them as angular distances).
eigenvector_diffs = csigproc.compute_eigenvector_diffs(eigenvectors)

In [None]:
coplotti.plot_cols(eigenvector_diffs)

# Outlier handling

In [None]:
# Seed NumPy's global RNG so the draw below is reproducible.
np.random.seed(100)
n = 100000
# Draw `n` samples from a normal distribution with mean 0 and standard
# deviation 1.
data = np.random.normal(loc=0.0, scale=1.0, size=n)
# Show the first five values.
print(data[:5])

# Wrap the array in a Series and plot its histogram as the baseline for the
# outlier-processing cells below.
srs = pd.Series(data)
srs.plot(kind="hist")

In [None]:
def _analyze(srs):
    """
    Print the NaN count of `srs`, show its histogram, and pretty-print `info`.

    `info` is not a parameter: it is looked up in the notebook's global
    namespace at call time, so it must be assigned before this function runs.

    :param srs: series to summarize
    """
    num_nans = np.isnan(srs).sum()
    print(num_nans)
    # Render the histogram before the `info` dump is printed.
    srs.plot(kind="hist")
    plt.show()
    pprint.pprint(info)

In [None]:
# Winsorize mode with only `lower_quantile` given.
# NOTE(review): presumably the upper quantile is derived from the lower one
# when omitted -- confirm against `csigproc.process_outliers`.
mode = "winsorize"
lower_quantile = 0.01
window = 1000
min_periods = 10
# Fresh dict passed as `info`; `_analyze` later prints it by reading the
# global `info`.
info = collections.OrderedDict()
srs_out = csigproc.process_outliers(
    srs, mode, lower_quantile, window=window, min_periods=min_periods, info=info
)
#
_analyze(srs_out)

In [None]:
# Winsorize mode with explicit, asymmetric quantiles (0.01 lower, 0.90 upper).
mode = "winsorize"
lower_quantile = 0.01
upper_quantile = 0.90
window = 1000
min_periods = 10
# Fresh dict passed as `info`; `_analyze` later prints it by reading the
# global `info`.
info = collections.OrderedDict()
srs_out = csigproc.process_outliers(
    srs,
    mode,
    lower_quantile,
    upper_quantile=upper_quantile,
    window=window,
    min_periods=min_periods,
    info=info,
)
#
_analyze(srs_out)

In [None]:
# "set_to_nan" mode with only `lower_quantile` given; the NaN count printed by
# `_analyze` is the number to look at here.
mode = "set_to_nan"
lower_quantile = 0.01
window = 1000
min_periods = 10
# Fresh dict passed as `info`; `_analyze` later prints it by reading the
# global `info`.
info = collections.OrderedDict()
srs_out = csigproc.process_outliers(
    srs, mode, lower_quantile, window=window, min_periods=min_periods, info=info
)
#
_analyze(srs_out)

In [None]:
# "set_to_zero" mode with a wider lower quantile (0.10) than the cells above.
mode = "set_to_zero"
lower_quantile = 0.10
window = 1000
min_periods = 10
# Fresh dict passed as `info`; `_analyze` later prints it by reading the
# global `info`.
info = collections.OrderedDict()
srs_out = csigproc.process_outliers(
    srs, mode, lower_quantile, window=window, min_periods=min_periods, info=info
)
#
_analyze(srs_out)