In [None]:
import random

import chained_filter
import dim_reducer
import illiquidity_filter
import infty_filter
import len_filter
import numpy as np
import pandas as pd
import plotter
import real_data_loader
import return_statistic
import sp500_statistic
import time_filter
import torch

In [None]:
# Seed Python's, NumPy's and PyTorch's global random number generators
# with one shared value.
SEED = 12345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load data

In [None]:
# Relative locations handed to the loader.
cache_dir = "../data/cache"
raw_data_dir = "../data/raw_yahoo_data"

# Create the loader with its cache location, then request the time series
# from the raw data path.
real_loader = real_data_loader.RealDataLoader(cache=cache_dir)
price_data = real_loader.get_timeseries(data_path=raw_data_dir)

## Apply filters

In [None]:
# --- Filter parameters -------------------------------------------------
# Value passed to LenFilter.
# NOTE(review): the name is misspelled ("lenght"); it is kept unchanged
# because other cells may reference it.
min_lenght = 2000

# Values passed to IlliquidityFilter.
illiquidity_window = 10
min_illiquidity_jumps = 2
exclude_illiquidity_tolerance = 10

# --- Filter chain ------------------------------------------------------
# The filters are listed in the order length -> illiquidity -> infinity.
filter_list = [
    len_filter.LenFilter(min_lenght),
    illiquidity_filter.IlliquidityFilter(
        window=illiquidity_window,
        min_jumps=min_illiquidity_jumps,
        exclude_tolerance=exclude_illiquidity_tolerance,
    ),
    infty_filter.InftyFilter(),
]

# The time filter is configured with 1990-01-01 as its first date and is
# handed to the chain separately from the filter list.
first_date = pd.Timestamp(year=1990, month=1, day=1)
ch_filter = chained_filter.ChainedFilter(
    filter_chain=filter_list,
    time_filter=time_filter.TimeFilter(first_date=first_date),
)

In [None]:
# Filter a copy so the unfiltered `price_data` remains available for the
# comparisons further down.
price_data_filtered = price_data.copy()

# The chain is fitted on the unfiltered data and then applied to the copy.
# NOTE(review): the return value of apply_filter is discarded, so it
# presumably modifies its argument in place -- confirm.
ch_filter.fit_filter(price_data)
ch_filter.apply_filter(price_data_filtered)

# Shapes before and after filtering, one per line.
print(price_data.shape, price_data_filtered.shape, sep="\n")

### Compute Statistics

In [None]:
# Build one SP500Statistic per data set so the two can be compared:
# first from the unfiltered price data ...
price_statistic = sp500_statistic.SP500Statistic()
price_statistic.set_statistics(price_data)
# ... then from the filtered copy.
price_statistic_filtered = sp500_statistic.SP500Statistic()
price_statistic_filtered.set_statistics(price_data_filtered)

### Print Statistics

In [None]:
# Distribution properties of the statistic built from the unfiltered price_data.
price_statistic.print_distribution_properties()

In [None]:
# Distribution properties of the statistic built from price_data_filtered.
price_statistic_filtered.print_distribution_properties()

### Plot Distributions

In [None]:
# Style settings handed to the Plotter; the keys are matplotlib rcParams names.
distribution_figure_style = {
    "figure.figsize": (16, 10),
    "figure.titlesize": 22,
    "axes.titlesize": 18,
    "axes.labelsize": 16,
    "font.size": 17,
    "xtick.labelsize": 15,
    "ytick.labelsize": 15,
    "figure.dpi": 96,
    "legend.loc": "upper right",
    "figure.constrained_layout.use": True,
    "figure.constrained_layout.h_pad": 0.1,
    "figure.constrained_layout.hspace": 0,
    "figure.constrained_layout.w_pad": 0.1,
    "figure.constrained_layout.wspace": 0,
}

# One row with two panels, no shared axes.
distribution_subplot_layout = {
    "ncols": 2,
    "nrows": 1,
    "sharex": "none",
    "sharey": "none",
}

# NOTE(review): this cache path ("data/cache") differs from the one given to
# the data loader ("../data/cache") -- confirm that this is intended.
plot = plotter.Plotter(
    cache="data/cache",
    figure_name="data_set_distribution",
    figure_title="Data Set Distribution ",
    figure_style=distribution_figure_style,
    subplot_layout=distribution_subplot_layout,
)

# Panel 0 receives the unfiltered statistic, panel 1 the filtered one.
# Both histograms use the same settings; the style dict is rebuilt for
# every call so each call gets its own fresh dict.
for axis_index, statistic in enumerate((price_statistic, price_statistic_filtered)):
    statistic.draw_histogram(
        plot.axes[axis_index],
        style={"color": "green", "density": False},
        y_log_scale=True,
        y_label=r"# stocks",
    )

# Titles are set after drawing.
plot.axes[1].set_title("Filtered Data")
plot.axes[0].set_title("Unfiltered Data")

### Dimensionality reduction

In [None]:
# Build one ReturnStatistic per data set; these feed the dimensionality
# reduction below.
# Unfiltered price data:
price_return_statistic = return_statistic.ReturnStatistic()
price_return_statistic.set_statistics(price_data)

# Filtered price data:
price_return_statistic_filtered = return_statistic.ReturnStatistic()
price_return_statistic_filtered.set_statistics(price_data_filtered)

In [None]:
%%capture
plot = plotter.Plotter(
    cache="data/cache",
    figure_name="return_tsne",
    figure_title="Price Return TSNE",
    figure_style={
        "figure.figsize": (16, 10),
        "figure.titlesize": 22,
        "axes.titlesize": 18,
        "axes.labelsize": 16,
        "font.size": 17,
        "xtick.labelsize": 15,
        "ytick.labelsize": 15,
        "figure.dpi": 96,
        "legend.loc": "upper right",
        "figure.constrained_layout.use": True,
        "figure.constrained_layout.h_pad": 0.1,
        "figure.constrained_layout.hspace": 0,
        "figure.constrained_layout.w_pad": 0.1,
        "figure.constrained_layout.wspace": 0,
    },
    subplot_layout={
        "ncols": 1,
        "nrows": 1,
        "sharex": "none",
        "sharey": "none",
    },
)
tsne_reducer = dim_reducer.DimReducer("TSNE")

In [None]:
# Unfiltered returns, drawn in blue.
# NOTE(review): the last 103 entries along the second axis of the transposed
# statistic are dropped; the reason for this bound is not visible here -- confirm.
style = dict(color="blue", label="Unfiltered Data")
unfiltered_returns = price_return_statistic.statistic.T[:, :-103]
tsne_reducer.draw_reduction(plot.axes, unfiltered_returns, **style)

# Filtered returns, drawn in red on the same axes.
# NOTE(review): here the last 2000 entries are dropped; this matches the value
# of `min_lenght` used for the length filter, but the link is not established
# by the code -- confirm.
style = dict(color="red", label="Filtered Data")
filtered_returns = price_return_statistic_filtered.statistic.T[:, :-2000]
tsne_reducer.draw_reduction(plot.axes, filtered_returns, **style)

# Last expression of the cell: display the finished figure.
plot.figure