In [1]:
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import pyplot as plt
from pandas import DataFrame, Series
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import RobustScaler
from xarray import DataArray

from datatools import extract_market_data, data_quantization
from pipeline import Dataset
from pipeline.backtest import evaluation_for_submission, cross_validation
from visualization.metric import plot_performance

In [2]:
# Open the two NetCDF sources and merge their variables into a single Dataset.
base_path = '../../data/nc/base.nc'
market_brief_path = '../../data/nc/market_brief.nc'

base_ds = xr.open_dataset(base_path)
market_brief_ds = xr.open_dataset(market_brief_path)
ds = base_ds.merge(market_brief_ds)

In [7]:
# Window configuration. All day bounds below are derived from `current_day`,
# so changing it moves the feature and target selections together.
current_day = 550
before1_day = current_day - 1  # derived instead of hard-coded so it cannot drift
train_lookback = 16  # number of training windows built later (one target day each)
per_eval_lookback = 8  # number of days in each feature window

# Features: enough history for `train_lookback` windows of `per_eval_lookback`
# days. `range` excludes its stop value, so the last selected day is
# `before1_day - 1`.
X = ds.sel(day=range(current_day - train_lookback - per_eval_lookback, before1_day))[['close_0', 'open_0', 'pe']]

# Targets: the `return` variable over the final `train_lookback` days of the
# same span (again excluding `before1_day` itself).
y = ds.sel(day=range(current_day - train_lookback - 1, before1_day))['return']

In [8]:
# Inspect the selected feature Dataset (variables close_0, open_0, pe).
X

In [9]:
# Inspect the selected `return` target array.
y

In [6]:
# First day label present in the feature window, as a plain Python scalar.
# NOTE(review): this cell's execution count (6) is lower than that of the cell
# defining `X` (7) — confirm the notebook still runs top to bottom on a fresh kernel.
start_day = X.coords['day'].min().item()

526

In [15]:
# Cut one window of `per_eval_lookback` consecutive days per training step and
# label each with a `batch` coordinate equal to the window's last day.
window_starts = [start_day + step for step in range(train_lookback)]
day_windows = [
    X.sel(day=slice(first, first + per_eval_lookback - 1)).expand_dims(
        batch=[first + per_eval_lookback - 1]
    )
    for first in window_starts
]

# NOTE(review): every window keeps its own `day` labels, so `xr.concat` has to
# align them along `day`; presumably this yields the union of days padded with
# NaN rather than a fixed-length window axis — confirm against the output.
(
    xr.concat(day_windows, dim='batch')
    .stack(batch_asset=['batch', 'asset'], create_index=True)
    .to_array('feature')
    .transpose('batch_asset', 'day', 'feature')
)

In [23]:
def _window_by_offset(first_day):
    """Return the feature window starting at `first_day`, indexed by offset.

    The window spans `per_eval_lookback` day labels, gains a length-1 `batch`
    dimension labelled with its last day, and has its `day` dimension replaced
    by `offset` (day minus `first_day`) so that all windows share one axis.
    """
    last_day = first_day + per_eval_lookback - 1
    window = X.sel(day=slice(first_day, last_day)).expand_dims(batch=[last_day])
    window = window.assign_coords(offset=window.day - first_day)
    # After the swap `day` is a non-index coordinate; drop it so windows
    # differ only in their `batch` label.
    return window.swap_dims({'day': 'offset'}).reset_coords(drop=True)


X_list = [_window_by_offset(start_day + i) for i in range(train_lookback)]
X_concat = xr.concat(X_list, dim='batch')
X_concat

In [24]:
# Check for missing values in the offset-indexed windows.
X_concat.isnull().any()

In [30]:
# Flatten (batch, asset) into one sample axis, move the data variables onto a
# `feature` axis, and order the dims as (batch_asset, offset, feature).
stacked_samples = X_concat.stack(batch_asset=['batch', 'asset'])
X_arr = stacked_samples.to_array('feature').transpose('batch_asset', 'offset', 'feature')

In [31]:
# View the underlying NumPy array; axes follow X_arr's dim order
# (batch_asset, offset, feature).
X_arr.values

array([[[21.1323, 21.5207, 25.1564],
        [21.2536, 21.108 , 25.3113],
        [21.108 , 21.108 , 25.1282],
        ...,
        [21.2172, 20.8288, 25.269 ],
        [21.2294, 21.2415, 25.2831],
        [21.0594, 21.2051, 25.0718]],

       [[27.3348, 27.0435, 36.5048],
        [27.2377, 27.007 , 36.3827],
        [27.444 , 27.4076, 36.6491],
        ...,
        [28.6578, 28.1237, 38.2807],
        [28.9127, 28.67  , 38.6136],
        [28.2815, 28.937 , 37.7701]],

       [[19.1538, 19.5179, 38.1431],
        [19.348 , 19.1416, 38.5318],
        [19.9185, 19.3237, 39.668 ],
        ...,
        [20.1734, 20.1612, 40.1664],
        [19.9185, 20.1734, 39.668 ],
        [19.8699, 19.9185, 39.5683]],

       ...,

       [[ 5.6684,  5.6563, 45.6377],
        [ 5.6442,  5.6684, 45.4732],
        [ 5.5835,  5.6078, 44.9798],
        ...,
        [ 5.5956,  5.6078, 45.062 ],
        [ 5.5592,  5.5956, 44.7331],
        [ 5.5471,  5.5713, 44.6509]],

       [[23.7783, 23.8269, 39.0692],
  

In [32]:
# Show the stacked `batch_asset` coordinate built from the (batch, asset) pairs.
X_arr.batch_asset

In [47]:
# Flatten the targets over (day, asset) and square them element-wise.
# NOTE(review): presumably a stand-in for real model output — confirm.
y_flat = y.stack(batch_asset=['day', 'asset']).values
y_pred_np = np.square(y_flat)

In [51]:
# Wrap the flat prediction vector in a DataArray that reuses X_arr's stacked
# (batch, asset) coordinate, then unstack it back into a 2-D grid.
# `dims` is given explicitly instead of being inferred from the keys of `coords`.
# NOTE(review): values are attached positionally; this assumes the (day, asset)
# order used to flatten `y` matches X_arr's (batch, asset) order — confirm.
y_pred_da = (
    DataArray(
        data=y_pred_np,
        dims=['batch_asset'],
        coords=dict(batch_asset=X_arr.batch_asset),
    )
    .unstack('batch_asset')
    .rename({'batch': 'day'})  # each batch label is the last day of its window
)
y_pred_da