# SGD Linear Regression with numpy and numba

# Initialization

In [None]:
import xarray as xr
import numpy as np
import numba as nb

from qnt.data import ds, load_assets, load_data, write_output, restore_origin_data, \
    filter_liquids_xarray_assets_dataarray, from_dict_to_xarray_1d, \
    check_weights_xarray_dataarray_for_nonliquids, exclude_weights_xarray_dataarray_from_nonliquids
from qnt.stepper import test_strategy
from qnt.stats import calc_stat, print_correlation
from qnt.graph import make_plot, make_plot_double, make_plot_filled

# Data

In [None]:
# Load the market data starting from 2016-06-01, requesting the dimension
# order (time, field, asset). The variable name indicates that the time axis
# is expected in descending order -- TODO confirm against load_data.
loaded_desc_ordered_xarray_dataarray = load_data(
    min_date="2016-06-01",
    dims=('time', 'field', 'asset'),
)
# Notebook cell output: show the coordinates of the loaded array.
loaded_desc_ordered_xarray_dataarray.coords

# Model

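Let's look at the first k_days=250 price items of each asset (the data is kept in descending date order, so these are the most recent prices). We will normalize them by the first (i.e. latest) price and rate the price trend with the simplest linear regression model.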

If we see an increasing price trend, we decide that:
* the price will keep increasing for the next n_days=50 days
* the weight for the next n_days=50 days will be set to the sign of the regression slope

After n_days=50 days, let's review the previous k_days=250 days, build the regression again and update the weights.

In [None]:
# Regression window: how many price items are used for each fit.
k_days = 250
# Rebalancing period: the regression is rebuilt every n_days days.
n_days = 50
# Regressor input shared by every fit: the day indices 0 .. k_days - 1.
X = np.arange(k_days)


# NOTE(review): newer numba releases expose jitclass as
# numba.experimental.jitclass -- confirm against the installed numba version.
@nb.jitclass([
    ('slope', nb.float64),
    ('intercept', nb.float64),
    ('learning_rate', nb.float64),
    ('start_slope', nb.float64),
    ('start_intercept', nb.float64),
    # iteration count: an integer, because it is used as the range() bound
    ('max_iter', nb.int64),
])
class SGDRegressor(object):
    """One-variable linear regression ``y = slope * x + intercept``.

    The parameters are fitted by gradient descent on the mean squared error.
    Despite the class name, every iteration uses all training points (a full
    batch), not a random sample.

    Attributes:
        slope, intercept: fitted parameters, written by ``fit``.
        learning_rate: gradient step size (fixed to 0.0001 by ``__init__``).
        start_slope, start_intercept: parameters the descent starts from.
        max_iter: number of gradient steps performed by ``fit``.
    """

    def __init__(self, max_iter):
        self.slope = 0
        self.intercept = 0

        self.learning_rate = 0.0001
        self.start_slope = 0
        self.start_intercept = 0
        self.max_iter = max_iter

    def fit(self, X_train, y_train):
        """Run ``max_iter`` gradient steps and store the result on the model.

        Args:
            X_train: 1-D sequence of regressor values.
            y_train: 1-D sequence of targets. It is indexed in lockstep with
                ``X_train`` over ``len(X_train)``, so it must be at least as
                long as ``X_train``.

        The fitted parameters are stored in ``self.slope`` and
        ``self.intercept``. With ``max_iter == 0`` they are set to the start
        values.

        NOTE(review): for X_train = 0..249 the fixed step 0.0001 appears to
        exceed the gradient-descent stability bound 2 / lambda_max (about
        4.8e-5 for this input), so the iterates may grow instead of converge.
        Confirm before relying on the magnitude or sign of the fitted slope.
        """
        n_pt = float(len(X_train))
        s_slope = self.start_slope
        s_intercept = self.start_intercept

        for epoch in range(self.max_iter):
            # accumulate the MSE gradient over the whole training set
            grad_slope = 0.0
            grad_intercept = 0.0

            for j in range(len(X_train)):
                residual = y_train[j] - ((s_slope * X_train[j]) + s_intercept)
                grad_intercept += -(2 / n_pt) * residual
                grad_slope += -(2 / n_pt) * X_train[j] * residual

            # simultaneous update: both gradients were taken at the old point
            s_slope = s_slope - (self.learning_rate * grad_slope)
            s_intercept = s_intercept - (self.learning_rate * grad_intercept)

        self.slope = s_slope
        self.intercept = s_intercept


# Weights produced by the most recent regression, keyed by asset. Kept at
# module level so step() can reuse them between regressions.
weights_dict = {}


# this function is optional, just for tests and to avoid forward-looking;
# later it will be used together with test_strategy(...)
def step(desc_ordered_assets_xarray_dataarray, debug=False):
    """Return the asset weights for the data passed in.

    A new regression is run when ``debug`` is true or when
    ``(time_length - k_days - 1)`` is a multiple of ``n_days``, where
    ``time_length`` is the first dimension of the liquid-filtered data.
    Otherwise the weights stored in the global ``weights_dict`` are reused,
    after removing assets reported as non-liquid by
    ``exclude_weights_xarray_dataarray_from_nonliquids``.

    When a regression runs, ``weights_dict`` is rebuilt from scratch: for
    every asset of the filtered data the "open" prices of the first ``k_days``
    rows are reversed, divided by the price of the first row, NaN-filled with
    0 and fitted with ``SGDRegressor(max_iter=50)``; the weight is the sign of
    the fitted slope.

    Args:
        desc_ordered_assets_xarray_dataarray: price data indexed as
            (time, field, asset); presumably newest row first, as the name
            says -- verify against the caller.
        debug: when true, always run the regression.

    Returns:
        The weights converted by ``from_dict_to_xarray_1d``.
    """
    global weights_dict

    liquid_data = filter_liquids_xarray_assets_dataarray(desc_ordered_assets_xarray_dataarray)
    liquid_assets = liquid_data.coords['asset'].values

    # between regressions (and outside debug mode) reuse the stored weights
    if not (debug or (liquid_data.shape[0] - k_days - 1) % n_days == 0):
        stored_weights = from_dict_to_xarray_1d(weights_dict)

        # drop every asset that became non-liquid since the last regression
        return exclude_weights_xarray_dataarray_from_nonliquids(
            stored_weights, desc_ordered_assets_xarray_dataarray)

    # start from an empty dict so the global does not keep assets from past
    # regressions that may have become non-liquid
    weights_dict = {}

    for asset in liquid_assets:
        # training needs ascending order: take the first k_days rows reversed
        open_prices = liquid_data.loc[:, "open", asset]
        asc_prices = open_prices[k_days - 1::-1]
        latest_price = asc_prices[-1]

        regressor = SGDRegressor(max_iter=50)
        # NaN values are filled with 0: not the best choice, but simple
        targets = (asc_prices / latest_price).fillna(0)
        regressor.fit(X, targets.values)

        weights_dict[asset] = np.sign(regressor.slope)

    fresh_weights = from_dict_to_xarray_1d(weights_dict)

    # no non-liquid exclusion pass is applied on this path: weights_dict was
    # just rebuilt from the liquid-filtered asset list

    # optional double check
    check_weights_xarray_dataarray_for_nonliquids(fresh_weights, desc_ordered_assets_xarray_dataarray)

    return fresh_weights

# Backtest

In [None]:
# Backtest: test_strategy receives the loaded data and step as the per-step
# callback; init_data_length is set to the regression window k_days.
output = test_strategy(
    loaded_desc_ordered_xarray_dataarray,
    step=step,
    init_data_length=k_days,
)

## Stats and plots

In [None]:
# Compute the statistics of the backtest output against the loaded data,
# with slippage_factor set to 0.05.
stat = calc_stat(
    loaded_desc_ordered_xarray_dataarray,
    output,
    slippage_factor=0.05,
)
# Show the last rows of the statistics table in the notebook.
display(stat.to_pandas().tail())

In [None]:
# Plot the 'equity' column of the statistics over time (type="log").
make_plot_filled(
    stat.coords['time'].to_pandas(),
    stat.loc[:, 'equity'].values,
    color="blue",
    name="PnL (Equity)",
    type="log",
)

In [None]:
# Plot the 'underwater' column of the statistics over time, with the
# range_max argument set to 0.
make_plot_filled(
    stat.coords['time'].to_pandas(),
    stat.loc[:, 'underwater'].values,
    color="red",
    name="Underwater Chart",
    range_max=0,
)

In [None]:
# Plot the 'sharpe_ratio' column, skipping its first 20 values. The time axis
# is sliced by the same offset so each value stays paired with its own date
# (the original passed the full-length time axis with the shortened series).
make_plot_filled(
    stat.coords['time'][20:].to_pandas(),
    stat.loc[:, 'sharpe_ratio'].values[20:],
    color="purple",
    name="Rolling SR",
)

In [None]:
# Plot the 'bias' column of the statistics over time.
make_plot_filled(
    stat.coords['time'].to_pandas(),
    stat.loc[:, 'bias'].values,
    color="gray",
    name="Bias",
)

In [None]:
# Run the qnt.stats correlation report on the backtest output and the loaded
# data (what exactly is printed is defined by print_correlation, not here).
print_correlation(output, loaded_desc_ordered_xarray_dataarray)

# Submit

In [None]:
# "Submit" step of the notebook: pass the backtest weights to write_output
# (presumably this saves them for submission -- verify in qnt.data).
write_output(output)