# Применение методов машинного обучения - линейная регрессия

В этом примере рассматривается одно из применений простейшего метода машинного обучения — линейной регрессии — для предсказания направления движения цен открытия инструментов.

In [1]:
import xarray as xr
import numpy as np
import pandas as pd

from qnt.data import ds, load_assets, load_data, write_output, restore_origin_data
from qnt.stepper import test_strategy
from qnt.stats import calc_stat
from qnt.graph import make_plot, make_plot_double, make_plot_filled

from sklearn.linear_model import LinearRegression

In [2]:
def liquid_assets_list(data):
    """Return the labels of the assets whose "is_liquid" flag equals 1.

    Only the row at position 0 of the "is_liquid" field is inspected
    (presumably the most recent date -- confirm the row order of ``data``).
    """
    first_row = data.loc[:, "is_liquid"].to_pandas().iloc[0]
    is_liquid = (first_row == 1).values
    return first_row.index[is_liquid]

In [3]:
def sign_slope(X_train, y_train):
    """Fit a one-feature linear regression and return the sign of its slope.

    ``X_train`` is reshaped into a single feature column and
    ``y_train.values`` into a single target column before fitting, so
    ``X_train`` must offer ``reshape`` and ``y_train`` must offer ``values``.

    Returns ``np.sign`` of the single fitted coefficient.
    """
    features = X_train.reshape(-1, 1)
    targets = y_train.values.reshape(-1, 1)
    regression = LinearRegression().fit(features, targets)
    slope = regression.coef_[0][0]
    return np.sign(slope)

In [4]:
# Fetch the asset descriptions and keep only their identifiers.
assets = load_assets()
assets_names = [asset['id'] for asset in assets]

# Load 2010-2016 data for all of those assets, requesting the
# ('time', 'field', 'asset') dimension order.
data = load_data(
    min_date="2010-01-01",
    max_date="2016-12-31",
    dims=('time', 'field', 'asset'),
    assets=assets_names,
)

fetched chunk 2016-08-11 -> 2016-12-31 10s
fetched chunk 2016-03-21 -> 2016-08-10 20s
fetched chunk 2015-10-30 -> 2016-03-20 30s
fetched chunk 2015-06-09 -> 2015-10-29 40s
fetched chunk 2015-01-17 -> 2015-06-08 50s
fetched chunk 2014-08-27 -> 2015-01-16 59s
fetched chunk 2014-04-06 -> 2014-08-26 72s
fetched chunk 2013-11-14 -> 2014-04-05 81s
fetched chunk 2013-06-24 -> 2013-11-13 90s
fetched chunk 2013-02-01 -> 2013-06-23 97s
fetched chunk 2012-09-11 -> 2013-01-31 106s
fetched chunk 2012-04-21 -> 2012-09-10 114s
fetched chunk 2011-11-30 -> 2012-04-20 122s
fetched chunk 2011-07-10 -> 2011-11-29 133s
fetched chunk 2011-02-17 -> 2011-07-09 141s
fetched chunk 2010-09-27 -> 2011-02-16 149s
fetched chunk 2010-05-07 -> 2010-09-26 155s
fetched chunk 2010-01-01 -> 2010-05-06 161s
Data loaded 161s


## Реализация

Рассмотрим первые 250 цен закрытия акций Apple из загруженных данных. Разделим их на самую первую цену и с помощью линейной регрессии оценим направление тренда.

In [5]:
# Regressor values: day numbers 0..249.
x = np.arange(250)

# Reverse the loaded close-price series (presumably stored newest-first --
# confirm), then scale its first 250 values by the very first one.
prices = data.loc[:, "close", "NASDAQ:AAPL"].to_series().iloc[::-1]
y = prices.iloc[:250] / prices.iloc[0]

# Fit price ~ day number on single-column inputs.
model = LinearRegression()
model.fit(x.reshape(-1, 1), y.values.reshape(-1, 1))

# Evaluate the fitted line at the same day numbers.
slope = model.coef_[0][0]
z = x * slope + model.intercept_

make_plot_double(x, y, z, name1="markers", name2="lines")

Здесь мы видим возрастающий тренд цен. Будем считать, что в следующие 20 дней цена продолжит расти, и вес инструмента на эти 20 дней приравняем к знаку наклона регрессии. Через 20 дней снова рассмотрим предыдущие 250 дней, заново построим регрессию и обновим веса.

In [6]:
wforward = 20           # the slope signs are re-estimated every `wforward` steps
wback = 250             # number of rows of history used for each regression
init_data_length = 250  # passed to test_strategy and read by step()
weights = None          # most recent slope signs, cached between step() calls


def step(data):
    """Return normalised portfolio weights for the currently liquid assets.

    Every ``wforward`` calls (and on the first call, while no weights are
    cached yet) a linear regression is fitted per asset on the ``wback`` rows
    of "open" prices at positions 0..wback-1, and the sign of each slope is
    cached in the module-level ``weights``. Assets with a missing price in
    that window are excluded from the fit.

    Parameters
    ----------
    data : array loaded by ``load_data`` with dims ('time', 'field', 'asset')
        Must support ``data.shape``, ``data.loc[:, field]`` and ``to_pandas``.

    Returns
    -------
    xr.DataArray
        One weight per liquid asset along ``ds.ASSET``. The absolute values
        sum to 1, unless every weight is zero, in which case all weights stay
        zero. Liquid assets without a fitted slope get weight 0.
    """
    global weights

    # Refit on schedule, and also whenever nothing is cached yet, so that
    # `weights` is never indexed while it is still None.
    refit_due = (data.shape[0] - init_data_length - 1) % wforward == 0
    if weights is None or refit_due:
        # Take rows wback-1 .. 0 in reversed order (presumably turning
        # newest-first data into chronological order -- confirm) and drop
        # assets with gaps in the window.
        prices = data.loc[:, "open"].to_pandas().iloc[(wback - 1)::-1, :].dropna(axis=1)
        # Scaling by the last row only rescales each column; for positive
        # prices it does not change the sign of the slope.
        y_train = prices.div(prices.iloc[-1, :], axis=1)
        # Use the real window length so a short history cannot cause a
        # length mismatch between X and y.
        day_numbers = np.arange(len(y_train))
        weights = y_train.apply(lambda column: sign_slope(day_numbers, column), axis=0)

    liquid_assets = liquid_assets_list(data)
    # A liquid asset may have been dropped from the regression window (or may
    # have become liquid since the last refit); give it weight 0 instead of
    # failing on a missing label.
    weights_adj = weights.reindex(liquid_assets).fillna(0)

    # Normalise by the total absolute exposure. Dividing by the element-wise
    # absolute value would only reproduce the signs and yield NaN for zeros.
    weights_sum = weights_adj.abs().sum()
    if weights_sum > 0:
        weights_norm = weights_adj / weights_sum
    else:
        weights_norm = weights_adj
    assets = weights_norm.index

    return xr.DataArray(
        weights_norm.values,
        dims=[ds.ASSET],
        coords={ds.ASSET: assets}
    )


output = test_strategy(data, step=step, init_data_length=init_data_length)

Testing started...
Testing progress: 301/1512 5s
Testing progress: 601/1512 10s
Testing progress: 821/1512 15s
Testing progress: 1001/1512 20s
Testing progress: 1201/1512 26s
Testing progress: 1421/1512 31s
Testing complete 33.827961444854736s


# Результаты

In [7]:
# Compute the strategy statistics and print the last rows of the table.
stat = calc_stat(data, output, slippage_factor=0.05)
stat_table = stat.to_pandas()
print(stat_table.tail())

field         equity  relative_return  volatility  underwater  max_drawdown  \
time                                                                          
2016-12-23  1.175585         0.001243    0.089953   -0.114870      0.190851   
2016-12-27  1.177475         0.001608    0.089854   -0.113447      0.190851   
2016-12-28  1.171291        -0.005252    0.089988   -0.118103      0.190851   
2016-12-29  1.171550         0.000221    0.089904   -0.117908      0.190851   
2016-12-30  1.167096        -0.003802    0.089948   -0.121261      0.190851   

field       sharpe_ratio  mean_return      bias  instruments  trading_duration  
time                                                                            
2016-12-23     -0.779192    -0.070091  0.558442       1069.0         54.556200  
2016-12-27     -0.808201    -0.072620  0.551724       1069.0         54.291527  
2016-12-28     -0.850583    -0.076542  0.551724       1069.0         54.292908  
2016-12-29     -0.886087    -0.079663  0.

# Построение графиков

In [8]:
# Plot the "equity" column of the statistics against its own index.
equity_source = stat.to_pandas()
performance = equity_source['equity']
make_plot_filled(performance.index, performance, name="PnL")

In [9]:
# Plot the "underwater" column of the statistics against its own index.
underwater_source = stat.to_pandas()
UWchart = underwater_source['underwater']
make_plot_filled(UWchart.index, UWchart, name="underwater chart", color="darkred")

In [10]:
# Plot the "sharpe_ratio" column, skipping its first 20 rows.
sharpe_ratio = stat.to_pandas()['sharpe_ratio']
SRchart = sharpe_ratio.iloc[20:]
make_plot_filled(SRchart.index, SRchart, name="rolling SR", color="#F442C5")

In [11]:
#data = load_data(min_date="2015-01-01", dims=('time', 'field', 'asset'), assets=assets_names)
#output = test_strategy(data, step=step, init_data_length=init_data_length)
#write_output(output)