In [None]:
%matplotlib inline
# %load_ext autoreload
# %autoreload 2

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from pandas import Series
import numpy as np
from sklearn.linear_model import LinearRegression

from pipeline.parse_raw_df import pre_process_df_with_date_time, pre_process_df_with_date
from datatools import data_quantization, extract_market_data
from pipeline import Dataset, N_train_days, N_test_days, N_asset, N_timeslot
from pipeline.backtest import cross_validation, evaluation_for_submission
from visualization.metric import plot_performance

from tqdm.auto import trange
from qids_lib import QIDS

In [None]:
# Load the dataset from the parsed-data directory (path is relative to this notebook).
dataset = Dataset.load('../../data/parsed')
# QIDS object rooted two directories up; it is not referenced again in the cells
# visible here. NOTE(review): presumably kept for a later submission step — confirm.
qids = QIDS(path_prefix='../../')

In [None]:
# Market features derived from the raw market table.
m_intraday_df = extract_market_data(dataset.market)

# Fundamentals, market features and reference returns side by side;
# any row with a missing value in one of the three sources is discarded.
df = (
    pd.concat(
        [dataset.fundamental, m_intraday_df, dataset.ref_return],
        axis='columns',
    )
    .dropna()
)

In [None]:
def _volume_price_weighted_return(day_df):
    """Average ``daily_return`` of one group, weighted by ``mean_volume * avg_price``."""
    weights = day_df['mean_volume'].values * day_df['avg_price'].values
    return np.average(day_df['daily_return'].values, weights=weights)


# One weighted market return per value of the first index level.
market_weighted_return = m_intraday_df.groupby(level=0).apply(_volume_price_weighted_return)

In [None]:
def _equal_weighted_return(day_df):
    """Unweighted average of ``daily_return`` over the rows of one group."""
    return np.average(day_df['daily_return'].values)


# One equal-weighted market return per value of the first index level.
market_simple_return = m_intraday_df.groupby(level=0).apply(_equal_weighted_return)

In [None]:
# Cumulative log return of the equal-weighted market.
# log1p(r) is the numerically accurate form of log(1 + r) when r is small,
# which is the typical magnitude of a daily return.
plt.figure()
plt.plot(np.log1p(market_simple_return).cumsum())
plt.title('Equal-weighted market: cumulative log return')
plt.xlabel('day')
plt.ylabel('cumulative log return')
plt.show()

In [None]:
# NOTE(review): this looks like the start of a market-index construction that was
# never finished — the list and the running index are initialised but not updated
# in this cell, and the loop body only prints each group key.
market_index_list = []
current_market_index = 1
# Walk over `df` grouped by its first index level; the group key is bound to `day`.
for day, slice_df in df.groupby(level=0):
    print(day)

In [None]:
# Price path of every asset, rebased to 1.0 at its first available row so that
# assets trading at different price levels can share one axis.
plt.figure()
# Asset ids are taken to be 0 .. N_asset - 1 (N_asset is imported from `pipeline`).
# NOTE(review): confirm N_asset matches the number of assets in `df`.
for i in range(N_asset):
    price = df.query(f'asset == {i}')['avg_price']
    if price.empty:
        # Every row of this asset was removed by the earlier dropna(); there is
        # no first price to rebase on, so skip instead of raising IndexError.
        continue
    plt.plot(price.values / price.values[0], label=f'{i}')
plt.title('avg_price per asset, rebased to first observation')
plt.xlabel('observation index')
plt.ylabel('avg_price / first avg_price')
# One legend entry per asset: use several columns and a small font to keep it readable.
plt.legend(ncol=6, fontsize='x-small')
plt.show()

In [None]:
# 10-row rolling mean of each asset's `return` column; the first 9 points of
# every line are NaN because the window is not yet full.
plt.figure()
# Asset ids are taken to be 0 .. N_asset - 1 (N_asset is imported from `pipeline`).
# NOTE(review): confirm N_asset matches the number of assets in `df`.
for i in range(N_asset):
    ret = df.query(f'asset == {i}')['return']
    plt.plot(ret.rolling(10).mean().values, label=f'{i}')
plt.title('Rolling mean (window=10) of return per asset')
plt.xlabel('observation index')
plt.ylabel('rolling mean return')
# One legend entry per asset: use several columns and a small font to keep it readable.
plt.legend(ncol=6, fontsize='x-small')
plt.show()

In [None]:
# Display the merged frame built above (fundamentals + market features +
# reference returns, with incomplete rows dropped).
df

In [None]:
# Lag-3 autocorrelation of the equal-weighted market return series.
market_simple_return.autocorr(lag=3)

In [None]:
# Exponentially weighted means of the weighted market return at four half-lives.
# Each series is renamed so that it carries its own column name.
exp_1_w_ret, exp_3_w_ret, exp_5_w_ret, exp_10_w_ret = (
    market_weighted_return.ewm(halflife=hl).mean().rename(col)
    for hl, col in (
        (1, 'exp_1_w_ret'),
        (3, 'exp_3_w_ret'),
        (5, 'exp_5_w_ret'),
        (10, 'exp_10_w_ret'),
    )
)

In [None]:
# Four EWM market-return series side by side, one column per half-life.
exp_ret = pd.concat(
    [exp_1_w_ret, exp_3_w_ret, exp_5_w_ret, exp_10_w_ret],
    axis='columns',
)
# Attach the market features to each reference-return row through its 'day' key.
exp_df = dataset.ref_return.merge(exp_ret, left_on='day', right_index=True).dropna()
# Add the fundamentals and keep only fully populated rows.
full_df = pd.concat([dataset.fundamental, exp_df], axis='columns').dropna()

# Regressors: seven fundamental columns followed by the four EWM market returns.
features = [
    'turnoverRatio',
    'transactionAmount',
    'pe_ttm',
    'pe',
    'pb',
    'ps',
    'pcf',
    'exp_1_w_ret',
    'exp_3_w_ret',
    'exp_5_w_ret',
    'exp_10_w_ret',
]

X = full_df[features]
y = full_df['return']

# In-sample fit; the score below is R^2 on the same rows the model was fitted on.
reg = LinearRegression()
reg.fit(X, y)
reg.score(X, y)

In [None]:
# Visual check of the halflife=10 EWM of the weighted market return.
# Wrapped in figure()/show() like the other plotting cells so the cell renders
# only the figure and not the list-of-Line2D repr returned by plt.plot.
plt.figure()
plt.plot(exp_10_w_ret)
plt.title('exp_10_w_ret')
plt.xlabel('day')
plt.ylabel('EWM of weighted market return (halflife=10)')
plt.show()

In [None]:
def linear_model(X, y):
    """Fit a scikit-learn ``LinearRegression`` on ``X`` against ``y``.

    Parameters
    ----------
    X : feature matrix accepted by ``LinearRegression.fit``.
    y : target values aligned with the rows of ``X``.

    Returns
    -------
    The fitted ``LinearRegression`` estimator.
    """
    return LinearRegression().fit(X, y)

In [None]:
# Cross-validate `linear_model` on the `features` columns of `full_df`.
# n_splits and lookback_window are forwarded unchanged to
# pipeline.backtest.cross_validation, which defines their meaning.
performance = cross_validation(
    linear_model,
    features,
    full_df,
    n_splits=997,
    lookback_window=200,
)

In [None]:
# Plot the selected train / validation metrics from the cross-validation result.
plt.figure()
plot_performance(
    performance,
    metrics_selected=[
        'train_r2',
        'val_cum_r2',
        'val_cum_pearson',
    ],
)

In [None]:
# NOTE(review): this call is identical to the earlier cross_validation cell (same
# model function, feature list, frame and keyword arguments), so it recomputes and
# overwrites `performance`. Presumably a leftover from iterating on the model —
# confirm before removing.
performance = cross_validation(linear_model, features, full_df, n_splits=997, lookback_window=200)

In [None]:
# NOTE(review): same plot as the earlier plot_performance cell (identical
# arguments); it only differs if `performance` changed in between.
plt.figure()
plot_performance(performance, metrics_selected=['train_r2', 'val_cum_r2', 'val_cum_pearson'])