In [1]:
cd ..

/Users/kimyoungh/GoogleDrive/singlemolt/main/singlemolt


In [2]:
from collections import defaultdict
import numpy as np
import pandas as pd

from kingsman.data_processor import OHLCVDataProcessor, StrategyProcessor

In [3]:
# Input data: read the two raw CSV files, using the first column as the row index.
k200 = pd.read_csv('raw_data/k200.csv', header=0, index_col=0)
kq = pd.read_csv('raw_data/kq.csv', header=0, index_col=0)

# Cast the index labels of both tables to strings.
for frame in (k200, kq):
    frame.index = frame.index.astype(str)

# Collect the tables under the keys handed to the data processor.
ohlcv_data = defaultdict(pd.DataFrame)
ohlcv_data.update(K200=k200, KQ=kq)

dp = OHLCVDataProcessor(ohlcv_data, translate_cols=True)

features, dates = dp.calc_feature_data()

# Drop the last date with the returns in mind: the final position along
# axis 1 of `features` and the final entry of `dates` are removed.
# NOTE(review): presumably the last date has no return to pair with -- confirm.
features, dates = features[:, :-1], dates[:-1]

# Keep only the return rows labelled by the remaining dates.
returns = dp.calc_returns().loc[dates]

In [4]:
# Add a sign-flipped copy of each return column under a '_i' suffixed name
# (presumably "inverse" exposure -- confirm). Re-running the cell recomputes
# the same columns from the untouched source columns.
for source_col, flipped_col in (('K200', 'K200_i'), ('KQ', 'KQ_i')):
    returns[flipped_col] = -returns[source_col]

In [5]:
# Output data
# Strategy name -> per-date weight table; this container is passed to
# StrategyProcessor below.
st_weights = defaultdict(pd.DataFrame)

# One fixed weight vector per strategy. The entries are positional: they are
# paired with returns.columns when the weight tables are built below.
# NOTE(review): the vectors assume the column order K200, KQ, K200_i, KQ_i --
# confirm against the columns produced by dp.calc_returns().
# A plain dict replaces the former defaultdict(np.array): np.array cannot be
# called without arguments, so that default factory could never produce a
# value and a missing key raised TypeError instead of KeyError.
strategies = {
    'k200': np.array([1., 0., 0., 0.]),
    'kq': np.array([0., 1., 0., 0.]),
    'k200_i': np.array([0., 0., 1., 0.]),
    'kq_i': np.array([0., 0., 0., 1.]),
    'k200-kq': np.array([0.5, 0., 0., 0.5]),
    'kq-k200': np.array([0., 0.5, 0.5, 0.]),
}

for key, value in strategies.items():
    # Repeat the same weight vector on every row of `returns`, so each
    # strategy holds a constant allocation over all dates.
    w = pd.DataFrame(value.reshape(1, -1).repeat(returns.shape[0], 0),
                     index=returns.index, columns=returns.columns)

    st_weights[key] = w

stproc = StrategyProcessor(returns, st_weights, initial_t=0)

# NOTE(review): `pos_list` is assigned by both calls, so only the value from
# the mode='min' call is kept. Confirm that the two calls return the same
# positions, or that the 'min' result is the one intended downstream.
max_st, pos_list = stproc.calc_daily_best_strategies(mode='max')
min_st, pos_list = stproc.calc_daily_best_strategies(mode='min')

# NOTE(review): likewise `time_pos` ends up holding the mode='min' result only.
max_series, time_pos =\
    stproc.calc_daily_best_strategies_rebalanced(mode='max')
min_series, time_pos =\
    stproc.calc_daily_best_strategies_rebalanced(mode='min')

In [10]:
# Sanity check: print the shape of every object produced above, one per line,
# in the same order in which they are written to disk later.
for produced in (features, dates, returns,
                 max_st, min_st, pos_list,
                 max_series, min_series, time_pos):
    print(produced.shape)

(2, 4998, 20)
(4998,)
(4998, 4)
(4998,)
(4998,)
(4998,)
(4997, 6, 2)
(4997, 6, 2)
(4997, 6, 2)


In [13]:
def _save_npy(path, array):
    """Write `array` to `path` in .npy format with pickling allowed."""
    np.save(path, array, allow_pickle=True)


# Persist every training artifact under train_data/. The returns table goes
# to CSV; everything else is stored as a .npy file.
_save_npy('train_data/features.npy', features)
_save_npy('train_data/dates.npy', dates)
returns.to_csv('train_data/returns.csv')
_save_npy('train_data/max_st_series.npy', max_st)
_save_npy('train_data/min_st_series.npy', min_st)
_save_npy('train_data/best_pos_series.npy', pos_list)
_save_npy('train_data/max_rebal_series.npy', max_series)
_save_npy('train_data/min_rebal_series.npy', min_series)
_save_npy('train_data/rebal_pos_series.npy', time_pos)