# Create Continuous Time Feature

- Specify Date Range, Instrument, freq

In [11]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import database as db
import pandas_ta as ta
import utils
import os

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# Data Parameters
data_path  = './data2_2021/'
feature_path = './feature'
instrument = 'EG'
freq = '15min'
dates = np.array(db.GetTradeDates(data_path, instrument)[:])
start_date = '20210101'
end_date = '20211231'
dates = dates[np.where((dates >= start_date) & (dates <= end_date))]
# Read CSV Data
big = []
for date in dates:
    df = pd.read_csv(data_path + os.sep + instrument + os.sep + date + '.csv', index_col=0)
    df.index = pd.to_datetime(df.index)
    df['MidPrice'] = 0.5* (df['BidPrice1'] + df['AskPrice1'])
    ohlc = admin_utils.GetOHLC(df, freq)
    ohlc[['Contract', 'TradeDate', 'PreClose']] = df['InstrumentID'].iloc[0], df['TradingDay'].iloc[0], df['PreClosePrice'].iloc[0]
    
    big.append(ohlc)

big = pd.concat(big, axis=0)
close = big[['close']].shift(1)
close.columns=['last_close']
rsi6  = big.ta.rsi(6)
rsi14 = big.ta.rsi(14)
boll = big.ta.bbands(14)
big = pd.concat([big, close, rsi6, rsi14, boll], axis=1)
big.to_csv(feature_path + os.sep + '{}_feature_{}.csv'.format(instrument, freq))

In [9]:
db.GetTradeDates(data_path, instrument)

[]

# Daily Price Plot

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import cufflinks as cf
from plotly.offline import init_notebook_mode, iplot
import utils
init_notebook_mode(connected=True)
cf.go_offline()

def GetBreaks(df, freq):
    df.index = pd.to_datetime(x.index, format='%Y%m%d')
    dt_all = pd.date_range(start=df.index[0], end=df.index[-1], freq=freq)
    dt_obs = [d.strftime('%Y-%m-%d') for d in pd.to_datetime(df.index)]
    dt_breaks = [d for d in dt_all.strftime('%Y-%m-%d').tolist() if not d in dt_obs]
    return dt_breaks

x = big.groupby('TradeDate')['close'].ohlc()
x.index = pd.to_datetime(x.index, format='%Y%m%d')
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=x.index, y=x['close']))
fig.update_xaxes(
    title_text='Date', 
    rangebreaks=[dict(values=GetBreaks(x, '1d'))]
)
fig.update_layout(
    title='Instrument {} Daily Price'.format(instrument)
)
fig.show()

In [None]:
x = big.groupby(['Contract', 'TradeDate'])['close'].ohlc().reset_index().set_index('TradeDate')
contracts = x['Contract'].unique()
x.index = pd.to_datetime(x.index, format='%Y%m%d')


res = []
for contract in contracts:
    z = x.query('Contract == @contract')[['close']] 
    z.columns = [contract]
    res.append(z)
res = pd.concat(res,axis=0)



fig = go.Figure()
for contract in contracts:
    fig.add_trace(go.Scatter(
        x=res.index, y=res[contract], name=contract))
fig.update_xaxes(
    title_text='Date', 
    rangebreaks=[dict(values=GetBreaks(x, '1d'))]
)
fig.update_layout(
    title='Instrument {} Daily Price (有缺口)'.format(instrument))
fig.show()

# 向前加减法

In [None]:
x = big.groupby(['Contract', 'TradeDate'])['close'].ohlc().reset_index().set_index('TradeDate')
contracts = x['Contract'].unique()
x.index = pd.to_datetime(x.index, format='%Y%m%d')

y = x[['Contract', 'close']]
y['tag'] = np.where(y['Contract'] != y['Contract'].shift(1).fillna('NA'), 1, 0)
y['tag_num'] = np.where(y['tag'] == 1, y['close'].diff().fillna(0), np.nan)
y = y[y['tag'] == 1].set_index('Contract')['tag_num'].cumsum().to_dict()

display(y)

res = []
for contract in contracts:
    z = x.query('Contract == @contract')[['close']] - y[contract]
    z.columns = [contract]
    res.append(z)
res = pd.concat(res,axis=0)



fig = go.Figure()
for contract in contracts:
    fig.add_trace(go.Scatter(
        x=res.index, y=res[contract], name=contract))
fig.update_xaxes(
    title_text='Date', 
    rangebreaks=[dict(values=GetBreaks(x, '1d'))]
)
fig.update_layout(
    title='Instrument {} Daily Price (向前加减法)'.format(instrument))
to_hide = []
fig.for_each_trace(lambda trace: trace.update(visible='legendonly') if trace.name in to_hide else ())
fig.show()

# Features

In [74]:
def diff(df, subset=None):  
    cols = df.columns if subset == None else subset
    for name in cols:
        df['diff__{}'.format(name)] = df[name].diff()
    return df

def shift(df, lags=[1], subset=None):  
    cols = df.columns if subset == None else subset
    for name in cols:
        for lag in lags:
            colname = 'shift{}__{}'.format(lag, name)
            df[colname] = df[name].shift(lag)
    return colname

def imbalance(df, a, b, name=None):
    name = 'imbalance__{}__{}'.format(a, b) if name == None else name
    df[name] = (df[a] - df[b]) / (df[a] + df[b])
    return name
    
def ratio(df, a, b, name=None):
    name = 'ratio__{}__{}'.format(a, b) if name == None else name
    df[name] = df[a] / df[b]
    return name

def delta(df, a, b, name=None):
    name = 'delta__{}__{}'.format(a, b) if name == None else name
    df[name] = df[a] - df[b]
    return name

def GenRSI(df, N=6):
    """
    RSI Generation
    """
    df[f'f__RSI{N}']   = df.ta.rsi(N).values
    df[f'f__RSI{N*4}'] = df.ta.rsi(N*4).values,
    df['f__RSI_LSDeviate'] = df.eval(f'rsi{N} - rsi{N*4}')
    df['f__RSI_GCross'] = np.where((df['f__RSI_LSDeviate'] > 0) & (df['f__RSI_LSDeviate'].shift(1) < 0) , df['close'], np.nan)
    df['f__RSI_DCross'] = np.where((df['f__RSI_LSDeviate'] < 0) & (df['f__RSI_LSDeviate'].shift(1) > 0) , df['close'], np.nan)
    return df

def GenBoll(df, N=15):
    boll = big.ta.bbands(N).iloc[:,0:3]
    boll = pd.concat([big, boll], axis=1)

    cols = [x for x in boll.columns if 'BB' in x]
    for col in cols:
        boll[f'f__{col[:3]}_diff']  = boll[col].diff()
        boll[f'f__{col[:3]}_slope'] = (boll[col] - boll[col].shift(1)) /23 #/ big['upset'] /  23
        boll[f'f__{col[:3]}_angle'] = boll[f'{col[:3]}_slope'].map(atan) * 180 / np.pi

    delta(boll, 'BBU_slope', 'BBM_slope')
    delta(boll, 'BBM_slope', 'BBL_slope')
    return boll

## Make DataFrame

In [65]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import database as db
import pandas_ta as ta
import utils
import os

pd.set_option('display.max_rows', 100)


# Data Parameters
data_path  = './data2_2021/'
feature_path = './feature'
instrument = 'EG'
freq = '15min'
dates = np.array(db.GetTradeDates(data_path, instrument)[:])
start_date = '20210601'
end_date = '20210731'
dates = dates[np.where((dates >= start_date) & (dates <= end_date))]
# Read CSV Data
big = []
for date in dates:
    df = pd.read_csv(data_path + os.sep + instrument + os.sep + date + '.csv', index_col=0)
    df.index = pd.to_datetime(df.index)
    df['MidPrice'] = 0.5* (df['BidPrice1'] + df['AskPrice1'])
    ohlc = utils.GetOHLC(df, freq)
    ohlc[['Contract', 'TradeDate', 'PreClose', 'PreSettlement', 'UpperLimit', 'LowerLimit']] = \
    df['InstrumentID'].iloc[0], df['TradingDay'].iloc[0], \
    df['PreClosePrice'].iloc[0], df['PreSettlementPrice'].iloc[0], \
    df['UpperLimitPrice'].iloc[0], df['LowerLimitPrice'].iloc[0]
    ohlc['upset'] = df['UpperLimitPrice'].iloc[0] - df['PreSettlementPrice'].iloc[0]
    ohlc['dnset'] = df['LowerLimitPrice'].iloc[0] - df['PreSettlementPrice'].iloc[0]
    assert (ohlc['upset'] == ohlc['dnset'].abs()).all() == True
    big.append(ohlc)

big = pd.concat(big, axis=0)
display(big.head(), big.describe())


Unnamed: 0_level_0,open,high,low,close,volume,Contract,TradeDate,PreClose,PreSettlement,UpperLimit,LowerLimit,upset,dnset
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-05-31 21:15:00,5043.5,5056.5,5009.5,5025.5,45050.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0
2021-05-31 21:30:00,5025.5,5042.0,5016.5,5039.5,16406.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0
2021-05-31 21:45:00,5040.5,5044.5,5026.5,5037.0,10102.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0
2021-05-31 22:00:00,5037.5,5041.5,5013.5,5020.5,18387.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0
2021-05-31 22:15:00,5020.5,5039.5,5016.5,5036.0,7724.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0


Unnamed: 0,open,high,low,close,volume,TradeDate,PreClose,PreSettlement,UpperLimit,LowerLimit,upset,dnset
count,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0
mean,5128.069,5140.852,5115.044,5127.907,15598.721,20210682.705,5117.316,5116.297,5525.077,4707.516,408.78,-408.78
std,220.057,222.16,218.077,220.588,9014.109,50.96,208.65,204.011,220.351,187.671,16.342,16.342
min,4710.5,4725.5,4700.5,4708.5,4592.0,20210601.0,4774.0,4749.0,5128.0,4370.0,379.0,-439.0
25%,4986.375,4995.5,4977.25,4986.125,9759.5,20210610.0,4988.0,4986.0,5384.0,4588.0,398.0,-426.0
50%,5092.0,5105.5,5080.5,5092.0,13040.5,20210709.0,5089.0,5069.0,5474.0,4664.0,405.0,-405.0
75%,5332.75,5347.75,5317.5,5333.5,18340.0,20210721.0,5341.0,5333.0,5759.0,4907.0,426.0,-398.0
max,5581.5,5586.5,5562.5,5581.0,82105.0,20210730.0,5447.0,5499.0,5938.0,5060.0,439.0,-379.0


## Create Features

In [75]:
big.pipe(GenRSI)

AttributeError: 'numpy.ndarray' object has no attribute 'ta'

## RSI

In [9]:
import utils
import plotly.graph_objects as go
from plotly.subplots import make_subplots

N=6
df = pd.DataFrame({
    f'rsi_{N}': big.ta.rsi(N).values,
    f'rsi_4{N}': big.ta.rsi(N*4).values,
}, index=big.index)
df['close'] = big.close.values
df['f__RSI_LSDeviate'] = df.eval(f'rsi_{N} - rsi_4{N}')
df['f__RSI_GCross'] = np.where((df['f__RSI_LSDeviate'] > 0) & (df['f__RSI_LSDeviate'].shift(1) < 0) , df['close'], np.nan)
df['f__RSI_DCross'] = np.where((df['f__RSI_LSDeviate'] < 0) & (df['f__RSI_LSDeviate'].shift(1) > 0) , df['close'], np.nan)
display(df)
df = df.head(500)

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01, row_heights=[0.6,0.4])
for col in df.columns:
    if 'close' in col:
        fig.add_trace(go.Scatter(x=df.index, y=df[col], name=col), row=1, col=1)
    elif 'GCross' in col:
        fig.add_trace(go.Scatter(x=df.index, y=df[col], name=col, mode='markers', marker=dict(size=10, symbol='x', color='yellow')), row=1, col=1)
    elif 'DCross' in col:
        fig.add_trace(go.Scatter(x=df.index, y=df[col], name=col, mode='markers', marker=dict(size=10, symbol='x', color='black')), row=1, col=1)
    else:
        fig.add_trace(go.Scatter(x=df.index, y=df[col], name=col), row=2, col=1)
fig.update_xaxes(
    title_text='Date', 
    rangebreaks=[dict(values=utils.GetBreaks(df, '15min'), dvalue=900000)],
    row=2, col=1
)
fig.update_layout(
    title=f'RSI Feature Visualization (Short: {N} Long: {N*4})',
    autosize=False, width=1000, height=800,
    yaxis=dict(autorange=True, fixedrange=False),
    showlegend=True,
)
fig.show()

Unnamed: 0_level_0,rsi_6,rsi_46,close,f__RSI_LSDeviate,f__RSI_GCross,f__RSI_DCross
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-31 21:15:00,,,5496.5,,,
2021-10-31 21:30:00,,,5518.5,,,
2021-10-31 21:45:00,,,5520.5,,,
2021-10-31 22:00:00,,,5541.5,,,
2021-10-31 22:15:00,,,5513.5,,,
...,...,...,...,...,...,...
2021-11-18 14:00:00,55.645147,44.147290,5044.0,11.497857,,
2021-11-18 14:15:00,38.033610,40.764751,5010.5,-2.731141,,5010.5
2021-11-18 14:30:00,29.634338,38.469482,4985.5,-8.835143,,
2021-11-18 14:45:00,27.053389,37.672660,4976.5,-10.619271,,


## BAR

In [None]:
import plotly.graph_objects as go
import utils
fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=df.index, open=df['open'], high=df['high'], low=df['low'], close=df['close'], showlegend=False,
    increasing=dict(fillcolor='#ff0000', line=dict(color='#000000', width=0.3)),
    decreasing=dict(fillcolor='#00ff44', line=dict(color='#000000', width=0.3)),
    #hoverinfo='none',
    opacity=0.7,
))
fig.update_xaxes(
    title_text='Date', 
    rangeslider_visible=False,
    #xaxis=dict(type='category', linewidth=2),
    rangebreaks=[dict(values=utils.GetBreaks(df, freq), dvalue=900000)],
)
fig.update_layout(
    title='CTP Based Backtest Report',
    #xaxis=dict(type='category', linewidth=2), 
    autosize=False, width=1000, height=1000,
    yaxis=dict(autorange=True, fixedrange=False),
    showlegend=True,
)
              

In [None]:
utils.GetBreaks(df, freq)

In [None]:
import plotly.graph_objects as go
import numpy as np

# Create figure
fig = go.Figure()

# Add traces, one for each slider step
for step in np.arange(0, 5, 0.1):
    fig.add_trace(
        go.Scatter(
            visible=False,
            line=dict(color="#00CED1", width=6),
            name="𝜈 = " + str(step),
            x=np.arange(0, 10, 0.01),
            y=np.sin(step * np.arange(0, 10, 0.01))))

# Make 10th trace visible
fig.data[10].visible = True

# Create and add slider
steps = []
for i in range(len(fig.data)):
    step = dict(
        method="update",
        args=[{"visible": [False] * len(fig.data)},
              {"title": "Slider switched to step: " + str(i)}],  # layout attribute
    )
    step["args"][0]["visible"][i] = True  # Toggle i'th trace to "visible"
    steps.append(step)

sliders = [dict(
    active=10,
    currentvalue={"prefix": "Frequency: "},
    pad={"t": 50},
    steps=steps
)]

fig.update_layout(
    sliders=sliders
)

fig.show()

## Boll

In [55]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import database as db
import pandas_ta as ta
import utils
import os
from math import atan, pi 

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.options.display.float_format = '{:.3f}'.format

# Make Features
boll = big.ta.bbands(15).iloc[:,0:3]
boll = pd.concat([big, boll], axis=1)

cols = [x for x in boll.columns if 'BB' in x]
for col in cols:
    boll[f'{col[:3]}_diff']  = boll[col].diff()
    boll[f'{col[:3]}_slope'] = (boll[col] - boll[col].shift(1)) /23 #/ big['upset'] /  23
    boll[f'{col[:3]}_angle'] = boll[f'{col[:3]}_slope'].map(atan) * 180 / np.pi

delta(boll, 'BBU_slope', 'BBM_slope')
delta(boll, 'BBM_slope', 'BBL_slope')
#temp= boll.query('TradeDate >= 20211102')[[x for x in boll.columns if 'BB' in x]]
display(boll.shape, boll.head(), boll.describe())
#temp.to_csv('BBSlopeAngle.csv')

(728, 27)

Unnamed: 0_level_0,open,high,low,close,volume,Contract,TradeDate,PreClose,PreSettlement,UpperLimit,LowerLimit,upset,dnset,BBL_15_2.0,BBM_15_2.0,BBU_15_2.0,BBL_diff,BBL_slope,BBL_angle,BBM_diff,BBM_slope,BBM_angle,BBU_diff,BBU_slope,BBU_angle,delta__BBU_slope__BBM_slope,delta__BBM_slope__BBL_slope
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2021-05-31 21:15:00,5043.5,5056.5,5009.5,5025.5,45050.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0,,,,,,,,,,,,,,
2021-05-31 21:30:00,5025.5,5042.0,5016.5,5039.5,16406.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0,,,,,,,,,,,,,,
2021-05-31 21:45:00,5040.5,5044.5,5026.5,5037.0,10102.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0,,,,,,,,,,,,,,
2021-05-31 22:00:00,5037.5,5041.5,5013.5,5020.5,18387.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0,,,,,,,,,,,,,,
2021-05-31 22:15:00,5020.5,5039.5,5016.5,5036.0,7724.0,eg2109,20210601,4992.0,4986.0,5384.0,4588.0,398.0,-398.0,,,,,,,,,,,,,,


Unnamed: 0,open,high,low,close,volume,TradeDate,PreClose,PreSettlement,UpperLimit,LowerLimit,upset,dnset,BBL_15_2.0,BBM_15_2.0,BBU_15_2.0,BBL_diff,BBL_slope,BBL_angle,BBM_diff,BBM_slope,BBM_angle,BBU_diff,BBU_slope,BBU_angle,delta__BBU_slope__BBM_slope,delta__BBM_slope__BBL_slope
count,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,728.0,714.0,714.0,714.0,713.0,713.0,713.0,713.0,713.0,713.0,713.0,713.0,713.0,713.0,713.0
mean,5128.069,5140.852,5115.044,5127.907,15598.721,20210682.705,5117.316,5116.297,5525.077,4707.516,408.78,-408.78,5070.293,5125.566,5180.838,0.524,0.023,1.192,0.569,0.025,1.404,0.614,0.027,1.245,0.002,0.002
std,220.057,222.16,218.077,220.588,9014.109,50.96,208.65,204.011,220.351,187.671,16.342,16.342,210.975,218.023,228.432,7.983,0.347,16.876,4.94,0.215,11.787,8.592,0.374,18.095,0.29,0.29
min,4710.5,4725.5,4700.5,4708.5,4592.0,20210601.0,4774.0,4749.0,5128.0,4370.0,379.0,-439.0,4696.865,4747.6,4784.192,-30.962,-1.346,-53.393,-14.067,-0.612,-31.45,-43.182,-1.877,-61.959,-2.321,-2.321
25%,4986.375,4995.5,4977.25,4986.125,9759.5,20210610.0,4988.0,4986.0,5384.0,4588.0,398.0,-426.0,4957.138,4986.392,5024.88,-2.768,-0.12,-6.863,-2.5,-0.109,-6.203,-3.005,-0.131,-7.445,-0.143,-0.143
50%,5092.0,5105.5,5080.5,5092.0,13040.5,20210709.0,5089.0,5069.0,5474.0,4664.0,405.0,-405.0,5034.891,5090.75,5141.729,0.274,0.012,0.682,0.633,0.028,1.577,0.05,0.002,0.124,-0.011,-0.011
75%,5332.75,5347.75,5317.5,5333.5,18340.0,20210721.0,5341.0,5333.0,5759.0,4907.0,426.0,-398.0,5268.485,5333.492,5393.849,4.18,0.182,10.302,3.933,0.171,9.705,3.724,0.162,9.198,0.115,0.115
max,5581.5,5586.5,5562.5,5581.0,82105.0,20210730.0,5447.0,5499.0,5938.0,5060.0,439.0,-379.0,5451.998,5522.867,5621.504,63.582,2.764,70.113,15.133,0.658,33.344,41.282,1.795,60.876,1.54,1.54
