In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
import pandas as pd
import numpy as np
import datetime as dt
import timeit
import os.path

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
IH_dir = '/content/drive/My Drive/DL_Project/IH/'

In [0]:
front_month_map = {1: '1802', 2: '1803', 3: '1804', 4: '1805', 5: '1806', 6: '1807', 
           7: '1808', 8: '1809', 9: '1810', 10: '1811', 11: '1812', 12: '1901'}

In [0]:
Morning_Start = dt.timedelta(hours=9, minutes=30)
Morning_End = dt.timedelta(hours=11, minutes=30)
Afternoon_Start = dt.timedelta(hours=13)
Afternoon_End = dt.timedelta(hours=15)

In [0]:
def regularize(df, fill_last, fill_zero, reg_col = 'datetime', reg_str='0.5S'):
  df = df.set_index(reg_col)
  df = df.resample(reg_str).last()
  # ffill: propagate last valid observation forward to next valid
  df[fill_last] = df[fill_last].fillna(method='ffill')
  df[fill_zero] = df[fill_zero].fillna(0)
  return df

In [0]:
def add_features(df):
  
  # helper start:
  df['mid'] = 0.5*(df['bid1'] + df['ask1'])
  df['wmid'] = (df['bid1']*df['askv1'] + df['ask1']*df['bidv1']) / (df['askv1'] + df['bidv1'])
  
  df['prev_bid'] = df['bid1'].shift(1)
  df['prev_ask'] = df['ask1'].shift(1)

  df['trade_dir'] = 0 # approximation
  df.loc[((df['mid'] > df['mid'].shift(1)) | 
      (df['last'] >= df['ask1'].shift(1)) ) & (df['volume'] > 0), 'trade_dir'] = 1
  df.loc[((df['mid'] < df['mid'].shift(1)) | 
      (df['last'] <= df['bid1'].shift(1)) ) & (df['volume'] > 0), 'trade_dir'] = -1

  return df

In [0]:
def add_trade_strategy(df):
  max_pos = 3
  pos = 0
  cash = 0.0
  action = 0
  # enter_price = np.nan
  df['pos'] = 0.0
  df['cash'] = 0.0
  df['action'] = 0

  for i in df.index:
    if df.loc[i, 'trade_dir'] == 1:
      if pos == 0:
        pos -= 1
        cash += df.loc[i, 'prev_ask']
        action = -1
      elif pos > 0:
        cash += pos * df.loc[i, 'prev_ask']
        pos = 0
        action = -1
      else:  # pos < 0:
        if pos <= -max_pos:
          action = 0
        else:
          pos -= 1
          cash += df.loc[i, 'prev_ask']
          action = -1
    
    elif df.loc[i, 'trade_dir'] == -1:
      if pos == 0:
        pos += 1
        cash -= df.loc[i, 'prev_bid']
        action = 1
      elif pos < 0:
        cash -= np.abs(pos) * df.loc[i, 'prev_bid']
        pos = 0
        action = 1
      else:  # pos > 0:
        if pos >= max_pos:
          action = 0
        else:
          pos += 1
          cash -= df.loc[i, 'prev_bid']
          action = 1
    else:
      action = 0


    df.loc[i, 'pos'] = pos
    df.loc[i, 'cash'] = cash
    df.loc[i, 'action'] = action
  
  df['cum_pnl'] = df['pos'] * df['mid'] + df['cash']

  return df


In [0]:
def build_one_day_IH(df, morning_session_start, morning_session_end,
           afternoon_session_start, afternoon_session_end,
           pnl_dict):
  df.rename(columns = {' instrument': 'instrument',
                     ' datetime': 'datetime',
                     ' last': 'last',
                     ' opi': 'opi',
                     ' turnover': 'turnover',
                     ' volume': 'volume',
                     ' bid1': 'bid1',
                     ' ask1': 'ask1',
                     ' bidv1': 'bidv1',
                     ' askv1': 'askv1'}, inplace = True)
  
  df = df[['datetime', 'last', 'opi', 'turnover', 'volume', 'bid1', 'ask1', 'bidv1', 'askv1']]
  fill_last_cols = ['last', 'opi', 'bid1', 'ask1', 'bidv1', 'askv1']
  fill_zero_cols = ['turnover', 'volume']

  df['datetime'] = pd.to_datetime(df['datetime'])

  df_am = df[(df['datetime'] >= morning_session_start) & 
             (df['datetime'] <= morning_session_end)]
  df_pm = df[(df['datetime'] >= afternoon_session_start) & 
             (df['datetime'] <= afternoon_session_end)]
  
  df_am = regularize(df_am, fill_last_cols, fill_zero_cols)
  df_pm = regularize(df_pm, fill_last_cols, fill_zero_cols)
  
  df_am = add_features(df_am)
  df_pm = add_features(df_pm)


  df_am = add_trade_strategy(df_am)
  df_pm = add_trade_strategy(df_pm)
  
  pnl_dict['am'] = df_am['cum_pnl'].iloc[-1]
  pnl_dict['pm'] = df_pm['cum_pnl'].iloc[-1]

  # merge rows
  df = pd.concat([df_am, df_pm])
  return df
  

In [0]:
start_time = timeit.default_timer()

pnl_summary = {'date': [], 'am': [], 'pm': []}

for trade_date in pd.date_range('20180101', '20181231'):
  contract = 'IH' + front_month_map[trade_date.month] + '_' + trade_date.strftime('%Y%m%d')
  
  if not os.path.exists(IH_dir + contract + '.csv'):
    # print(IH_dir + contract + '.csv', ' not found')
    continue
  
  print('Processing', trade_date.date(), ' Contract:', contract)
  
  IH = pd.read_csv(IH_dir + contract + '.csv')
  
  morning_start = trade_date + Morning_Start
  morning_end = trade_date + Morning_End
  afternoon_start = trade_date + Afternoon_Start
  afternoon_end = trade_date + Afternoon_End
  
  pnl_cur = {'am': 0.0, 'pm': 0.0}

  IH = build_one_day_IH(IH, morning_start, morning_end,
              afternoon_start, afternoon_end, pnl_cur)
  
  pnl_summary['date'].append(trade_date.date())
  pnl_summary['am'].append(pnl_cur['am'])
  pnl_summary['pm'].append(pnl_cur['pm'])

  # IH_dropna = IH.dropna()
  
  IH.to_csv('/content/drive/My Drive/DL_Project/mm_strategy/mm_backtest_' + 
            trade_date.strftime('%Y%m%d') + '.csv')

pnl_summary = pd.DataFrame.from_dict(pnl_summary)
pnl_summary['cum_pnl'] = pnl_summary['am'] + pnl_summary['pm']
pnl_summary.to_csv('/content/drive/My Drive/DL_Project/mm_strategy/pnl_summary.csv')
print('Time took: ', timeit.default_timer() - start_time)

Processing 2018-01-02  Contract: IH1802_20180102


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing 2018-01-03  Contract: IH1802_20180103


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing 2018-01-04  Contract: IH1802_20180104
Processing 2018-01-05  Contract: IH1802_20180105
Processing 2018-01-08  Contract: IH1802_20180108
Processing 2018-01-09  Contract: IH1802_20180109
Processing 2018-01-10  Contract: IH1802_20180110
Processing 2018-01-11  Contract: IH1802_20180111
Processing 2018-01-12  Contract: IH1802_20180112
Processing 2018-01-15  Contract: IH1802_20180115
Processing 2018-01-16  Contract: IH1802_20180116
Processing 2018-01-17  Contract: IH1802_20180117
Processing 2018-01-18  Contract: IH1802_20180118
Processing 2018-01-19  Contract: IH1802_20180119
Processing 2018-01-22  Contract: IH1802_20180122
Processing 2018-01-23  Contract: IH1802_20180123
Processing 2018-01-24  Contract: IH1802_20180124
Processing 2018-01-25  Contract: IH1802_20180125
Processing 2018-01-26  Contract: IH1802_20180126
Processing 2018-01-29  Contract: IH1802_20180129
Processing 2018-01-30  Contract: IH1802_20180130
Processing 2018-01-31  Contract: IH1802_20180131
Processing 2018-02-0