# All Code for Team 2C
### Lei Jin, Ruinan Lu, Yuanrui Li, Xianci Zhang

# Data Downloader

In [1]:
from datetime import date, timedelta
import datetime
import pandas as pd


class DataStore:
    """Download, cache and resample BitMEX public trade and quote dumps.

    Daily ``<tag>/<YYYYMMDD>.csv.gz`` files are read from the public BitMEX
    S3 bucket, filtered to the requested symbols and cached on the instance
    so that each feed is downloaded at most once.

    Parameters
    ----------
    stdt, endt : anything accepted by ``pd.to_datetime``
        First and last day (inclusive) to download.
    freq : str
        pandas resampling frequency used by :meth:`resampleData`.
    tickers : iterable of str, or a single str
        Symbols to keep.
    """

    _BASE_URL = 'https://s3-eu-west-1.amazonaws.com/public.bitmex.com/data/'

    def __init__(self, stdt, endt, freq = '300T', tickers=("XBTUSD",)):
        self.freq        = freq
        self.startdate   = pd.to_datetime(stdt)
        self.enddate     = pd.to_datetime(endt)
        # Store a private list so instances never share (or mutate) the
        # default argument; a bare string is treated as one symbol.
        if isinstance(tickers, str):
            tickers = [tickers]
        self.ticker_list = list(tickers)

    @staticmethod
    def _read_daily_csv(path):
        """Read one gzipped CSV, skipping malformed lines."""
        try:
            return pd.read_csv(path, compression='gzip', on_bad_lines='skip')
        except TypeError:
            # Older pandas only knows the previous spelling of the option.
            return pd.read_csv(path, compression='gzip', error_bad_lines=False)

    def loadData(self, tag):
        """Download every daily file of feed ``tag`` in the configured range.

        Returns one concatenated frame restricted to ``self.ticker_list``,
        with ``timestamp`` parsed to datetimes (unparseable values -> NaT).
        """
        datelist = [x.strftime('%Y%m%d')
                    for x in pd.date_range(start=self.startdate, end=self.enddate)]
        frames = []
        for day in datelist:
            path = self._BASE_URL + tag + '/' + day + '.csv.gz'
            print ("loading data", tag, day)
            df = self._read_daily_csv(path)
            frames.append(df[df.symbol.isin(self.ticker_list)])
        frame = pd.concat(frames, axis=0, ignore_index=True)
        frame["timestamp"] = pd.to_datetime(frame["timestamp"], errors='coerce',
                                            format='%Y-%m-%dD%H:%M:%S.%f')
        return frame

    def loadTradeData(self):
        """Return the raw trade feed, downloading it on first use."""
        if not hasattr(self, 'raw_tradeData'):
            self.raw_tradeData = self.loadData("trade")
        return self.raw_tradeData

    def loadQuoteData(self):
        """Return the raw quote feed, downloading it on first use."""
        if not hasattr(self, 'raw_quoteData'):
            self.raw_quoteData = self.loadData("quote")
        return self.raw_quoteData

    def resampleQuote(self, df, frequency):
        """Resample quotes per symbol to ``frequency`` bars.

        Each bar carries min/max/last of bid/ask size and price (columns
        named ``<field>_<stat>``) plus ``timestamp_lastQuote``; rows with any
        missing value are dropped.
        """
        print ("resampling quote data to ",frequency)
        df = df.rename(columns={'bidSize':'bidSizeBalance','askSize':'askSizeBalance'})
        stats = ["min","max","last"]
        df = (df.sort_values('timestamp')
                .groupby(['symbol'])
                .resample(frequency, on="timestamp")
                .agg({'bidSizeBalance': stats,
                      'askSizeBalance': stats,
                      'bidPrice': stats,
                      'askPrice': stats,
                      'timestamp': "last"})
                .reset_index())
        # Flatten the (field, stat) column MultiIndex into "field_stat".
        df.columns = df.columns.map('_'.join)
        df = (df.reset_index()
                .rename(columns={'symbol_':'symbol',
                                 "timestamp_":"timestamp",
                                 "timestamp_last":"timestamp_lastQuote"})
                .drop(columns=['index']))
        df = df.dropna()
        return df

    def resampleTrade(self, df, frequency):
        """Resample aggregated trades per symbol to ``frequency`` bars.

        Expects the output of :meth:`aggregateTrade`.  Produces trade-price
        min/max/last/mean per bar plus the total traded size per side
        (``Buy_totalSize`` / ``Sell_totalSize``); rows with any missing value
        are dropped.
        """
        print ("resampling trade data to ",frequency)
        df = df[["timestamp","symbol","side","total_size","price"]].rename(
            columns={'total_size':'tradeSize','price':'tradePrice'})
        df = df.sort_values('timestamp')

        # Use the requested frequency (not a fixed one) so the trade bars
        # line up with the quote bars they are merged with in resampleData.
        price_df = df.groupby(['symbol']).resample(frequency, on="timestamp").agg({
            'tradePrice' : ["min","max","last","mean"],
            'timestamp':"last"}).reset_index()
        price_df.columns = price_df.columns.map('_'.join)
        price_df = (price_df.reset_index()
                            .rename(columns={'symbol_':'symbol',
                                             "timestamp_":"timestamp",
                                             "timestamp_last":"timestamp_lastTrade"})
                            .drop(columns=['index']))

        side_df = df.groupby(['symbol',"side"]).resample(frequency, on="timestamp").agg({
            'tradeSize' : ["min","max","last","sum"],
            'tradePrice' : ["min","max","last","mean"],
            'timestamp':["last","count"]}).reset_index()
        side_df.columns = side_df.columns.map('_'.join)
        side_df = (side_df.reset_index()
                          .rename(columns={'symbol_':'symbol',
                                           "timestamp_":"timestamp",
                                           "side_":"side",
                                           "timestamp_last":"timestamp_lastTrade"})
                          .drop(columns=['index']))

        # One column of summed size per side.
        total_size = pd.pivot_table(side_df, values='tradeSize_sum',
                                    index=["symbol","timestamp"], columns=['side'])
        total_size = total_size.reset_index().rename(
            columns={'Buy':'Buy_totalSize',"Sell":"Sell_totalSize"})
        df = price_df.merge(total_size, on=['symbol','timestamp'], how='left')
        df = df.dropna()
        return df

    def aggregateTrade(self, df):
        """Sum ``size`` over rows sharing timestamp, symbol, side and price.

        Returns a flat frame with columns
        ``timestamp, symbol, side, price, total_size``.
        """
        # Plain Series aggregation: the nested-dict renaming form of
        # ``agg`` is no longer accepted by pandas.
        grouped = df.groupby(['timestamp', 'symbol', 'side', 'price'])['size'].sum()
        return grouped.rename('total_size').reset_index()

    def resampleData(self):
        """Return quote bars left-joined with trade bars (cached)."""
        if not hasattr(self, 'resampledData'):
            quote = self.loadQuoteData()
            trade = self.loadTradeData()

            trade = self.aggregateTrade(trade)
            quote = self.resampleQuote(quote, self.freq)
            trade = self.resampleTrade(trade, self.freq)
            self.resampledData = pd.merge(quote, trade,
                                          on=["symbol", "timestamp"], how="left")
        return self.resampledData
            
#        for frequency in self.freq:
#            if frequency not in (self.resampledData.keys()):
#                quote.to_csv("quote.csv")
#                trade.to_csv("trade.csv")
#                quote=pd.read_csv("quote.csv")
#                trade=pd.read_csv("trade.csv")


    

# EDA

In [2]:
import matplotlib.pyplot as plt

def plot_interval_distribution(datastore):
    """Plot histograms of the time gaps between consecutive raw records.

    Reads ``datastore.raw_tradeData`` and ``datastore.raw_quoteData``, so
    both feeds must already have been loaded on ``datastore``.
    Trade gaps are measured per (symbol, side) and shown for 1 < gap < 100
    seconds; quote gaps are measured per symbol and shown for 1 < gap < 60.
    """
    def _plot_gaps(raw, group_keys, upper_bound, title):
        # Whole seconds of the full gap: ``.dt.seconds`` alone would drop
        # the day component and report a "1 day 5 s" gap as 5 seconds.
        gaps = raw.groupby(group_keys)["timestamp"].diff().dt.total_seconds() // 1
        gaps = gaps[(gaps < upper_bound) & (gaps > 1)]
        gaps.hist(bins=100)
        plt.title(title)
        plt.xlabel("seconds")
        plt.ylabel("counts")
        plt.show()

    # Titles name the feed that is actually plotted.
    _plot_gaps(datastore.raw_tradeData, ['symbol', 'side'], 100,
               "distribution of interval between trades")
    _plot_gaps(datastore.raw_quoteData, ['symbol'], 60,
               "distribution of interval between quotes")


# Factor Evaluate

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import statsmodels.api as sm

def test_factor(df, x_name,y_name):
    """Print OLS summaries of ``y_name`` on the factors in ``x_name``.

    Rows containing any missing value are dropped first.  One regression
    uses all factors together (plus a constant); it is followed by one
    single-factor regression per entry of ``x_name``.
    """
    clean = df.dropna()
    response = clean[y_name]

    def _fit_and_report(regressors):
        # Fit y ~ const + regressors and print the statsmodels summary.
        design = sm.add_constant(regressors)
        print (sm.OLS(response, design).fit().summary())

    _fit_and_report(clean[x_name])
    for factor in x_name:
        _fit_and_report(clean[factor])


def plot_corr_heatmap(X, title="Corr Heatmap"):
    """Draw an annotated heatmap of the pairwise correlations of ``X``.

    ``X`` is the raw data frame: the correlation matrix is computed here via
    ``X.corr()``.  The upper triangle (diagonal included) is masked so each
    pair appears once.
    """
    X_corr = X.corr()
    plt.figure(figsize=(16,10))
    plt.title(title)
    # Builtin ``bool``: the ``np.bool`` alias no longer exists in NumPy.
    mask = np.zeros_like(X_corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(mask=mask, data=X_corr, annot=True, cmap=cmap)

def pairplot_with_target(X,frequency ,features, target):
    """Scatter-plot every feature against ``target`` on a 3-column grid.

    ``X`` is melted to long form (one row per feature value) and one facet
    is drawn per feature.  ``frequency`` is only used in the figure title.
    """
    def pairplot(x, y, **kwargs):
        # Draw one facet: scatter of the non-missing (x, y) pairs.
        ax = plt.gca()
        ts = pd.DataFrame({'x': x, 'y': y})
        ts=ts.dropna()
        ts.plot.scatter(x="x",y="y",ax=ax)
        plt.xticks(rotation=90)
    f = pd.melt(X, id_vars=[target], value_vars=features)
    # Facet size is left at seaborn's default of 3: the keyword for it was
    # renamed from ``size`` to ``height``, so naming it ties the call to one
    # seaborn generation.
    g = sns.FacetGrid(f, col="variable",  col_wrap=3, sharex=False, sharey=False)
    g = g.map(pairplot, "value", target)
    g.fig.subplots_adjust(top=0.9)
    g.fig.suptitle("scatter plot feature vs. return, interval="+frequency+" minutes")


# Factor Generate

In [4]:
import numpy as np
#make features
def feature_engineer(df, features=('lob_sizeImbalance','midPrice','midPrice_return','bidAskSpread','normalized_bidAskSpread','tradePriceMomentum','tradePriceVolatility','trade_sizeImbalance')):
    """Derive order-book and trade features from resampled bars.

    Parameters
    ----------
    df : DataFrame
        Bars with ``symbol``, ``timestamp`` and the ``*_last`` / ``*_min`` /
        ``*_max`` / ``*_totalSize`` columns read below.  It is not modified.
    features : sequence of str
        Feature columns to return.

    Returns
    -------
    (features_frame, last_trade_price, next_return), all indexed by
    ``timestamp`` and restricted to rows without missing values.
    """
    # Work on a copy so the caller's (possibly cached) frame is untouched.
    df = df.copy()
    bid_size, ask_size = df["bidSizeBalance_last"], df["askSizeBalance_last"]
    bid_px, ask_px = df["bidPrice_last"], df["askPrice_last"]
    buy_sz, sell_sz = df["Buy_totalSize"], df["Sell_totalSize"]

    df["lob_sizeImbalance"] = (bid_size - ask_size) / (bid_size + ask_size)
    df["midPrice"] = (bid_px + ask_px) / 2
    df['midPrice_return'] = df.sort_values('timestamp').groupby(['symbol'])["midPrice"].pct_change(1)
    # Spread is kept as bid - ask, as in the original feature definition.
    df["bidAskSpread"] = bid_px - ask_px
    df["normalized_bidAskSpread"] = (bid_px - ask_px) / df["tradePrice_last"]
    df['return'] = df.sort_values('timestamp').groupby(['symbol'])["tradePrice_last"].pct_change(1)
    price_range = df["tradePrice_max"] - df["tradePrice_min"]
    df['tradePriceMomentum'] = (df["tradePrice_last"] - df["tradePrice_min"]) / price_range
    df['tradePriceVolatility'] = np.log(df["tradePrice_max"]) - np.log(df["tradePrice_min"])
    df["trade_sizeImbalance"] = (buy_sz - sell_sz) / (buy_sz + sell_sz)

    # Target = the same symbol's return over the following bar.  Shift
    # within each symbol in time order, like the returns above, so rows of
    # different symbols (or unsorted rows) never leak into each other.
    df['next_return'] = df.sort_values('timestamp').groupby(['symbol'])['return'].shift(-1)

    df = df.set_index('timestamp')
    # Drops the first bar (no return) and last bar (no target) of each
    # symbol together with any other incomplete row.
    df = df.dropna()
    return df[list(features)], df['tradePrice_last'], df['next_return']


# Prediction Model

In [5]:
import statsmodels.api as sm
from sklearn import model_selection
#from sklearn.impute import SimpleImputer
from sklearn.preprocessing import Imputer
from scipy.stats import boxcox
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from matplotlib import pyplot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
     
class Engine:
    """Split, preprocess and model the engineered features.

    Parameters
    ----------
    data : DataFrame
        Feature matrix.
    price : Series
        Price series kept for the backtest.
    next_return : Series
        Regression target, aligned with ``data``.
    """

    def __init__(self, data, price, next_return):
        self.raw_data = data
        self.price = price
        self.next_return = next_return
        self.X_train = None
        self.y_train = None
        self.X_valid = None
        self.y_valid = None
        self.X_test = None
        self.y_test = None
        self.performance_history=None

        # Fitted preprocessing state, learnt on the training split only.
        self.preprocess_params = {"imputer": None,
                                  "normalizer": None,
                                  "boxcox_X_lam": {},
                                  "boxcox_y_lam": None}
        self.model = {"OLS": None,
                     "logisticR": None}

    def train_test_split(self, base_features, target='next_return'):
        """Split features/target into train and test parts.

        Uses ``sklearn.model_selection.train_test_split`` with
        ``random_state=0`` and then restores index order within each part.
        ``target`` is accepted for interface compatibility but not used: the
        target is always ``self.next_return``.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = model_selection.train_test_split(
            self.raw_data[base_features], self.next_return, random_state = 0)
        self.X_train=self.X_train.sort_index()
        self.X_test=self.X_test.sort_index()
        self.y_train=self.y_train.sort_index()
        self.y_test=self.y_test.sort_index()

    # preprocessing:
    @staticmethod
    def _make_imputer(strategy):
        """Build an imputer, preferring ``SimpleImputer`` when available."""
        try:
            # SimpleImputer is the imputer that supports strategy='constant'.
            from sklearn.impute import SimpleImputer
        except ImportError:
            return Imputer(strategy=strategy)
        return SimpleImputer(strategy=strategy)

    def __impute(self, X, train=False, strategy='constant'):
        """Fill missing values; fit the imputer when ``train`` is True."""
        ind = X.index
        col = X.columns
        if train:
            self.preprocess_params["imputer"] = self._make_imputer(strategy)
            X = pd.DataFrame(self.preprocess_params["imputer"].fit_transform(X), columns=col, index=ind)
        else:
            X = pd.DataFrame(self.preprocess_params["imputer"].transform(X), columns=col, index=ind)
        return X

    def __boxcox(self, X, is_X=True, train=False):
        '''Box-Cox transform every column of ``X``.

        is_X: True, boxcox for X; False, boxcox for y.
        train: True estimates and stores lambda per column; False reuses
        the stored lambda.  A Series is accepted (as for y) and a Series is
        returned in that case.
        '''
        series_input = isinstance(X, pd.Series)
        frame = X.to_frame() if series_input else X
        res = pd.DataFrame(index=frame.index)
        for col_name, series in frame.items():
            if train:
                bc = boxcox(series)
                res[col_name] = bc[0]
                if is_X:
                    self.preprocess_params["boxcox_X_lam"][col_name] = bc[1]
                else:
                    self.preprocess_params["boxcox_y_lam"] = bc[1]
            else:
                if is_X:
                    lam = self.preprocess_params["boxcox_X_lam"][col_name]
                else:
                    lam = self.preprocess_params["boxcox_y_lam"]
                res[col_name] = boxcox(series, lmbda=lam)
        if series_input:
            out = res.iloc[:, 0]
            out.name = X.name
            return out
        return res

    def __boxcox_inv(self, y, lmbda):
        '''inverse boxcox transformation
        '''
        if lmbda == 0:
            return np.exp(y)
        else:
            return np.power(lmbda * y + 1, 1 / lmbda)

    def boxcox_inv(self, y, lmbda):
        """Public wrapper around the inverse Box-Cox transformation."""
        return self.__boxcox_inv(y, lmbda)

    def __normalize(self, X, train=False):
        """Standardise columns; fit the scaler when ``train`` is True."""
        ind = X.index
        col = X.columns
        if train:
            self.preprocess_params["normalizer"] = StandardScaler()
            X = pd.DataFrame(self.preprocess_params["normalizer"].fit_transform(X), columns=col, index=ind)
        else:
            X = pd.DataFrame(self.preprocess_params["normalizer"].transform(X), columns=col, index=ind)
        return X

    def preprocessing(self, impute=True, boxcox=True, normalize=True, **kwargs):
        '''Apply the selected preprocessing steps to the train/test splits.

        other parameters: strategy (imputation strategy, default 'constant')
        '''
        if impute:
            # Fall back to __impute's own default instead of a KeyError.
            strategy = kwargs.get('strategy', 'constant')
            self.X_train = self.__impute(self.X_train, train=True, strategy=strategy)
            self.X_test = self.__impute(self.X_test, train=False, strategy=strategy)

        if boxcox:
            # Inputs are shifted by 1 before the transform; predict() undoes
            # the shift on the target.
            self.X_train = self.__boxcox(1 + self.X_train, is_X=True, train=True)
            self.y_train = self.__boxcox(1 + self.y_train, is_X=False, train=True)
            self.X_test = self.__boxcox(1 + self.X_test, is_X=True, train=False)
            self.y_test = self.__boxcox(1 + self.y_test, is_X=False, train=False)

        if normalize:
            self.X_train = self.__normalize(self.X_train, train=True)
            self.X_test = self.__normalize(self.X_test, train=False)

    # model
    def build_ols(self,summary_tag=True):
        '''Fit an ordinary least squares regression of y_train on X_train.
        '''
        self.model["OLS"] = sm.OLS(self.y_train,sm.add_constant(self.X_train), missing='drop').fit()
        if summary_tag:
            print (self.model["OLS"].summary())

    def build_lgr(self,summary_tag=True):
        '''Fit a logistic regression on the sign of y_train (1 if > 0).

        summary_tag is accepted for symmetry with build_ols and is unused.
        '''
        from sklearn.linear_model import LogisticRegression
        self.model["logisticR"] = LogisticRegression(random_state=0, solver='lbfgs',multi_class='multinomial').fit(sm.add_constant(self.X_train), (self.y_train > 0.0).astype(int))

    # train:
    def train(self, model):
        """Fit the named model.

        model: 'OLS' or 'logisticR'.  Raises ValueError for any other name.
        """
        if model == 'OLS':
            self.build_ols(summary_tag=True)
        elif model == 'logisticR':
            self.build_lgr(summary_tag=True)
        else:
            raise ValueError("unknown model %r; expected 'OLS' or 'logisticR'" % (model,))

    # predict
    def predict(self, model, X):
        """Predict with the named fitted model, undoing the y transform."""
        y_pred = self.model[model].predict(sm.add_constant(X))
        lam = self.preprocess_params["boxcox_y_lam"]
        # ``is not None``: a fitted lambda of exactly 0 is still a transform.
        if lam is not None:
            y_pred = self.__boxcox_inv(y_pred, lam) - 1

        return y_pred

    @staticmethod
    def _plot_prediction(prediction, actual, index):
        """Plot actual vs. predicted values (``index`` is unused)."""
        pyplot.plot(actual, label='actual', color='red')
        pyplot.plot(prediction, label='prediction', color='blue')
        pyplot.title("actual vs. prediction")
        pyplot.legend()
        pyplot.show()

    # evaluate:
    def score(self, predict, true):
        """Return (mean absolute error, root mean squared error)."""
        return mean_absolute_error(predict, true), np.sqrt(mean_squared_error(predict, true))

    def result_evaluation(self, predict, true):
        """Plot predictions against actuals and print the error metrics."""
        self.plot_predictions(predict, true)
        mae, rmse = self.score(predict, true)
        # score() returns the square root of the MSE, so label it as RMSE.
        print ("MAE:",mae,"RMSE:",rmse)

    def plot_predictions(self, predict, true):
        """Overlay the predicted and actual series on one figure."""
        plt.figure(figsize=(6,4))
        plt.title('testing set: actual vs. prediction')
        predict.plot(label='predicted price')
        true.plot(label='actual price')
        plt.xlabel('date')

# Strategy

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class Backtest:
    """Bar-by-bar long-only backtest driven by open/close signal dates.

    Subclasses implement :meth:`_get_signal`, which must set
    ``self._open_signal_iter`` and ``self._close_signal_iter`` (iterators
    over the dates on which to open / close the position).

    Parameters
    ----------
    engine : trained Engine
        Supplies prices, realised next returns, the splits and predictions.
    model : str
        Name of the fitted model in ``engine`` used for the predictions.
    """

    def __init__(self, engine, model="OLS"):  # trained engine
        self.price = engine.price
        self.next_return = engine.next_return
        self.train_period = engine.y_train.index
        self.test_period = engine.y_test.index

        self.y_train_pred = np.asarray(engine.predict(model, engine.X_train), dtype=float)
        self.y_test_pred = np.asarray(engine.predict(model, engine.X_test), dtype=float)

        # The validation split is optional: only populate it when the
        # engine actually carries one.
        X_valid = getattr(engine, 'X_valid', None)
        y_valid = getattr(engine, 'y_valid', None)
        if X_valid is not None and y_valid is not None:
            self.valid_period = y_valid.index
            self.y_valid_pred = np.asarray(engine.predict(model, X_valid), dtype=float)
        else:
            self.valid_period = None
            self.y_valid_pred = None

    def init_trade(self, which):
        '''initialize trade

        which: 'train', 'valid' or 'test' -- the period to trade over.
        Raises ValueError for an unknown period or when the requested
        period has no data.
        '''
        if which not in ('train', 'valid', 'test'):
            raise ValueError("which must be 'train', 'valid' or 'test', got %r" % (which,))

        # trade date, open/close signal
        if which == 'train':
            trade_date, y_pred = self.train_period, self.y_train_pred
        elif which == 'valid':
            trade_date = getattr(self, 'valid_period', None)
            y_pred = getattr(self, 'y_valid_pred', None)
        else:
            trade_date, y_pred = self.test_period, self.y_test_pred
        if trade_date is None or y_pred is None:
            raise ValueError("no %s period is available on this backtest" % which)

        # Account
        self.cash = self.init_cap = 1.0e8
        self.position = {}

        self._trade_date = trade_date
        self._y_pred = y_pred

        self._price_list = self.price.loc[self._trade_date].values
        self._return_list = self.next_return.loc[self._trade_date].values

        self._get_signal()

        self.__trade_date_iter = iter(self._trade_date)
        self.__price_list_iter = iter(self._price_list)
        self.__return_list_iter = iter(self._return_list)

        self.__open_date = next(self._open_signal_iter)
        self.__close_date = next(self._close_signal_iter)

        # risk
        self.tdays = len(self._trade_date)
        self.total_returns = []
        self.risk_indicators_dict = {}

    def _get_signal(self):
        '''open and close signal
        '''
        raise NotImplementedError

    def __update_position_info(self, price):
        '''update position information once per bar
        '''
        if self.position:
            self.position['Bitcoin']['price'] = price
            self.position['Bitcoin']['cap'] = self.position['Bitcoin']['price'] * self.position['Bitcoin']['number']
            self.position['Bitcoin']['holding days'] += 1

    def __open(self, price):
        """Invest all cash at ``price``."""
        num = self.cash / price
        self.position['Bitcoin'] = {'price': price, 'number': num, 'cap': self.cash, 'holding days': 0}
        self.cash = 0

    def __close(self):
        """Liquidate the position at its last marked value."""
        self.cash = self.position['Bitcoin']['cap']
        self.position.pop('Bitcoin')

    # Risks
    def __cal_total_returns(self):
        """Append the cumulative log return of the account."""
        self.total_cap = self.position['Bitcoin']['cap'] if self.position else self.cash
        self.total_returns.append(np.log(self.total_cap) - np.log(self.init_cap))

    def __cal_total_annualized_returns(self):
        # NOTE(review): scales by 250 periods per year and treats every bar
        # as one period -- confirm this matches the bar frequency in use.
        self.total_annualized_returns = self.total_returns[-1] * 250.0 / self.tdays
        self.risk_indicators_dict['Annualized Return'] = self.total_annualized_returns

    def __cal_daily_returns(self):
        """Per-bar log returns, as differences of the cumulative series."""
        self.daily_returns = np.array(self.total_returns)[1:] - np.array(self.total_returns)[:-1]

    def __cal_annualized_volatility(self):
        self.annualized_volatility = self.daily_returns.std() * np.sqrt(250)
        self.risk_indicators_dict['Annualized Volatility'] = self.annualized_volatility

    def __cal_sharpe(self):
        self.sharpe = self.total_annualized_returns / self.annualized_volatility
        self.risk_indicators_dict['Sharpe Ratio'] = self.sharpe

    def __cal_max_drawdown(self):
        """Largest peak-to-trough loss of account value and its dates."""
        caps = self.init_cap * np.exp(self.total_returns)
        start = end = 0
        i = 0           # index of the running peak
        mdd = 0
        cap_max = caps[0]
        for k, cap in enumerate(caps):
            if k > i and 1 - cap / cap_max > mdd:
                mdd = 1 - cap / cap_max
                end = k
                start = i
            if cap > cap_max:
                cap_max = cap
                i = k
        if mdd == 0:
            self.max_drawdown = 0
            self.mdd_start = None
            self.mdd_end = None
        else:
            self.max_drawdown = mdd
            self.mdd_start = self._trade_date[start]
            self.mdd_end = self._trade_date[end]
        self.risk_indicators_dict['Max DrawDown'] = self.max_drawdown

    def __do_daily_calculation(self):
        self.__cal_total_returns()

    def get_risk_indicators(self):
        """Fill ``risk_indicators_dict`` from the recorded returns."""
        self.__cal_daily_returns()
        self.__cal_total_annualized_returns()
        self.__cal_annualized_volatility()
        self.__cal_sharpe()
        self.__cal_max_drawdown()

    def visualize_PNL(self, size=(6, 4), linewidth=('2', '3'), color=('r', 'black')):
        """Plot the cumulative log return over the traded period."""
        total_returns = pd.Series(self.total_returns, index=self._trade_date)
        fig = plt.figure(figsize=size)
        ax1 = fig.add_subplot(111)
        ax1.plot(total_returns, linewidth=linewidth[0], color=color[0])
        plt.axhline(0, linewidth=linewidth[1], color=color[1])
        plt.title("cumulative return")
        ax1.set_xlim(left=self._trade_date[0])
        ax1.plot()
        ax1.yaxis.grid(True)

    def __iter__(self):
        return self

    def __next__(self):
        """Advance one bar: mark to market, record return, then trade."""
        # Exhausting the date iterator raises StopIteration and ends Run().
        date = next(self.__trade_date_iter)
        price = next(self.__price_list_iter)

        self.__update_position_info(price)

        self.__do_daily_calculation()

        if date == self.__close_date:
            # close position
            self.__close()
            self.__close_date = next(self._close_signal_iter)

        if date == self.__open_date:
            # open position
            self.__open(price)
            self.__open_date = next(self._open_signal_iter)
        return

    def Run(self):
        """Trade through the whole period, then compute risk indicators."""
        for _ in self:
            pass
        self.get_risk_indicators()

class Strategy_A(Backtest):
    """Open a position when the prediction exceeds the realised return."""

    def __init__(self, engine):
        Backtest.__init__(self, engine)

    def _get_signal(self):
        """Build the open/close date iterators consumed by the backtest."""
        y_pred = self._y_pred

        # NOTE(review): the holding period equals the number of predictions,
        # so the shifted close mask below is all False and a position, once
        # opened, is never closed -- confirm this is intended.
        holding_days = self._y_pred.shape[0]
        # NOTE(review): _return_list holds the realised next returns, so this
        # signal compares the forecast with the outcome it is forecasting --
        # presumably look-ahead; confirm.
        true_return = self._return_list
        print ("holding_days: ", holding_days)
        open_signal = pd.Series(y_pred > true_return)
        # A bar is "holding" if any signal fired within the previous window.
        is_holding = open_signal.rolling(window=holding_days-1, min_periods=1).apply(lambda x: x.any()).shift(1).fillna(0.0).astype("bool")
        open_signal &= ~is_holding
        close_signal = open_signal.shift(holding_days).fillna(False).astype(bool)
        open_dates = self._trade_date[open_signal.values]
        close_dates = self._trade_date[close_signal.values]

        # Terminate each iterator with NaT so it cannot run out before the
        # trade dates do.  NaT never compares equal to a real date, unlike a
        # fixed calendar date, which could coincide with an actual bar.
        self._open_signal_iter = iter(list(open_dates) + [pd.NaT])
        self._close_signal_iter = iter(list(close_dates) + [pd.NaT])
        
class Strategy_B(Backtest):
    """Open a position when the predicted return is positive."""

    def __init__(self, engine):
        Backtest.__init__(self, engine)

    def _get_signal(self):
        """Build the open/close date iterators consumed by the backtest."""
        y_pred = self._y_pred

        # NOTE(review): the holding period equals the number of predictions,
        # so the shifted close mask below is all False and a position, once
        # opened, is never closed -- confirm this is intended.
        holding_days = self._y_pred.shape[0]
        print ("holding_days: ", holding_days)
        open_signal = pd.Series(y_pred > 0)
        # A bar is "holding" if any signal fired within the previous window.
        is_holding = open_signal.rolling(window=holding_days-1, min_periods=1).apply(lambda x: x.any()).shift(1).fillna(0.0).astype("bool")
        open_signal &= ~is_holding
        close_signal = open_signal.shift(holding_days).fillna(False).astype(bool)
        open_dates = self._trade_date[open_signal.values]
        close_dates = self._trade_date[close_signal.values]

        # Terminate each iterator with NaT so it cannot run out before the
        # trade dates do.  NaT never compares equal to a real date, unlike a
        # fixed calendar date, which could coincide with an actual bar.
        self._open_signal_iter = iter(list(open_dates) + [pd.NaT])
        self._close_signal_iter = iter(list(close_dates) + [pd.NaT])
        


# Main

In [None]:
import dataDownloader as db
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
pd.set_option('display.expand_frame_repr', False)    
import numpy as np
import seaborn as sns
import EDA as eda
import factorEvaluate as factorEvaluator
import factorGenerate as factorGenerator
import predictionModel as model
import strategy as backtest

#%%

#config
start_date='20160801'
end_date='20160801'
frequency="15T"#how many minutes:300T=5h
#load data
datastore = db.DataStore(start_date,end_date,frequency)
data=datastore.resampleData()
eda.plot_interval_distribution(datastore)
#%%
#factor engineer
data, price, target_return = factorGenerator.feature_engineer(data)
base_features = list(set(data.columns))
data_df=pd.concat([data, target_return], axis=1)
#%%
factorEvaluator.test_factor(data_df, base_features,'next_return')
# plot_corr_heatmap computes the correlation matrix itself, so pass the raw
# frame; passing data_df.corr() would plot the correlation of correlations.
factorEvaluator.plot_corr_heatmap(data_df,title="Corr Heatmap for interval="+frequency+" minutes")
factorEvaluator.pairplot_with_target(data_df,frequency, list(set(data.columns) - set(['symbol'])), 'next_return')

#%%
#####################
#set up model
engine = model.Engine(data, price, target_return)
engine.train_test_split(base_features)
engine.preprocessing(boxcox=False, strategy='constant')
#prediction
engine.train('OLS')
test_predict=engine.predict('OLS', engine.X_test)
engine.result_evaluation(test_predict,engine.y_test)
#%%
########################
#back testing
strategy_A = backtest.Strategy_A(engine)
strategy_A.init_trade('test')
strategy_A.Run()
strategy_A.visualize_PNL()
print (strategy_A.risk_indicators_dict)

#%%
########################
#back testing
strategy_B = backtest.Strategy_B(engine)
strategy_B.init_trade('test')
strategy_B.Run()
strategy_B.visualize_PNL()
# Report the strategy that was just run (previously printed strategy_A's).
print (strategy_B.risk_indicators_dict)


#%%
#####################
#direction model
engine.train('logisticR')
test_predict=pd.DataFrame(engine.predict('logisticR', engine.X_test), index=engine.X_test.index)
engine.result_evaluation(test_predict,(engine.y_test > 0.0).astype(int))
########################
#back testing
# NOTE(review): Backtest builds its signals from engine.predict("OLS", ...),
# so this run does not use the logistic model's predictions -- confirm intent.
strategy_B = backtest.Strategy_B(engine)
strategy_B.init_trade('test')
strategy_B.Run()
strategy_B.visualize_PNL()
# Report the strategy that was just run (previously printed strategy_A's).
print (strategy_B.risk_indicators_dict)


