In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf


# Apply one of matplotlib's bundled style sheets to every figure drawn below.
# NOTE(review): the "seaborn-v0_8" name presumably requires matplotlib >= 3.6 -- confirm against the environment.
plt.style.use("seaborn-v0_8")

class IterativeBase():
    """Common state and order bookkeeping for bar-by-bar (iterative) backtests.

    An instance tracks a cash balance, the number of units currently held, a
    trade counter and a position flag, and loads the price data when it is
    constructed. Strategy classes build on ``buy_instrument``,
    ``sell_instrument`` and ``close_position``.
    """

    def __init__(self, symbol, start, end, amount, use_spread=True):
        """Store the run parameters, initialise the account and load the data.

        Parameters
        ----------
        symbol : label used as the plot title and in messages.
        start, end : bounds used to slice the time index in ``get_data``.
        amount : initial cash balance.
        use_spread : when truthy, half of the bar's spread is charged per trade.
        """
        self.symbol = symbol
        self.start = start
        self.end = end
        self.initial_balance = amount
        self.current_balance = amount
        self.units = 0
        self.trades = 0
        self.position = 0
        self.use_spread = use_spread
        self.get_data()

    def get_data(self):
        """Load ``detailed.csv`` and store the prepared frame in ``self.data``.

        The file is read from the current working directory with its ``time``
        column as a parsed datetime index. Rows with missing values are
        dropped, the frame is cut to ``start``..``end`` and a ``returns``
        column with log returns of ``price`` is added (the first row's return
        is NaN because it has no predecessor).
        """
        raw = pd.read_csv("detailed.csv", parse_dates=["time"], index_col="time").dropna()
        raw = raw.loc[self.start:self.end].copy()
        raw["returns"] = np.log(raw["price"] / raw["price"].shift(1))
        self.data = raw

    def plot_data(self, cols=None):
        """Plot the given column(s) of the data; defaults to ``"price"``."""
        if cols is None:
            cols = "price"
        self.data[cols].plot(figsize=(12, 8), title=self.symbol)

    def show_data(self):
        """Return the underlying data frame."""
        return self.data

    def get_values(self, bar):
        """Return ``(date, price, spread)`` for the bar at integer position ``bar``.

        ``date`` is the calendar date of the bar as a string; ``price`` and
        ``spread`` are rounded to five decimals.
        """
        date = str(self.data.index[bar].date())
        price = round(self.data.price.iloc[bar], 5)
        spread = round(self.data.spread.iloc[bar], 5)
        return date, price, spread

    def print_current_balance(self, bar):
        """Print the cash balance, stamped with the date of ``bar``."""
        date, _, _ = self.get_values(bar)
        print(f"{date} | Current Balance : {round(self.current_balance,2)}")

    @staticmethod
    def _resolve_units(units, amount, price, action):
        """Return the number of units to trade from either ``units`` or ``amount``.

        ``amount`` takes precedence and is converted to whole units at
        ``price``. Raises ``ValueError`` when neither is supplied.
        """
        if amount is not None:
            return int(amount / price)
        if units is None:
            raise ValueError(f"{action} requires either units or amount")
        return units

    def buy_instrument(self, bar, units=None, amount=None):
        """Buy at ``bar`` and update balance, units and the trade counter.

        Parameters
        ----------
        bar : integer position of the bar to trade on.
        units : number of units to buy (ignored when ``amount`` is given).
        amount : cash to spend; converted to whole units at the execution price.

        Raises
        ------
        ValueError : if neither ``units`` nor ``amount`` is supplied.
        """
        date, price, spread = self.get_values(bar)
        # Truthiness (not ``is True``) keeps this consistent with
        # close_position for flags such as 1 or numpy.True_.
        if self.use_spread:
            price += spread / 2
        units = self._resolve_units(units, amount, price, "buy_instrument")
        self.current_balance -= units * price
        self.units += units
        self.trades += 1
        print(f"{date} | Buying {units} at {price}")

    def sell_instrument(self, bar, units=None, amount=None):
        """Sell at ``bar`` and update balance, units and the trade counter.

        Parameters
        ----------
        bar : integer position of the bar to trade on.
        units : number of units to sell (ignored when ``amount`` is given).
        amount : cash value to sell; converted to whole units at the execution price.

        Raises
        ------
        ValueError : if neither ``units`` nor ``amount`` is supplied.
        """
        date, price, spread = self.get_values(bar)
        # Same truthiness rule as buy_instrument / close_position.
        if self.use_spread:
            price -= spread / 2
        units = self._resolve_units(units, amount, price, "sell_instrument")
        self.current_balance += units * price
        self.units -= units
        self.trades += 1
        print(f"{date} | Selling {units} at {price}")

    def print_current_position_value(self, bar):
        """Print the value of the held units at the price of ``bar``."""
        date, price, _ = self.get_values(bar)
        cpv = self.units * price
        print(f"{date} | Current Position Value : {round(cpv,2)}")

    def print_current_nav(self, bar):
        """Print position value plus cash balance at the price of ``bar``."""
        date, price, _ = self.get_values(bar)
        nav = self.units * price + self.current_balance
        print(f"{date} | Net Asset Value : {round(nav,2)}")

    def close_position(self, bar):
        """Liquidate all held units at ``bar`` and print a performance summary.

        The position is valued at the bar's price; when spreads are enabled,
        half the spread per unit is deducted as the closing cost. The trade
        counter is incremented and ``units`` is reset to zero.
        """
        date, price, spread = self.get_values(bar)
        self.current_balance += self.units * price
        if self.use_spread:
            self.current_balance -= abs(self.units) * (spread / 2)
        print(75*"-")
        print(f"{date} | +++ Closing Final Position +++")
        print(f"{date} | Closing position for {self.units} at {price}")
        self.units = 0
        self.trades += 1
        perf = ((self.current_balance - self.initial_balance) / self.initial_balance) * 100
        self.print_current_balance(bar)
        print(f"{date} | Net Performance = {round(perf,2)}%")
        print(f"{date} | Number of trades executed = {self.trades}")
        print(75*"-")
        

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier


class IterativeML(IterativeBase):
    """Iterative backtester built around a classifier on lagged log returns.

    NOTE: ``test_ml_strategy`` is still work in progress. It resets the
    backtest state, builds the lagged features and instantiates the model, but
    it does not yet fit the model or place any trades.
    """

    # repr
    def __repr__(self):
        return f"Iterative ML Backtester (symbol= {self.symbol} , start= {self.start} , end= {self.end})"

    # helper methods
    def go_long(self, bar, units=None, amount=None):
        """Buy at ``bar``, first buying back an open short position.

        ``units`` takes precedence over ``amount``; ``amount="all"`` spends the
        whole current cash balance. With neither given, only the short (if
        any) is covered.
        """
        if self.position == -1:
            # Holdings are negative after selling short, so negate to buy them back.
            self.buy_instrument(bar, units=-self.units)
        if units:
            self.buy_instrument(bar, units=units)
        elif amount:
            if amount == "all":
                amount = self.current_balance
            self.buy_instrument(bar, amount=amount)

    def go_short(self, bar, units=None, amount=None):
        """Sell at ``bar``, first selling an open long position.

        ``units`` takes precedence over ``amount``; ``amount="all"`` sells for
        the whole current cash balance. With neither given, only the long (if
        any) is sold.
        """
        if self.position == 1:
            self.sell_instrument(bar, units=self.units)
        if units:
            self.sell_instrument(bar, units=units)
        elif amount:
            if amount == "all":
                amount = self.current_balance
            self.sell_instrument(bar, amount=amount)

    def split_data(self, start, end):
        """Return a copy of the rows of ``self.data`` between ``start`` and ``end``."""
        return self.data.loc[start:end].copy()

    def prepare_features(self, start, end):
        """Build ``self.data_subset`` with ``self.lags`` lagged-return columns.

        The subset covers ``start``..``end``; columns ``lag1``..``lag<n>`` hold
        the ``returns`` column shifted by 1..n bars and their names are stored
        in ``self.feature_columns``. Rows with missing values are dropped.
        Requires ``self.lags`` to be set (``test_ml_strategy`` does this).
        """
        self.data_subset = self.split_data(start, end)
        self.feature_columns = []
        for lag in range(1, self.lags + 1):
            col = f"lag{lag}"
            self.data_subset[col] = self.data_subset["returns"].shift(lag)
            self.feature_columns.append(col)
        self.data_subset.dropna(inplace=True)

    def scale_features(self, recalc=True):
        """Standardise the feature columns of ``self.data_subset`` in place.

        With ``recalc`` the per-column mean and standard deviation are
        recomputed from the current subset and cached; otherwise the cached
        ``self.means`` / ``self.stand_devs`` are reused.
        """
        if recalc:
            self.means = self.data_subset[self.feature_columns].mean()
            self.stand_devs = self.data_subset[self.feature_columns].std()

        features = self.data_subset[self.feature_columns]
        self.data_subset[self.feature_columns] = (features - self.means) / self.stand_devs

    def fit_model(self, start, end):
        """Fit ``self.model`` on ``start``..``end``.

        Features are the freshly scaled lag columns; the target is the sign of
        the bar's return.
        """
        self.prepare_features(start, end)
        self.scale_features(recalc=True)
        self.model.fit(self.data_subset[self.feature_columns], np.sign(self.data_subset["returns"]))

    def test_ml_strategy(self, train_ratio=0.7, lags=5):
        """Set up a backtest of the ML strategy (work in progress).

        Parameters
        ----------
        train_ratio : fraction of the bars reserved for training, in (0, 1].
        lags : number of lagged-return features.

        Raises
        ------
        ValueError : if ``train_ratio`` is outside (0, 1] or leaves no
            training bars.
        """
        # Fail early with a clear message instead of an IndexError further down.
        if not 0 < train_ratio <= 1:
            raise ValueError(f"train_ratio must be in (0, 1], got {train_ratio!r}")

        # initialisation print out
        print(75*"-")
        print(f"Testing ML strategy | {self.symbol} | lags = {lags}")
        print(75*"-")

        # reset
        self.position = 0
        self.units = 0
        self.trades = 0
        self.current_balance = self.initial_balance
        self.n = 0
        self.results = None

        # assign variables
        self.lags = lags
        self.train_ratio = train_ratio

        # reset the dataset before deriving any dates from it
        self.get_data()

        # determine datetime for start, end and split (training and testing)
        index = self.data.index
        split_index = int(len(index) * self.train_ratio)
        if split_index < 1:
            raise ValueError("train_ratio leaves no bars for training")
        split_date = index[split_index - 1]  # last training bar; for the upcoming fit step
        train_start = index[0]
        test_end = index[-1]

        # lagged features over the full range
        self.prepare_features(train_start, test_end)

        # prepare model
        self.model = OneVsRestClassifier(LogisticRegression(C=1e6, max_iter=100000))

        # NOTE(review): placeholder counter loop kept from the draft; it only
        # prints 0..lags-1 and leaves self.n == lags.
        for _ in range(lags):
            print(self.n)
            self.n += 1

        # TODO: fit the model on train_start..split_date and iterate over the
        # test bars (go_long / go_short on the predicted sign, then
        # close_position on the final bar). Not implemented yet.




                









NameError: name 'IterativeBase' is not defined

In [34]:
# EURUSD backtester: 10,000 starting balance, spread costs enabled.
ml = IterativeML(
    symbol="EURUSD",
    start="2006-12-31",
    end="2020-06-30",
    amount=10000,
    use_spread=True,
)

In [35]:
# Prints the text produced by IterativeML.__repr__.
print(ml)

Iterative ML Backtester (symbol= EURUSD , start= 2006-12-31 , end= 2020-06-30)


In [36]:
# show_data() returns the backtester's underlying DataFrame (price, spread, returns).
ml.show_data()

Unnamed: 0_level_0,price,spread,returns
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-12-31 22:00:00+00:00,1.31985,0.00100,
2007-01-01 22:00:00+00:00,1.32734,0.00015,0.005659
2007-01-02 22:00:00+00:00,1.31688,0.00015,-0.007912
2007-01-03 22:00:00+00:00,1.30845,0.00015,-0.006422
2007-01-04 22:00:00+00:00,1.30025,0.00100,-0.006287
...,...,...,...
2020-06-23 21:00:00+00:00,1.12507,0.00030,-0.005151
2020-06-24 21:00:00+00:00,1.12180,0.00023,-0.002911
2020-06-25 21:00:00+00:00,1.12184,0.00041,0.000036
2020-06-28 21:00:00+00:00,1.12424,0.00018,0.002137


In [37]:
# Pass lags by keyword: the first positional parameter is train_ratio, so a
# bare 5 would be taken as the training fraction and overrun the data.
ml.test_ml_strategy(lags=5)

---------------------------------------------------------------------------
Testing ML strategy | EURUSD | lags = 5
---------------------------------------------------------------------------
0
1
2
3
4


lag1
lag2
lag3
lag4
lag5
lag6
lag7
lag8
lag9
lag10
lag11
lag12
lag13
lag14
lag15
lag16
lag17
lag18
lag19
lag20
