In [24]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime as dt
from sklearn.neighbors import KNeighborsClassifier
import pickle

from ta.trend import MACD
from ta.momentum import RSIIndicator
from ta.momentum import StochasticOscillator
from ta.volatility import BollingerBands
from ta.volume import AccDistIndexIndicator
from ta.volatility import AverageTrueRange

In [25]:
symbols = ['EURUSD=X','USDJPY=X','GBPUSD=X','USDCHF=X','AUDUSD=X','USDCAD=X','NZDUSD=X']

data = dict()
for symbol in symbols:
    data[symbol] = yf.download(symbol,interval='1d')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [26]:
def add_ta_signals(df):
    df = df.copy()

    o,h,l,c,v = df['Open'],df['High'],df['Low'],df['Close'],df['Volume']
    
    df['Ret'] = np.log(c).diff().shift(-1)

    df['MACDD'] = MACD(c).macd_diff()
    df['MACDS'] = MACD(c).macd_signal()

    df['RSI'] = RSIIndicator(c).rsi()

    df['SO'] = StochasticOscillator(h,l,c).stoch()
    df['SOS'] = StochasticOscillator(h,l,c).stoch_signal()

    df['BBH'] = BollingerBands(c).bollinger_hband_indicator() - c
    df['BBL'] = BollingerBands(c).bollinger_lband_indicator() - c

    df['ADI'] = AccDistIndexIndicator(h,l,c,v).acc_dist_index()

    df['HLR'] = c / (h.rolling(252).max() - l.rolling(252).min()) - 1 / 2

    df['ATR'] = AverageTrueRange(h,l,c).average_true_range()

    df = df.drop(columns=['Open','High','Low','Close','Adj Close','Volume'])
    #df.iloc[:,1:-1] = df.iloc[:,1:-1].div(df.loc[:,'ATR'],axis=0)
    return df

In [63]:
train_data = dict()
ta_features = dict()
for symbol in symbols:
    ta_data = add_ta_signals(data[symbol])
    train_data[symbol] = ta_data.dropna()
    ta_features[symbol] = ta_data.iloc[-252:]

In [56]:
concat_train_data = pd.concat(train_data.values())

In [166]:
from sklearn.neighbors import KNeighborsClassifier

knns = [KNeighborsClassifier(n_neighbors=10) for _ in range(100)]
for i,knn in enumerate(knns):
        bootstrap_train_data = concat_train_data.sample(frac=0.3,replace=True)
        knn.fit(X=bootstrap_train_data.iloc[:,1:],
                y=np.sign(bootstrap_train_data.iloc[:,0]))

In [36]:
# save
for i in range(100):
    with open(f'models/knn_{i}.pkl','wb') as f:
        pickle.dump(knns[i],f)

In [30]:
# load
knns = list()
for i in range(100):
    with open(f'models/knn_{i}.pkl', 'rb') as f:
        knn = pickle.load(f)
        knns.append(knn)

In [64]:
for symbol in symbols:
    features = ta_features[symbol]
    pos = pd.DataFrame()
    for i,knn in enumerate(knns):
        pos[f'{i}'] = knn.predict(features.iloc[:,1:])
    features['Pred'] = pos.mean(axis=1).to_numpy()
    features['Pos'] = (np.around((features['Pred'].expanding().rank(pct=True)*2-1)*5))/5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Pred'] = pos.mean(axis=1).to_numpy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Pos'] = (np.around((features['Pred'].expanding().rank(pct=True)*2-1)*5))/5


In [58]:
base_to_usd = { 'EURUSD=X': 'USDEUR=X',
                'GBPUSD=X': 'USDGBP=X',
                'AUDUSD=X': 'USDAUD=X',
                'NZDUSD=X': 'USDNZD=X'}
prc_to_usd = yf.download(list(base_to_usd.values()))['Close']

[*********************100%***********************]  4 of 4 completed


In [76]:
import datetime, pytz
nyc_datetime = datetime.datetime.now(pytz.timezone('US/Eastern'))
print(nyc_datetime)
print('Using the nearest-day position is correct!!!')
print('Backtest is buying at today\'s close based on data generated at today\'s close, and predicting the return from today to tomorrow')
print('Trading 18x is optimal but 5x is conservative')

2024-01-24 11:22:08.789902-05:00
Using the nearest-day position is correct!!!
Backtest is buying at today's close based on data generated at today's close, and predicting the return from today to tomorrow
Trading 18x is optimal but 5x is conservative


In [65]:
leverage = 6
acc_size = 50000
lot_size = 100000

position = pd.DataFrame()
for symbol in symbols:
    capital = leverage * acc_size / len(symbols)
    
    if symbol in base_to_usd.keys():
        lot_usd = lot_size / prc_to_usd[base_to_usd[symbol]].iloc[-1]
    else:
        lot_usd = lot_size * 1
    
    pos_value = ta_features[symbol]['Pos'] * capital

    position[symbol] = pos_value / lot_usd
np.around(position,2)[::-1]

Unnamed: 0_level_0,EURUSD=X,USDJPY=X,GBPUSD=X,USDCHF=X,AUDUSD=X,USDCAD=X,NZDUSD=X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-24,-0.13,0.57,0.45,0.29,0.00,0.43,-0.46
2024-01-23,-0.00,0.14,-0.45,0.14,-0.00,-0.57,1.16
2024-01-22,-0.65,0.14,0.34,-0.43,-0.43,0.14,-0.46
2024-01-19,0.39,0.29,0.11,-0.57,-0.65,0.00,-0.23
2024-01-18,-0.26,-0.14,0.34,0.14,1.08,0.00,1.16
...,...,...,...,...,...,...,...
2023-02-13,0.65,-0.14,0.11,0.14,0.65,0.14,0.70
2023-02-10,0.26,0.00,0.56,0.00,0.43,-0.29,0.46
2023-02-09,-0.26,0.71,0.56,0.71,0.43,-0.29,0.46
2023-02-08,0.00,0.71,0.56,0.71,0.00,0.71,0.00


In [75]:
position_change = np.around(position,2).diff()
position_change[::-1]

Unnamed: 0_level_0,EURUSD=X,USDJPY=X,GBPUSD=X,USDCHF=X,AUDUSD=X,USDCAD=X,NZDUSD=X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-24,-0.13,0.43,0.90,0.15,0.00,1.00,-1.62
2024-01-23,0.65,0.00,-0.79,0.57,0.43,-0.71,1.62
2024-01-22,-1.04,-0.15,0.23,0.14,0.22,0.14,-0.23
2024-01-19,0.65,0.43,-0.23,-0.71,-1.73,0.00,-1.39
2024-01-18,-0.52,-0.57,0.12,-0.43,1.94,-0.14,1.86
...,...,...,...,...,...,...,...
2023-02-13,0.39,-0.14,-0.45,0.14,0.22,0.43,0.24
2023-02-10,0.52,-0.71,0.00,-0.71,0.00,0.00,0.00
2023-02-09,-0.26,0.00,0.00,0.00,0.43,-1.00,0.46
2023-02-08,-0.65,0.00,0.00,0.00,-1.08,0.00,-1.16
