In [24]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime as dt
from sklearn.neighbors import KNeighborsClassifier
import pickle

from ta.trend import MACD
from ta.momentum import RSIIndicator
from ta.momentum import StochasticOscillator
from ta.volatility import BollingerBands
from ta.volume import AccDistIndexIndicator
from ta.volatility import AverageTrueRange

In [25]:
symbols = ['EURUSD=X','USDJPY=X','GBPUSD=X','USDCHF=X','AUDUSD=X','USDCAD=X','NZDUSD=X']

data = dict()
for symbol in symbols:
    data[symbol] = yf.download(symbol,interval='1d')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [26]:
def add_ta_signals(df):
    df = df.copy()

    o,h,l,c,v = df['Open'],df['High'],df['Low'],df['Close'],df['Volume']
    
    df['Ret'] = np.log(c).diff().shift(-1)

    df['MACDD'] = MACD(c).macd_diff()
    df['MACDS'] = MACD(c).macd_signal()

    df['RSI'] = RSIIndicator(c).rsi()

    df['SO'] = StochasticOscillator(h,l,c).stoch()
    df['SOS'] = StochasticOscillator(h,l,c).stoch_signal()

    df['BBH'] = BollingerBands(c).bollinger_hband_indicator() - c
    df['BBL'] = BollingerBands(c).bollinger_lband_indicator() - c

    df['ADI'] = AccDistIndexIndicator(h,l,c,v).acc_dist_index()

    df['HLR'] = c / (h.rolling(252).max() - l.rolling(252).min()) - 1 / 2

    df['ATR'] = AverageTrueRange(h,l,c).average_true_range()

    df = df.drop(columns=['Open','High','Low','Close','Adj Close','Volume'])
    #df.iloc[:,1:-1] = df.iloc[:,1:-1].div(df.loc[:,'ATR'],axis=0)
    return df

In [27]:
train_data = dict()
ta_features = dict()
for symbol in symbols:
    ta_data = add_ta_signals(data[symbol])
    train_data[symbol] = ta_data.dropna()
    ta_features[symbol] = ta_data.iloc[-252:]

In [28]:
concat_train_data = pd.concat(train_data.values())

In [166]:
from sklearn.neighbors import KNeighborsClassifier

knns = [KNeighborsClassifier(n_neighbors=10) for _ in range(100)]
for i,knn in enumerate(knns):
        bootstrap_train_data = concat_train_data.sample(frac=0.3,replace=True)
        knn.fit(X=bootstrap_train_data.iloc[:,1:],
                y=np.sign(bootstrap_train_data.iloc[:,0]))

In [36]:
# save
for i in range(100):
    with open(f'models/knn_{i}.pkl','wb') as f:
        pickle.dump(knns[i],f)

In [30]:
# load
knns = list()
for i in range(100):
    with open(f'models/knn_{i}.pkl', 'rb') as f:
        knn = pickle.load(f)
        knns.append(knn)

In [31]:
for symbol in symbols:
    features = ta_features[symbol]
    pos = pd.DataFrame()
    for i,knn in enumerate(knns):
        pos[f'{i}'] = knn.predict(features.iloc[:,1:])
    features['Pred'] = pos.mean(axis=1).to_numpy()
    features['Pos'] = (np.around((features['Pred'].expanding().rank(pct=True)*2-1)*5))/5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Pred'] = pos.mean(axis=1).to_numpy()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features['Pos'] = (np.around((features['Pred'].expanding().rank(pct=True)*2-1)*5))/5


In [32]:
to_usd = {  'USDJPY=X': 'USDJPY=X',
            'USDCHF=X': 'USDCHF=X',
            'USDCAD=X': 'USDCAD=X'}
prc_to_usd = yf.download(list(to_usd.values()))['Close']

[*********************100%***********************]  3 of 3 completed


In [35]:
leverage = 10
acc_size = 50000
lot_size = 100000

position = pd.DataFrame()
for symbol in symbols:
    capital_usd = leverage * acc_size / len(symbols)
    
    if symbol in to_usd.keys():
        capital = capital_usd #/ prc_to_usd[to_usd[symbol]].iloc[-1]
    else:
        capital = capital_usd
    
    pos_value = ta_features[symbol]['Pos'] * capital

    position[symbol] = pos_value / lot_size
position[::-1]

Unnamed: 0_level_0,EURUSD=X,USDJPY=X,GBPUSD=X,USDCHF=X,AUDUSD=X,USDCAD=X,NZDUSD=X
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-24,-0.142857,0.571429,0.571429,0.285714,0.000000,0.428571,-0.285714
2024-01-23,-0.000000,0.142857,-0.571429,0.142857,-0.000000,-0.571429,0.714286
2024-01-22,-0.714286,0.142857,0.428571,-0.428571,-0.285714,0.142857,-0.285714
2024-01-19,0.428571,0.285714,0.142857,-0.571429,-0.428571,0.000000,-0.142857
2024-01-18,-0.285714,-0.142857,0.428571,0.142857,0.714286,0.000000,0.714286
...,...,...,...,...,...,...,...
2023-02-13,0.714286,-0.142857,0.142857,0.142857,0.428571,0.142857,0.428571
2023-02-10,0.285714,0.000000,0.714286,0.000000,0.285714,-0.285714,0.285714
2023-02-09,-0.285714,0.714286,0.714286,0.714286,0.285714,-0.285714,0.285714
2023-02-08,0.000000,0.714286,0.714286,0.714286,0.000000,0.714286,0.000000
