In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime as dt
from sklearn.neighbors import KNeighborsClassifier
import pickle

from ta.trend import MACD
from ta.momentum import RSIIndicator
from ta.momentum import StochasticOscillator
from ta.volatility import BollingerBands
from ta.volume import AccDistIndexIndicator
from ta.volatility import AverageTrueRange

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Data feed

In [2]:
# Yahoo Finance tickers of the currency pairs used for training.
symbols = [
    'EURUSD=X',
    'USDJPY=X',
    'GBPUSD=X',
    'USDCHF=X',
    'AUDUSD=X',
    'USDCAD=X',
    'NZDUSD=X',
]

# One daily-bar price frame per ticker, keyed by the ticker string.
data = {ticker: yf.download(ticker, interval='1d') for ticker in symbols}

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed


# Add TA features

In [4]:
def add_ta_signals(df):
    """Build the next-bar return target and technical-analysis features.

    Parameters
    ----------
    df : pandas.DataFrame
        Price frame with 'Open', 'High', 'Low', 'Close' and 'Volume' columns.
        An 'Adj Close' column is removed as well when it is present.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` without the raw price/volume columns and with the
        columns 'Ret', 'MACDD', 'MACDS', 'RSI', 'SO', 'SOS', 'BBH', 'BBL',
        'ADI', 'HLR' and 'ATR' appended in that order. No rows are dropped
        here, so indicator warm-up rows and the final row of 'Ret' hold NaN.

    Raises
    ------
    KeyError
        If 'High', 'Low', 'Close' or 'Volume' is missing from ``df``.
    """
    df = df.copy()

    h, l, c, v = df['High'], df['Low'], df['Close'], df['Volume']

    # Target: log return from this bar's close to the next bar's close
    # (shift(-1) pulls tomorrow's return onto today's row).
    df['Ret'] = np.log(c).diff().shift(-1)

    # Each indicator is built once (library-default windows) and reused.
    macd = MACD(c)
    df['MACDD'] = macd.macd_diff()
    df['MACDS'] = macd.macd_signal()

    df['RSI'] = RSIIndicator(c).rsi()

    stochastic = StochasticOscillator(h, l, c)
    df['SO'] = stochastic.stoch()
    df['SOS'] = stochastic.stoch_signal()

    # Distance from the close to each Bollinger band. The *_indicator()
    # methods return 0/1 breakout flags, so subtracting the close from them
    # does not give a band distance; the band values themselves are used.
    bands = BollingerBands(c)
    df['BBH'] = bands.bollinger_hband() - c
    df['BBL'] = bands.bollinger_lband() - c

    df['ADI'] = AccDistIndexIndicator(h, l, c, v).acc_dist_index()

    # NOTE(review): as written this is Close / (252-bar high-low range) - 0.5,
    # not the position of Close inside that range -- confirm this is intended.
    df['HLR'] = c / (h.rolling(252).max() - l.rolling(252).min()) - 1 / 2

    df['ATR'] = AverageTrueRange(h, l, c).average_true_range()

    # errors='ignore' keeps this working when 'Adj Close' is not in the frame;
    # the other required columns were already accessed above.
    df = df.drop(columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'],
                 errors='ignore')
    # Optional ATR normalisation of the features (disabled):
    #df.iloc[:,1:-1] = df.iloc[:,1:-1].div(df.loc[:,'ATR'],axis=0)
    return df

In [5]:
# Feature frame per symbol; any row containing a NaN is discarded.
train_data = {
    ticker: add_ta_signals(data[ticker]).dropna()
    for ticker in symbols
}

In [6]:
# Stack the per-symbol feature frames row-wise into a single training table.
concat_train_data = pd.concat(train_data.values(), axis=0)

In [7]:
# Ensemble of 100 k-NN classifiers, each fitted on its own bootstrap sample
# (30% of the rows, drawn with replacement) of the pooled training table.
knns = [KNeighborsClassifier(n_neighbors=10) for _ in range(100)]
for i, knn in enumerate(knns):
    # Seeding every draw with the model index keeps the samples distinct from
    # one another while making the whole ensemble reproducible on re-run.
    bootstrap_train_data = concat_train_data.sample(frac=0.3, replace=True,
                                                    random_state=i)
    # Column 0 is the 'Ret' target; its sign is the class label and all
    # remaining columns are the features.
    knn.fit(X=bootstrap_train_data.iloc[:, 1:],
            y=np.sign(bootstrap_train_data.iloc[:, 0]))

# Save model

In [11]:
# Ask twice before writing, because existing files under models/ are overwritten.
is_save = input('Are you sure to save the models? This may overwrite the existing models. (y/n)')
# The second prompt is only shown when the first answer was 'y'.
is_confirm = input('Please re-confirm. (y/n)') if is_save == 'y' else 'n'

if is_save == 'y' and is_confirm == 'y':
    # One pickle per classifier, named by its position in the ensemble.
    for i, knn in enumerate(knns):
        with open(f'models/knn_{i}.pkl', 'wb') as f:
            pickle.dump(knn, f)
    print('Models are saved.')
else:
    # Exactly one status line is printed: previously a successful save was
    # followed by a contradictory 'Models are not saved.' message.
    print('Models are not saved.')

Models are not saved.
