In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import ta

In [2]:
#load one instrument's raw price history
def readFinancialData(name):
    """Read ``FinancialData/<name>.csv`` into a frame indexed by ``Date``.

    The ``Code`` column is removed from the result; every other column is
    returned as read from the file.
    """
    return (
        pd.read_csv(f'FinancialData/{name}.csv')
        .set_index('Date')
        .drop(columns='Code')
    )

In [3]:
#add technical indicators
def addIndicators(df, windows):
    """Return a copy of ``df`` extended with technical-indicator columns.

    For every window ``w`` in ``windows`` these columns are added:
    ``Close_ratio_w`` / ``High_ratio_w`` / ``Low_ratio_w`` (price divided by
    its ``w``-row rolling mean), ``RSI_w``, ``STO_w``, ``STOsig_w``,
    ``Momentum_w`` (``Close`` minus ``Close`` ``w`` rows earlier), ``ROC_w``,
    ``Trix_w``, ``Vortex_w`` and ``EMA_w``.  ``MACD``, ``MACD_diff`` and
    ``MACD_sig`` are added once, using the ``ta`` defaults
    (window_slow=26, window_fast=12, window_sign=9).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``High``, ``Low`` and ``Close`` columns.  It is not
        modified; the indicators are written to a copy.
    windows : sequence of int
        Look-back lengths in rows.  The order does not matter.

    Returns
    -------
    pandas.DataFrame
        The extended copy without its first ``3 * max(windows)`` rows
        (presumably the indicator warm-up period, where values are NaN).
        With an empty ``windows`` nothing is trimmed.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    high = df['High']
    low = df['Low']
    close = df['Close']
    # Disabled in the original notebook and left out here: MFI, EOM, ADI and
    # OBV (they need a Volume column) and CCI.  EOM was additionally noted to
    # produce unexpected trailing NaN values.
    for i in windows:
        df[f'Close_ratio_{i}'] = close / close.rolling(i).mean()
        df[f'High_ratio_{i}'] = high / high.rolling(i).mean()
        df[f'Low_ratio_{i}'] = low / low.rolling(i).mean()
        #RSI
        df[f'RSI_{i}'] = ta.momentum.RSIIndicator(close=close, window=i).rsi()
        #Stochastic Oscillator and its signal line share one indicator object
        stochastic = ta.momentum.StochasticOscillator(high=high, low=low, close=close, window=i, smooth_window=3)
        df[f'STO_{i}'] = stochastic.stoch()
        df[f'STOsig_{i}'] = stochastic.stoch_signal()
        #Momentum
        df[f'Momentum_{i}'] = close.diff(i)
        #Rate of Change (ROC)
        df[f'ROC_{i}'] = ta.momentum.ROCIndicator(close=close, window=i).roc()
        #Trix
        df[f'Trix_{i}'] = ta.trend.TRIXIndicator(close=close, window=i).trix()
        #Vortex
        df[f'Vortex_{i}'] = ta.trend.VortexIndicator(high=high, low=low, close=close, window=i).vortex_indicator_diff()
        #EMA
        df[f'EMA_{i}'] = ta.trend.EMAIndicator(close=close, window=i).ema_indicator()
    #MACD line, histogram and signal line come from a single indicator object
    macd = ta.trend.MACD(close=close)
    df['MACD'] = macd.macd()
    df['MACD_diff'] = macd.macd_diff()
    df['MACD_sig'] = macd.macd_signal()
    # Trim by the LARGEST window rather than the last-listed one, so an
    # unsorted ``windows`` still removes the full warm-up period.
    warmup = 3 * max(windows, default=0)
    return df.iloc[warmup:]

In [4]:
#report NaN and out-of-float32-range values, column by column
def checknan(X):
    """Print where ``X`` holds values that are missing or too large in magnitude.

    Columns are scanned in order.  For each offending cell one line is printed:

    * ``<column> <row label>`` for a missing value (NaN);
    * ``out-of-range value in <column> <row label>`` for a numeric value whose
      magnitude exceeds the float32 maximum (this includes ``+inf``/``-inf``).

    The float32 bound replaces the original literal ``3.4e+38`` and is
    presumably there because the downstream scikit-learn forest converts its
    input to float32.  Non-numeric columns are only checked for missing values.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame to inspect.  It is not modified.

    Returns
    -------
    None
    """
    float32_max = np.finfo(np.float32).max
    for col in X.columns:
        column = X[col]
        missing = column.isna()
        if pd.api.types.is_numeric_dtype(column):
            # abs() makes the check symmetric so -inf / large negatives are caught too.
            out_of_range = column.abs() > float32_max
        else:
            out_of_range = pd.Series(False, index=column.index)
        flagged = missing | out_of_range
        # Walk only the offending cells, in row order.
        for label, is_missing in missing[flagged].items():
            if is_missing:
                print(col, label)
            else:
                print(f'out-of-range value in {col}', label)

In [5]:
#build the n-rows-ahead close ratio used as the regression target
def getTarget(df,n):
    """Return ``Close`` shifted ``n`` rows ahead divided by the current ``Close``.

    The result is a Series named ``'Target'`` on the same index as ``df``;
    its last ``n`` entries are NaN because no later row exists to compare with.
    """
    close = df['Close']
    ahead = close.shift(-n)
    return ahead.div(close).rename('Target')

In [6]:
#train and predict
def train_predict(X, n, range_row):
    """Fit a random forest on the rows before a window, then predict the window.

    The regression target is the ``n``-rows-ahead close ratio built by
    ``getTarget``; every column of ``X`` is used as a feature.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame.  Must contain a ``Close`` column (read by ``getTarget``).
    n : int
        Forecast horizon in rows.
    range_row : sequence
        ``(begin_row, final_row)`` positional bounds as accepted by ``iloc``.
        Training uses rows ``[:begin_row - n]``.  When ``final_row`` is
        non-zero the rows ``[begin_row:final_row]`` are predicted; when it is
        ``0`` only the last row of ``X`` is predicted.

    Returns
    -------
    Whatever ``RandomForestRegressor.predict`` returns for the selected rows
    (one prediction per row).
    """
    begin_row = range_row[0]
    final_row = range_row[1]
    y = getTarget(X, n)
    model = RandomForestRegressor(n_estimators=200, min_samples_split=100, random_state=3)
    # The target of a row reads Close n rows later, so stopping n rows before
    # begin_row keeps every training target out of the prediction window.
    train_end = begin_row - n
    model.fit(X=X.iloc[:train_end], y=y.iloc[:train_end])
    if final_row != 0:
        rows_to_predict = X.iloc[begin_row:final_row]
    else:
        # A slice cannot end at 0, so 0 is the sentinel for "latest row only".
        # The list selector keeps a one-row DataFrame (not a bare array), so the
        # model sees the same column names it was fitted with.
        rows_to_predict = X.iloc[[-1]]
    return model.predict(rows_to_predict)

In [7]:
#create training groups backward
def getRange(length, n_group):
    """Build ``n_group`` adjacent ``(begin, end)`` row-offset pairs, newest first.

    Offsets are negative positions counted from the end of a frame.  Group
    ``k`` is ``(-1 - length*(k+1), -1 - length*k)``, so each group spans
    ``length`` rows and ends exactly where the next-newer group begins.
    """
    anchor = -1
    return [
        (anchor - length * (k + 1), anchor - length * k)
        for k in range(n_group)
    ]

### Main cells

In [None]:
#generate predictions for neural network training
windows = [2,3,5,14,21]
length = 63
n_group = 7
horizons = range(1,16)
tickers = 'SPX,NKX,2YUSY,10YUSY,USDEUR,USDJPY,BTCUSD,ETH'
# The backtest windows depend only on length/n_group, so build them once
# instead of once per ticker.
ranges = getRange(length,n_group)
for ticker in tickers.split(','):
    print(ticker, end='')
    data = readFinancialData(ticker)
    X = addIndicators(data, windows)
    checknan(X)
    # One column per horizon, pre-filled with FLOAT zeros: the predictions are
    # floats, and writing floats into an integer column relies on implicit
    # upcasting that recent pandas versions deprecate.
    df_pred = pd.DataFrame(0.0, index=data.index, columns=[str(n) for n in horizons])
    for r in ranges:
        print('\ngroup',r, end=' ')
        # Row labels of this group; identical for every horizon.
        group_rows = X.iloc[r[0]:r[1]].index
        for n in horizons:
            print(n, end=' ')
            y_pred = train_predict(X, n, r)
            y_series = pd.Series(y_pred, index=group_rows)
            df_pred.loc[y_series.index,str(n)] += y_series
    df_pred.to_csv(f'FinancialData/{ticker}_pred2.csv')
    print(ticker, 'saved')

In [None]:
#predict for latest day
ticker = 'ETH'

windows = [2,3,5,14,21]
data = readFinancialData(ticker)
X = addIndicators(data, windows)
checknan(X)
# One column per horizon, pre-filled with FLOAT zeros: the predictions are
# floats, and writing floats into an integer column relies on implicit
# upcasting that recent pandas versions deprecate.
df_pred = pd.DataFrame(0.0, index=data.index, columns=[str(n) for n in range(1,16)])

for n in range(1,16):
    print(n, end=' ')
    # (-1, 0): train on everything up to 1+n rows before the end and predict
    # only the last row of X.
    y_pred = train_predict(X, n, (-1,0))
    y_series = pd.Series(y_pred, index=[X.index[-1]])
    df_pred.loc[y_series.index,str(n)] += y_series

### Visualization of data

In [None]:
# Emit the last row of df_pred as one comma-terminated line of values.
latest_predictions = df_pred.iloc[-1].tolist()
for value in latest_predictions:
    print(value, end=',')

In [None]:
# Show the column labels of the feature frame X.
X.columns

In [None]:
# Show (number of rows, number of columns) of the feature frame X.
X.shape

In [None]:
# Show the row labels of the feature frame X.
X.index