In [1]:
import pandas as pd
import swifter
import numpy as np
# Load the semicolon-separated price file and label its seven columns.
# (Pass nrows=1000 to read_csv for a quick trial run on a small sample.)
# NOTE(review): read_csv treats the first line as a header before the names
# are overwritten below; if the file has no header row, the first bar is
# silently lost -- confirm against the data file.
df = pd.read_csv("data/gc-1m.csv", sep=";").set_axis(
    ["Date", "Time", "Open", "High", "Low", "Close", "Volume"],
    axis=1,
)

In [2]:
#Wilder’s Smoothing function
def Wilder(data, periods):
    """Smooth a 1-D series with Wilder's moving average.

    The output is seeded with the simple average of the first ``periods``
    observations (counted from the first non-NaN value) and then updated with
    ``prev * (periods - 1) / periods + current / periods``.

    Parameters
    ----------
    data : array-like of float (list, ndarray or pandas Series)
        Input values. Leading NaNs are skipped. The input is converted to a
        plain NumPy array, so all indexing is positional regardless of the
        Series index.
    periods : int
        Smoothing length; must be >= 1.

    Returns
    -------
    numpy.ndarray
        Float array of ``len(data)``. Positions before the seed are NaN. If
        there are fewer than ``periods`` values after the first non-NaN one
        (or no non-NaN value at all), the whole array is NaN.

    Raises
    ------
    ValueError
        If ``periods`` is smaller than 1.

    Notes
    -----
    A NaN that appears after the seed position propagates to every later
    output value, because each value depends on the previous one.
    """
    if periods < 1:
        raise ValueError("periods must be >= 1")

    # Work on a NumPy copy: integer indexing on a Series with a non-integer
    # index is label-based in newer pandas and slow inside a Python loop.
    values = np.asarray(data, dtype=float)
    smoothed = np.full(len(values), np.nan)

    valid = np.flatnonzero(~np.isnan(values))
    if valid.size == 0:
        return smoothed  # nothing to smooth

    start = valid[0]
    seed = start + periods - 1
    if seed >= len(values):
        return smoothed  # not enough observations for the initial average

    # Simple moving average as the seed; NaNs inside the window are skipped,
    # which matches what Series.mean() does for Series callers.
    smoothed[seed] = np.nanmean(values[start:seed + 1])
    for i in range(seed + 1, len(values)):
        smoothed[i] = (smoothed[i - 1] * (periods - 1) + values[i]) / periods
    return smoothed

In [3]:
# Combine the Date and Time text columns into one timestamp and use it as the
# index. pd.to_datetime is called once on the whole column: this is far faster
# than parsing row by row, and the date format is resolved for the column as a
# whole instead of being guessed independently for every row.
dt = pd.to_datetime(df['Date'] + ' ' + df['Time'])
df["Time"] = dt
# Date is now folded into Time; Volume is not used by any later cell shown here.
df = df.drop(columns=["Date", "Volume"]).set_index(["Time"])

In [4]:
# Simple Moving Average (SMA): 5- and 15-bar rolling means of Close, plus the
# ratio of the slow average to the fast one.
for sma_col, sma_window in (('SMA_5', 5), ('SMA_15', 15)):
    df[sma_col] = df['Close'].rolling(window=sma_window).mean()
df['SMA_ratio'] = df['SMA_15'].div(df['SMA_5'])

In [5]:
# Average True Range (ATR)
# True range = the largest of: High-Low, |High-prev_close|, |prev_close-Low|.
# np.maximum propagates NaN, so the first bar (no previous close) stays NaN.
df['prev_close'] = df['Close'].shift(1)
bar_range = df['High'] - df['Low']
gap_to_high = (df['High'] - df['prev_close']).abs()
gap_to_low = (df['prev_close'] - df['Low']).abs()
df['TR'] = np.maximum(bar_range, np.maximum(gap_to_high, gap_to_low))

# Wilder-smooth the true range over both horizons and compare them.
TR_data = df.copy()
for atr_col, atr_period in (('ATR_5', 5), ('ATR_15', 15)):
    df[atr_col] = Wilder(TR_data['TR'], atr_period)
df['ATR_Ratio'] = df['ATR_5'].div(df['ATR_15'])

In [6]:
# Average Directional Index (ADX)
df['prev_high'] = df['High'].shift(1)
df['prev_low'] = df['Low'].shift(1)

# Directional movement: the upward move counts only when it is positive and
# larger than the downward move (and vice versa); otherwise it is 0.
# Bars without a previous bar stay NaN.
up_move = df['High'] - df['prev_high']
down_move = df['prev_low'] - df['Low']

plus_dm = np.where((df['High'] > df['prev_high']) & (up_move > down_move),
                   up_move,
                   0)
df['+DM'] = np.where(~np.isnan(df['prev_high']), plus_dm, np.nan)

minus_dm = np.where((df['prev_low'] > df['Low']) & (down_move > up_move),
                    down_move,
                    0)
df['-DM'] = np.where(~np.isnan(df['prev_low']), minus_dm, np.nan)

# Wilder-smoothed directional movement for both horizons.
ADX_data = df.copy()
for dm_col, dm_source, dm_period in (
    ('+DM_5', '+DM', 5),
    ('-DM_5', '-DM', 5),
    ('+DM_15', '+DM', 15),
    ('-DM_15', '-DM', 15),
):
    df[dm_col] = Wilder(ADX_data[dm_source], dm_period)

# Directional indicators: smoothed DM as a percentage of the matching ATR.
for di_col, dm_col, atr_col in (
    ('+DI_5', '+DM_5', 'ATR_5'),
    ('-DI_5', '-DM_5', 'ATR_5'),
    ('+DI_15', '+DM_15', 'ATR_15'),
    ('-DI_15', '-DM_15', 'ATR_15'),
):
    df[di_col] = df[dm_col].div(df[atr_col]).mul(100)

# DX: |+DI - -DI| as a percentage of (+DI + -DI), rounded to a whole number.
for dx_col, plus_col, minus_col in (
    ('DX_5', '+DI_5', '-DI_5'),
    ('DX_15', '+DI_15', '-DI_15'),
):
    di_spread = (df[plus_col] - df[minus_col]).abs()
    di_total = df[plus_col] + df[minus_col]
    df[dx_col] = np.round(di_spread / di_total * 100)

# ADX is the Wilder-smoothed DX.
ADX_data = df.copy()
for adx_col, dx_col, adx_period in (('ADX_5', 'DX_5', 5), ('ADX_15', 'DX_15', 15)):
    df[adx_col] = Wilder(ADX_data[dx_col], adx_period)

In [7]:
# Stochastic Oscillators
# Rolling extremes over the 5- and 15-bar look-back windows.
df['Lowest_5D'] = df['Low'].rolling(window=5).min()
df['High_5D'] = df['High'].rolling(window=5).max()
df['Lowest_15D'] = df['Low'].rolling(window=15).min()
df['High_15D'] = df['High'].rolling(window=15).max()

# Position of Close inside the window's High-Low range, in percent.
# A window whose highest High equals its lowest Low divides by zero and
# produces a non-finite value for that bar.
df['Stochastic_5'] = ((df['Close'] - df['Lowest_5D'])/(df['High_5D'] - df['Lowest_5D']))*100
df['Stochastic_15'] = ((df['Close'] - df['Lowest_15D'])/(df['High_15D'] - df['Lowest_15D']))*100

# Smoothed oscillator: each horizon averages its OWN raw oscillator.
# (The 15-bar line previously averaged Stochastic_5, leaving Stochastic_15 unused.)
df['Stochastic_%D_5'] = df['Stochastic_5'].rolling(window=5).mean()
df['Stochastic_%D_15'] = df['Stochastic_15'].rolling(window=15).mean()

df['Stochastic_Ratio'] = df['Stochastic_%D_5']/df['Stochastic_%D_15']

In [8]:
# Relative Strength Index (RSI)
# Split the bar-to-bar change of Close into gains (Up) and losses (Down);
# both are non-negative, and the first bar stays NaN.
df['Diff'] = df['Close'].diff()
df['Up'] = df['Diff'].clip(lower=0)
df['Down'] = df['Diff'].clip(upper=0).abs()

# Simple rolling averages of gains and losses for both horizons.
for avg_col, move_col, avg_window in (
    ('avg_5up', 'Up', 5),
    ('avg_5down', 'Down', 5),
    ('avg_15up', 'Up', 15),
    ('avg_15down', 'Down', 15),
):
    df[avg_col] = df[move_col].rolling(window=avg_window).mean()

# Relative strength = average gain / average loss.
df['RS_5'] = df['avg_5up'].div(df['avg_5down'])
df['RS_15'] = df['avg_15up'].div(df['avg_15down'])

# RSI = 100 - 100 / (1 + RS)
for rsi_col, rs_col in (('RSI_5', 'RS_5'), ('RSI_15', 'RS_15')):
    df[rsi_col] = 100 - (100 / (1 + df[rs_col]))

df['RSI_ratio'] = df['RSI_5'].div(df['RSI_15'])

In [9]:
# Moving Average Convergence Divergence (MACD)
# Recursive (adjust=False) exponential moving averages of Close.
df['12Ewm'] = df['Close'].ewm(span=12, adjust=False).mean()
df['26Ewm'] = df['Close'].ewm(span=26, adjust=False).mean()
# MACD line = fast EMA (12) minus slow EMA (26), so it is positive when the
# short-term average is above the long-term one. The operands were previously
# reversed, which flipped the sign of the indicator.
df['MACD'] = df['12Ewm'] - df['26Ewm']

In [10]:
# Bollinger Bands: 15-bar rolling mean of Close with bands two rolling
# standard deviations above and below it.
close_window = df['Close'].rolling(window=15)
df['15MA'] = close_window.mean()
df['SD'] = close_window.std()
band_offset = 2 * df['SD']
df['upperband'] = df['15MA'] + band_offset
df['lowerband'] = df['15MA'] - band_offset

In [11]:
# Rate of Change: fractional change of Close versus 15 bars earlier.
close_prices = df['Close']
df['RC'] = close_prices.pct_change(periods=15)

In [12]:
# interpolate() and dropna() return NEW frames; their results must be kept,
# otherwise the CSV is written with every NaN still in place.
# Interior gaps are filled by interpolation first; rows that still contain a
# NaN afterwards (e.g. the indicator warm-up rows at the start) are dropped.
df = df.interpolate().dropna()
df.to_csv("data/gc-1m_all.csv")