In [3]:
import warnings

import numpy
import pandas
import xgboost as xgb
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import set_random_seed
from numpy import loadtxt
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

warnings.filterwarnings('ignore')

#example.groupby('name')['number'].fillna(method='ffill')

def read_macro_data(url = 'https://raw.githubusercontent.com/nikhilchandra-stats/macrodatasetsraw/master/data/daily_fx_macro_data.csv', 
                    encoding_var = 'cp1252'):
    """Load the macro-event CSV into a DataFrame.

    Parameters
    ----------
    url : str
        Location of the CSV; handed to ``pandas.read_csv`` unchanged, so a
        local path works as well as a remote address.
    encoding_var : str
        Text encoding used to decode the file.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.
    """
    reader_options = {'encoding': encoding_var}
    macro_frame = pandas.read_csv(url, **reader_options)
    return macro_frame

def read_yahoo_finance(url = 'https://query1.finance.yahoo.com/v7/finance/download/AUDUSD=X?period1=1550448000&period2=1708214400&interval=1d&events=history&includeAdjustedClose=true', 
                       start_date = '2019-01-01',
                       end_date = '2024-02-16',
                       asset_symbol = 'AUDUSD'):
    """Download daily price history for one symbol as a DataFrame.

    The request URL is assembled from ``asset_symbol`` and the two dates.

    Parameters
    ----------
    url : str
        Accepted for compatibility but never read; the request URL is always
        rebuilt from the other arguments.
    start_date, end_date : str
        Bounds of the requested window; each is parsed with
        ``pandas.to_datetime`` and sent as whole epoch seconds.
    asset_symbol : str
        Ticker to request. A symbol containing ``'.'`` is sent unchanged;
        any other symbol has ``'=X'`` appended.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pandas.read_csv`` parses from the download endpoint.
    """
    ticker_suffix = '?' if '.' in asset_symbol else '=X?'

    period_start = int(pandas.to_datetime(start_date).timestamp())
    period_end = int(pandas.to_datetime(end_date).timestamp())

    request_url = (
        'https://query1.finance.yahoo.com/v7/finance/download/'
        f'{asset_symbol}{ticker_suffix}'
        f'period1={period_start}&'
        f'period2={period_end}&interval=1d&events=history&includeAdjustedClose=true'
    )
    return pandas.read_csv(request_url)

# test = ['jj', 'S&P', 'RBA']
# final = [x for x in test if '(S&P|RBA)' in test]

# Build a data set that can be joined to the asset. This example looks at 2 macro features for 2 assets:
# PMI for USD and RBA CPI for AUD.

def join_asset_to_macro(macro_data = None,
                        macro_vars = (r'S&P Global Manufacturing PMI', r'RBA Trimmed Mean CPI \(QoQ\)'),
                        symbol_vars = ('USD','AUD'),
                        asset_data = None,
                        pip_value = 100
                        ):
    """Attach forward-filled macro readings to a daily price table.

    ``macro_vars[i]`` and ``symbol_vars[i]`` form a pair: macro rows whose
    ``event`` matches the first and whose ``symbol`` matches the second are
    left-joined onto the prices by date, in a new column named
    ``macro_vars[i] + " " + symbol_vars[i]``. Gaps between releases are
    forward-filled. Neither input frame is modified.

    Parameters
    ----------
    macro_data : pandas.DataFrame, optional
        Table with ``date``, ``event``, ``symbol`` and ``actual`` columns.
        Fetched with ``read_macro_data()`` when omitted.
    macro_vars : sequence of str
        Regular expressions matched against ``event`` after month
        abbreviations have been stripped from it.
    symbol_vars : sequence of str
        Regular expressions matched against ``symbol``; must be the same
        length as ``macro_vars``.
    asset_data : pandas.DataFrame, optional
        Price table with ``Date``, ``Open`` and ``Close`` columns. Fetched
        with ``read_yahoo_finance`` (AUDUSD, 2019-01-01 to 2024-02-16) when
        omitted.
    pip_value : number
        Multiplier applied to ``Close - Open``.

    Returns
    -------
    pandas.DataFrame
        The price table with ``Date`` renamed to ``date``, one column per
        macro/symbol pair, and ``daily return`` = ``(Close - Open) * pip_value``.

    Raises
    ------
    ValueError
        If ``macro_vars`` and ``symbol_vars`` differ in length.
    """
    if len(macro_vars) != len(symbol_vars):
        raise ValueError(
            "macro_vars and symbol_vars must pair up one-to-one "
            f"(got {len(macro_vars)} and {len(symbol_vars)} entries)"
        )

    # Load the defaults at call time so that defining the function does no
    # network I/O and no frame is shared between calls.
    if macro_data is None:
        macro_data = read_macro_data()
    if asset_data is None:
        asset_data = read_yahoo_finance(start_date = '2019-01-01',
                                        end_date = '2024-02-16',
                                        asset_symbol = 'AUDUSD')

    any_event_pattern = "|".join(macro_vars)
    any_symbol_pattern = "|".join(symbol_vars)

    # Strip month abbreviations from the event names. The pattern is not
    # anchored, so it removes those letter runs wherever they occur.
    events = macro_data['event'].str.replace(
        '(?i)(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)', '', regex=True)

    # One aligned mask instead of two chained filters; na=False drops rows
    # whose event or symbol is missing.
    wanted = (events.str.contains(any_event_pattern, regex=True, na=False)
              & macro_data['symbol'].str.contains(any_symbol_pattern, regex=True, na=False))

    # .loc[...] followed by .assign builds a new frame, leaving the caller's
    # macro_data untouched.
    macro = macro_data.loc[wanted].assign(
        event=events[wanted],
        date=lambda frame: pandas.to_datetime(frame['date']).dt.date,
        actual=lambda frame: pandas.to_numeric(frame['actual']),
    )

    # rename returns a new frame, so the caller's asset_data keeps its
    # original 'Date' column.
    asset = asset_data.rename(columns={'Date': 'date'})
    asset['date'] = pandas.to_datetime(asset['date']).dt.date

    for event_regex, symbol_regex in zip(macro_vars, symbol_vars):
        pair_rows = macro[macro['event'].str.contains(event_regex)
                          & macro['symbol'].str.contains(symbol_regex)]
        new_col_name = event_regex + " " + symbol_regex
        feature = pair_rows[['date', 'actual']].rename(columns={'actual': new_col_name})
        # NOTE(review): if several macro rows match a pair on the same date,
        # this left merge repeats that price row - confirm whether intended.
        asset = pandas.merge(asset, feature, on="date", how="left")
        # Carry the latest reading forward until the next release.
        asset[new_col_name] = asset[new_col_name].ffill()

    asset['daily return'] = (asset['Close'] - asset['Open']) * pip_value

    return asset





In [4]:

macro_data = read_macro_data()

# Each entry is a regular expression matched against macro event names and is
# paired by position with symbol_vars below. Raw strings keep the backslash
# escapes intact. The empty `\(\)` in some entries matches the parentheses left
# behind once join_asset_to_macro strips month abbreviations from event names.
macro_vars = [r'S&P Global Manufacturing PMI', r'RBA Trimmed Mean CPI \(QoQ\)',
              r'CFTC AUD NC Net', r'CFTC Gold NC Net Positions', r'Consumer Price Index \(MoM\)',
              r'Unemployment Change\(\)', r'Consumer Price Index \(YoY\)\(\)', r'Producer Price Index \(MoM\)\(\)',
              r'Retail Sales \(MoM\)\(\)', r'New Home Sales \(MoM\)\(\)']

symbol_vars = ['USD','AUD','AUD', 'USD', 'USD', 'EUR', 'EUR', 'USD', 'USD', 'USD']
pip_value = 100

asset_data = read_yahoo_finance(
                start_date = '2014-01-01',
                end_date = '2024-02-16',
                asset_symbol = 'AUDUSD'
                )

# Assigned on its own: the earlier chained form
# `asset_data_EUR_USD = asset_data = ...` replaced the AUDUSD prices with
# EURUSD, so both joined sets were built from the same EURUSD frame.
asset_data_EUR_USD = read_yahoo_finance(
                start_date = '2014-01-01',
                end_date = '2024-02-16',
                asset_symbol = 'EURUSD'
                )

macro_test_set = join_asset_to_macro(macro_data=macro_data,
                                     macro_vars=macro_vars,
                                     symbol_vars=symbol_vars,
                                     asset_data=asset_data,
                                     pip_value=pip_value)

macro_test_set_EUR = join_asset_to_macro(macro_data=macro_data,
                                         macro_vars=macro_vars,
                                         symbol_vars=symbol_vars,
                                         asset_data=asset_data_EUR_USD,
                                         pip_value=pip_value)


def label_daily_returns(joined_set):
    """Add the win/loss targets and the previous row's return, then drop incomplete rows.

    Returns a new frame with three extra columns: 'dependant variable'
    ('win' when 'daily return' > 0, else 'loss'), 'dependant variable bin'
    (1/0 for the same test) and 'lagged daily return' (the preceding row's
    'daily return'). Rows containing any NaN are removed last, which drops
    the first row (no lag available) and rows still missing a macro reading.
    """
    labelled = joined_set.copy()
    is_win = labelled['daily return'] > 0
    labelled['dependant variable'] = numpy.where(is_win, 'win', 'loss')
    labelled['dependant variable bin'] = numpy.where(is_win, 1, 0)
    # Shift before dropping rows so the lag is always the preceding row of
    # the price table, not the previous surviving row.
    labelled['lagged daily return'] = labelled['daily return'].shift(1)
    return labelled.dropna()


# Both pairs get identical treatment; previously only the AUD set had NaN rows removed.
macro_test_set = label_daily_returns(macro_test_set)
macro_test_set_EUR = label_daily_returns(macro_test_set_EUR)

eur_aud_macro_test_set = pandas.concat([macro_test_set, macro_test_set_EUR], ignore_index=True)

eur_aud_macro_test_set.columns



Index(['date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'S&P Global Manufacturing PMI USD', 'RBA Trimmed Mean CPI \(QoQ\) AUD',
       'CFTC AUD NC Net AUD', 'CFTC Gold NC Net Positions USD',
       'Consumer Price Index \(MoM\) USD', 'Unemployment Change\(\) EUR',
       'Consumer Price Index \(YoY\)\(\) EUR',
       'Producer Price Index \(MoM\)\(\) USD', 'Retail Sales \(MoM\)\(\) USD',
       'New Home Sales \(MoM\)\(\) USD', 'daily return', 'dependant variable',
       'dependant variable bin', 'lagged daily return'],
      dtype='object')

In [36]:
# Feature columns fed to the network. The names carry the regex escapes from
# the macro patterns, so raw strings are used to keep the backslashes literal.
input_vars = [r'S&P Global Manufacturing PMI USD', r'RBA Trimmed Mean CPI \(QoQ\) AUD',
       r'CFTC AUD NC Net AUD', r'CFTC Gold NC Net Positions USD',
       r'Consumer Price Index \(MoM\) USD', r'Unemployment Change\(\) EUR',
       r'Consumer Price Index \(YoY\)\(\) EUR',
       r'Producer Price Index \(MoM\)\(\) USD', r'Retail Sales \(MoM\)\(\) USD',
       r'New Home Sales \(MoM\)\(\) USD', 'lagged daily return']
target_var = 'dependant variable bin'

# Train on complete rows only: a NaN in any feature propagates into the loss.
model_rows = eur_aud_macro_test_set[input_vars + [target_var]].dropna()
inputs = model_rows[input_vars]
output_var = model_rows[target_var]

# Fix the random state so weight initialisation and batch shuffling repeat on re-run.
SEED = 42
set_random_seed(SEED)

model = Sequential()
# First hidden layer; the input width follows the feature list instead of a hard-coded 11
model.add(Dense(12, input_shape=(len(input_vars),), activation='relu'))
# Second hidden layer
model.add(Dense(8, activation='relu'))
# Output layer: a single sigmoid unit for the binary win/loss target
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(inputs.to_numpy(dtype='float32'), output_var.to_numpy(dtype='float32'),
          epochs=20, batch_size=500)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x22f0d90cc50>