In [1]:
import urllib.request
import pandas
import numpy
import bs4
import requests as rq
import urllib
import random
import itertools
import fs

def get_read_macro_data(url = 'https://raw.githubusercontent.com/nikhilchandra-stats/macrodatasetsraw/master/data/daily_fx_macro_data.csv', 
                    encoding_var = 'cp1252'):
    returned_data = pandas.read_csv(url, encoding=encoding_var)
    returned_data['date'] = pandas.to_datetime(returned_data['date'])
    return returned_data

def create_monthly_dummy_frame(start_date = '2011-01-01', end_date = '2024/04/24', freq_var = 'D'):
    dates_series = pandas.date_range(start=start_date, end=end_date,freq=freq_var)
    returned_data = pandas.DataFrame(dates_series, columns=['date_var'])
    return returned_data

def get_asset_file_path(asset_name, data_path = "C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data"):
    directory_object = fs.open_fs(data_path)
    files_in_directory = list(directory_object.listdir('/'))
    files_in_directory = list(map(lambda a:"C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data/" + a, files_in_directory))
    asset_file = pandas.Series(files_in_directory)
    asset_file = asset_file[asset_file.str.contains(asset_name, regex=True, case=True)].to_list()
    return asset_file[0]

def harmonise_dates_join(data_to_join,
                         date_col = 'date', 
                         start_date = '2011-01-01',
                         end_date = '2024/04/24', 
                         freq_var = 'D'):
    dummy_frame = create_monthly_dummy_frame(start_date = '2011-01-01', end_date = '2024/04/24', freq_var = 'D')
    joined_data = dummy_frame.merge(data_to_join, left_on='date_var', right_on=date_col,how='left')
    joined_data = joined_data.fillna(method='ffill')
    return joined_data

In [2]:
# Testing Functions

raw_macro_data = get_read_macro_data() 

In [38]:
def get_asset_index_vars(
        macro_data = get_read_macro_data(), 
        country_name = "AUD",
        rename_conditions_vars = ["Industry",
                                  "Manufacturing|Mfg", 
                                  "Construction",
                                  "Services", 
                                  "Westpac", 
                                  "National Australia Bank"], 
        rename_choices = ["Industry Index", 
                        "Manufacturing Index",
                        "Construction Index",
                        "Services Index", 
                        "Consumer Confidence Index", 
                        "Business Confidence Index"],                                  
        remove_filter = "QoQ"
        ):
    asset_data = macro_data[macro_data['symbol'].str.contains(country_name)]
    asset_data_index_vars = asset_data[['event', 'actual', 'date', 'symbol']]

    if remove_filter != "":
        asset_data_index_vars = asset_data_index_vars[~asset_data_index_vars["event"].str.contains(remove_filter)] 
        
    rename_conditions = []
    for i in range(0, len(rename_conditions_vars)):
            rename_conditions.append( (asset_data_index_vars['event'].str.contains(rename_conditions_vars[i])) )
            print(rename_conditions[i])
        
    print("Conditions Length ", len(rename_conditions) )
    print("Choices Length ",  len(rename_conditions_vars))

    asset_data_index_vars['Renamed Event'] = numpy.select(
            condlist=rename_conditions,
            choicelist= rename_choices, 
            default="Unknown")
        
    asset_data_index_vars = asset_data_index_vars[asset_data_index_vars['Renamed Event'] != "Unknown"]
    asset_data_index_vars = asset_data_index_vars[['Renamed Event', 'date', 'actual']]
    asset_data_index_vars_wide = asset_data_index_vars.pivot_table(index="date",
                                                    columns="Renamed Event", 
                                                    values="actual", 
                                                    aggfunc='median')
    asset_data_index_vars_wide = asset_data_index_vars_wide.ffill()
    asset_data_index_vars_wide['symbol'] = country_name
    return [asset_data_index_vars, asset_data_index_vars_wide]

aud_index_vars = get_asset_index_vars(
        macro_data = get_read_macro_data(), 
        country_name = "AUD",
        rename_conditions_vars = ["Industry",
                                  "AiG Manufacturing|AiG Mfg|AIG Manufacturing|AIG Mfg", 
                                  "AiG Construction|AIG Construction",
                                  "AiG Services|AIG Services", 
                                  "Westpac Consumer Confidence", 
                                  "National Australia Bank Business Confidence"
                                #   "Judo Bank Composite" 
                                #   "Judo Bank Service" 
                                #   "Retail Sales s\.a \(MoM\)", 
                                #   "Trade Balance \(MoM\)"
                                  ], 
        rename_choices = ["Industry Index", 
                        "Manufacturing Index",
                        "Construction Index",
                        "Services Index", 
                        "Consumer Confidence Index", 
                        "Business Confidence Index"
                        # "Judo Bank Composite Index" 
                        # "Judo Bank Service Index" 
                        # "Retail Sales", 
                        # "Trade Balance \(MoM\)"
                        ],                                  
        remove_filter = "QoQ"
)



0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length

In [39]:
wide_AUD_data = aud_index_vars[1]

In [37]:
macro_data = get_read_macro_data()
country_name = "AUD"
rename_conditions_vars = ["Industry",
                                  "Manufacturing|Mfg", 
                                  "Construction",
                                  "Services", 
                                  "Westpac", 
                                  "National Australia Bank"]
rename_choices = ["Industry Index", 
                        "Manufacturing Index",
                        "Construction Index",
                        "Services Index", 
                        "Consumer Confidence Index", 
                        "Business Confidence Index"]                                 
remove_filter = "QoQ"

asset_data = macro_data[macro_data['symbol'].str.contains(country_name)]
asset_data_index_vars = asset_data[['event', 'actual', 'date', 'symbol']]

if remove_filter != "":
  asset_data_index_vars = asset_data_index_vars[~asset_data_index_vars["event"].str.contains(remove_filter)] 
    
rename_conditions = []
for i in range(0, len(rename_conditions_vars)):
        rename_conditions.append( (asset_data_index_vars['event'].str.contains(rename_conditions_vars[i])) )
        print(rename_conditions[i])
    
print("Conditions Length ", len(rename_conditions) )
print("Choices Length ",  len(rename_conditions_vars))

asset_data_index_vars['Renamed Event'] = numpy.select(
        condlist=rename_conditions,
        choicelist= rename_choices, 
        default="Unknown")
    
asset_data_index_vars = asset_data_index_vars[asset_data_index_vars['Renamed Event'] != "Unknown"]
asset_data_index_vars = asset_data_index_vars[['Renamed Event', 'date', 'actual']]
asset_data_index_vars_wide = asset_data_index_vars.pivot_table(index="date",
                                                columns="Renamed Event", 
                                                values="actual", 
                                                aggfunc='median')
asset_data_index_vars_wide = asset_data_index_vars_wide.ffill()
asset_data_index_vars_wide['symbol'] = country_name

0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length: 6494, dtype: bool
0         True
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157     True
96158    False
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41        True
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158     True
Name: event, Length: 6494, dtype: bool
0        False
2        False
40       False
41       False
68       False
         ...  
96108    False
96155    False
96156    False
96157    False
96158    False
Name: event, Length