In [15]:
import urllib.request
import pandas
import numpy
import bs4
import requests as rq
import urllib
import random
import itertools
import fs

def read_macro_data(url = 'https://raw.githubusercontent.com/nikhilchandra-stats/macrodatasetsraw/master/data/daily_fx_macro_data.csv', 
                    encoding_var = 'cp1252'):
    returned_data = pandas.read_csv(url, encoding=encoding_var)
    returned_data['date'] = pandas.to_datetime(returned_data['date'])
    return returned_data

def create_monthly_dummy_frame(start_date = '2011-01-01', end_date = '2024/04/24', freq_var = 'D'):
    dates_series = pandas.date_range(start=start_date, end=end_date,freq=freq_var)
    returned_data = pandas.DataFrame(dates_series, columns=['date_var'])
    return returned_data

def get_asset_file_path(asset_name, data_path = "C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data"):
    directory_object = fs.open_fs(data_path)
    files_in_directory = list(directory_object.listdir('/'))
    files_in_directory = list(map(lambda a:"C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data/" + a, files_in_directory))
    asset_file = pandas.Series(files_in_directory)
    asset_file = asset_file[asset_file.str.contains(asset_name, regex=True, case=True)].to_list()
    return asset_file[0]

def harmonise_dates_join(data_to_join,
                         date_col = 'date', 
                         start_date = '2011-01-01',
                         end_date = '2024/04/24', 
                         freq_var = 'D'):
    dummy_frame = create_monthly_dummy_frame(start_date = '2011-01-01', end_date = '2024/04/24', freq_var = 'D')
    joined_data = dummy_frame.merge(data_to_join, left_on='date_var', right_on=date_col,how='left')
    joined_data = joined_data.fillna(method='ffill')
    return joined_data

def get_technical_data(data_path = 
                       "C:/Users/Nikhil Chandra/Documents/Repos/NEMA/data/currency_technical"
                       ):
    files_in_directory = fs.open_fs(data_path)
    file_list = files_in_directory.listdir('/')
    full_file_path = list(map(lambda x: data_path + "/" + x, file_list))
    full_file_path = pandas.Series(full_file_path)
    full_file_path = full_file_path[full_file_path.str.contains("technical_[0-9][0-9]",   case = True, regex=True)]
    data_from_files = full_file_path.to_list()
    data_from_files = map( lambda x: pandas.read_csv(x), data_from_files)
    data_from_files = list(data_from_files)
    data_from_files = pandas.concat(data_from_files)
    return data_from_files

class ASSET:
    def __init__(self, name,country,data_path = "C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data"):
        self.name = name
        self.country = country
        self.data_path = data_path
        self.asset_path = get_asset_file_path(self.name, self.data_path)

    def __str__(self):
        return f"Asset:{self.name}, Country:{self.country}"
    
    def ASSET_get_macro(self):
        raw_macro_data = read_macro_data()
        returned_data = raw_macro_data[raw_macro_data['symbol'] == self.country]
        return returned_data
    
    def ASSET_ts_data(self):
        returned_data = pandas.read_csv(self.asset_path)
        returned_data['ASSET'] = self.name
        return returned_data
    
    def ASSET_CPI(self):
        raw_macro_data = read_macro_data()
        returned_data = raw_macro_data[raw_macro_data['symbol'] == self.country]
        returned_data = returned_data[returned_data['event'].str.contains('CPI|Consumer Price',case=True, regex=True)]
        if self.country == "AUD":
            returned_data = returned_data[returned_data['event'].str.contains('QoQ',case=True,regex=False)]
        if self.country == "EUR":
            returned_data = returned_data[returned_data['event'].str.contains('MoM',case=True,regex=False)]
            returned_data = returned_data[returned_data['event'].str.contains('EU norm',case=True,regex=False)]
            returned_data = returned_data[~returned_data['event'].str.contains('Prel',case=True,regex=False)] 
        if self.country == "USD":
            returned_data = returned_data[returned_data['event'].str.contains('MoM',case=True,regex=False)]
            returned_data = returned_data[returned_data['event'].str.contains('ex Food',case=True,regex=False)]
            returned_data = returned_data[~returned_data['event'].str.contains('Prel',case=True,regex=False)]
        if self.country == "CAD":
            returned_data = returned_data[returned_data['event'].str.contains('MoM',case=True,regex=False)]
            returned_data = returned_data[returned_data['event'].str.contains('Core',case=True,regex=False)]              

        # returned_data['event'] = "CPI"           
        return returned_data


AUD_ASSET = ASSET(name="AUD_USD", country="AUD")
JPY_ASSET = ASSET(name = "JPY", country = "JPY")
# EUR_ASSET = ASSET(name="EURUSD", country="EUR")
# USD_ASSET = ASSET(name="USDJPY", country="USD")
# CAD_ASSET = ASSET(name="CADUSD", country="CAD")

AUD_MACRO_DATA = AUD_ASSET.ASSET_get_macro()
AUD_TS = AUD_ASSET.ASSET_ts_data()
AUD_CPI = AUD_ASSET.ASSET_CPI()
PRICE_JOINED_CPI = harmonise_dates_join(data_to_join=AUD_CPI, 
                                        date_col='date', 
                                        start_date='2011-01-01',
                                        end_date='2024-04-24',
                                          freq_var='D')
PRICE_JOINED_CPI = PRICE_JOINED_CPI[['date_var', 'event', 'actual']]
ASSET_TS_DATA_PRICE = AUD_TS[['Date', 'Open', 'High', 'Low', 'ASSET']]
ASSET_TS_DATA_PRICE['Date'] = pandas.to_datetime(ASSET_TS_DATA_PRICE['Date'])
CPI_JOINED_AUD = ASSET_TS_DATA_PRICE.merge(PRICE_JOINED_CPI, 
                                           left_on='Date', 
                                           right_on='date_var',
                                             how='left')
CPI_JOINED_AUD = CPI_JOINED_AUD.dropna(subset=['event'])
CPI_JOINED_AUD = CPI_JOINED_AUD.drop(columns=['date_var'])

raw_df = CPI_JOINED_AUD
dependant_vars = ['actual']
dependant_df = raw_df.loc[:, dependant_vars]

# def create_lm_array(data_frame_var, dependant_var, independant_var):

# EUR_MACRO_DATA = EUR_ASSET.ASSET_get_macro()
# USD_MACRO_DATA = USD_ASSET.ASSET_get_macro()
# CAD_MACRO_DATA = CAD_ASSET.ASSET_get_macro()
# EUR_MACRO_DATA = EUR_MACRO_DATA[EUR_MACRO_DATA['event'].str.contains('CPI|Consumer Price',case=True, regex=True)]

# joined_data_AUD = harmonise_dates_join(data_to_join = AUD_MACRO_DATA,
#                          date_col = 'date', 
#                          start_date = '2011-01-01',
#                          end_date = '2024/04/24', 
#                          freq_var = 'D')

#-------------------------------------------Create FX Data Base


# data_path = "C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data"
# assets = ['AUD_USD']
# asset_of_interest = '|'.join(assets)
# directory_object = fs.open_fs(data_path)
# files_in_directory = list(directory_object.listdir('/'))
# files_in_directory = list(map(lambda a:"C:/Users/Nikhil Chandra/Documents/Repos/IG Trading/data/" + a, files_in_directory))
# asset_file = pandas.Series(files_in_directory)
# asset_file = asset_file[asset_file.str.contains(asset_of_interest, regex=True, case=True)].to_list()

# def read_and_wrangle_asset(file_path, asset_name):
#     returned_data = pandas.read_csv(file_path) 
#     returned_data['ASSET'] = asset_name

# test = get_asset_path(asset_name="AUD_USD")

# print(test[0])


# joined_data_EUR = harmonise_dates_join(data_to_join = EUR_MACRO_DATA,
#                          date_col = 'date', 
#                          start_date = '2011-01-01',
#                          end_date = '2024/04/24', 
#                          freq_var = 'D')

# urlx = "https://au.finance.yahoo.com/quote/AUDUSD%3DX/history/"
# headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'}
# print('{}#!{}'.format(urlx, 'meme'))
# r = rq.get('{}#!{}'.format(urlx, 'meme'), headers=headers)
# r.status_code
# html_read = r.text
# soup = bs4.BeautifulSoup(html_read, "html.parser")
# table_find = soup.find_all("table")
# df = pandas.read_html(str(table_find))
# print(len(df))
# print(df)

# url_investing= "https://www.investing.com/"
# headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"}
# Here the user agent is for Edge browser on windows 10. You can find your browser user agent from the above given link.
# r = rq.get(url=url_investing, headers=headers)

# print(r.status_code)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ASSET_TS_DATA_PRICE['Date'] = pandas.to_datetime(ASSET_TS_DATA_PRICE['Date'])
