In [34]:
%reset -f
import sys
import os

# Project root: the parent of the notebook's current working directory.
PROJ_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Make the project root importable (presumably for project-local modules such as
# helper_functions -- TODO confirm). sys.path is interpreter state and is not
# cleared by `%reset -f`, so guard against appending a duplicate on every re-run.
if PROJ_PATH not in sys.path:
    sys.path.append(PROJ_PATH)

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import itertools
import random

import datetime as DT
from datetime import date
from datetime import datetime, timedelta
#import yfinance as yf

import time
from multiprocessing import Pool as ProcessPool 
from multiprocessing.dummy import Pool as ThreadPool

import requests
from io import StringIO
import helper_functions as hfs


from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry 

import warnings
# Silence warnings for the session: UserWarning explicitly first, then every category.
warnings.simplefilter(action="ignore", category=UserWarning)
warnings.filterwarnings(action="ignore")

# Lift pandas' display truncation: show all columns, all rows, and the full
# (un-truncated) content of every cell.
for _display_option in ("display.max_columns", "display.max_rows", "display.max_colwidth"):
    pd.set_option(_display_option, None)

In [35]:
import configparser

# Load environment-specific settings (split date, API key, local folders) from the ini file.
ENV_CONFIG_PATH = f'{PROJ_PATH}/kye/env_config.ini'
env_config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so fail loudly here rather than with an opaque
# KeyError on the first section lookup below.
if not env_config.read(ENV_CONFIG_PATH):
    raise FileNotFoundError(f"Could not read config file: {ENV_CONFIG_PATH}")

SPLIT_DATE = env_config['ENV_VARS']['SPLIT_DATE']
config = {'api_key': env_config['KEYS']['DATA_KEY']}
ML_DATA_FOLDER_PATH = env_config['LOCAL_PATHS']['ML_DATA_FOLDER_PATH']
PROCESSING_TEMP_FOLDER_PATH = env_config['LOCAL_PATHS']['PROCESSING_TEMP_FOLDER_PATH']

print("SPLIT_DATE: ", SPLIT_DATE)

# Read the clock once so `today`, `to_date` and FROM_DATE can never disagree
# (e.g. if the cell happens to run across midnight).
_run_date = date.today()
today = str(_run_date)
to_date = today

# Start of the download window: four 365-day years plus a 100-day buffer before today.
HISTORY_WINDOW_DAYS = (365 * 4) + 100
FROM_DATE = pd.to_datetime(_run_date) - DT.timedelta(days=HISTORY_WINDOW_DAYS)
print(FROM_DATE)

SPLIT_DATE:  2023-08-09
2019-05-02 00:00:00


In [36]:
# Index symbols to download, without the ".INDX" suffix that
# get_indices_history_eod appends when it builds the request URL.
indices = ["GSPC"]

# Historical data on Indices

In [37]:
def get_indices_history_eod(symbol="DJI", from_yyyy_mm_dd = "all", to_yyyy_mm_dd = "all", api_token = config['api_key']):
    """Download end-of-day history for one index from eodhistoricaldata.com.

    Parameters
    ----------
    symbol : str
        Index symbol without the exchange suffix; ".INDX" is appended here.
    from_yyyy_mm_dd : str
        Start date sent as the `from` query parameter, or "all" to send no
        start date.
    to_yyyy_mm_dd : str
        Accepted for call compatibility but NOT sent to the API: the request
        is deliberately open-ended on the `to` side, as in the original code.
    api_token : str
        API token placed in the request URL. The default is bound to
        config['api_key'] when this function is defined.

    Returns
    -------
    pandas.DataFrame or None
        The response body parsed as CSV, with an added 'Ticker' column holding
        `symbol`. None when the request ends with a non-OK status: either a
        status other than 429, or a 429 that persists after 5 retries.
    """
    max_rate_limit_retries = 5

    # Same URL shape as before for both cases ("...INDX?from=X&api_token=K" or
    # "...INDX?&api_token=K"), but built from the `api_token` argument, which
    # was previously ignored in favour of the global config['api_key'].
    url = "https://eodhistoricaldata.com/api/eod/" + symbol + ".INDX?"
    if from_yyyy_mm_dd != "all":
        url += "from=" + from_yyyy_mm_dd
    url += "&api_token=" + api_token

    response = hfs.requests_retry_session().get(url)
    retries = 0
    while response.status_code != requests.codes.ok:
        if response.status_code != 429:
            # Any failure other than 429 is not retried.
            print(symbol, " breaking : " + str(response.status_code) + " <> status code")
            return None

        if retries == max_rate_limit_retries:
            print("c == 5 breaking", symbol)
            return None

        # Status 429: wait a random 4-8 seconds before trying again.
        time.sleep(random.randint(4, 8))
        response = hfs.requests_retry_session().get(url)
        retries += 1

    df = pd.read_csv(StringIO(response.text), engine='python')
    df['Ticker'] = symbol
    return df.reset_index(drop=True)

### Getting Indices' Daily OHLCV

In [38]:
tickers = indices

# One positional-argument tuple per symbol, in the parameter order of
# get_indices_history_eod: (symbol, from date, to date, API token).
# NOTE(review): presumably hfs.get_data_from_API unpacks each tuple into
# func_to_run -- confirm against the helper.
args = ((symbol,
         str(FROM_DATE).split()[0],   # From date: keep only the YYYY-MM-DD part
         to_date,                     # To date: reuse the value computed in the config cell
         config['api_key']) for symbol in tickers)

indices_daily_ohlcv = hfs.get_data_from_API(tickers = tickers,
                                            func_to_run = get_indices_history_eod,
                                            func_args = args)

print("unique #tickers: ", indices_daily_ohlcv['Ticker'].nunique())
print(indices_daily_ohlcv.shape)

# Drop rows without a closing price. `.notna()` replaces the unary minus on a
# boolean mask, which is not the supported way to negate booleans.
indices_daily_ohlcv = indices_daily_ohlcv[indices_daily_ohlcv['Close'].notna()].reset_index(drop=True)
print(indices_daily_ohlcv.shape)
indices_daily_ohlcv.tail(2)

chunk  1 :  1
total time:  0.2059628963470459
returning df
unique #tickers:  1
(1076, 8)
(1076, 8)


Unnamed: 0,Date,Open,High,Low,Close,Adjusted_close,Volume,Ticker
1074,2023-08-08,4498.0298,4503.3101,4464.3901,4499.3799,4499.3799,3884910000,GSPC
1075,2023-08-09,4501.5698,4502.4399,4461.3301,4467.71,4467.71,2018497000,GSPC


### Adjust for splits

In [39]:
# Ratio of raw close to adjusted close; exactly 1.0 on rows that need no adjustment.
close_to_adj_close_ratio = indices_daily_ohlcv['Close'] / indices_daily_ohlcv['Adjusted_close']

# Prices are divided by the ratio and volume is multiplied by it. Dividing or
# multiplying by exactly 1.0 leaves a value unchanged, so rows with ratio == 1
# pass through as-is without a separate np.where branch per column.
adjusted_columns = {
    f'Adj_{price_col}': indices_daily_ohlcv[price_col] / close_to_adj_close_ratio
    for price_col in ('Open', 'High', 'Low')
}
adjusted_columns['Adj_Volume'] = indices_daily_ohlcv['Volume'] * close_to_adj_close_ratio

# NOTE: this rebinds `indices_daily_ohlcv` to a frame without the raw
# Open/High/Low/Close/Volume columns, so re-run the fetch cell before
# re-running this one.
indices_daily_ohlcv = (
    indices_daily_ohlcv
    .assign(close_to_adj_close_ratio=close_to_adj_close_ratio, **adjusted_columns)
    .rename(columns={"Adjusted_close": "Adj_Close"})
    .loc[:, ['Ticker', 'Date', 'Adj_Open', 'Adj_High', 'Adj_Low',
             'Adj_Close', 'Adj_Volume', 'close_to_adj_close_ratio']]
)

print(indices_daily_ohlcv.shape)
indices_daily_ohlcv.tail(2)

(1076, 8)


Unnamed: 0,Ticker,Date,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Volume,close_to_adj_close_ratio
1074,GSPC,2023-08-08,4498.0298,4503.3101,4464.3901,4499.3799,3884910000.0,1.0
1075,GSPC,2023-08-09,4501.5698,4502.4399,4461.3301,4467.71,2018497000.0,1.0


In [40]:
# Persist the adjusted frame as a pickle whose file name carries the run date (`today`).
indices_pickle_path = f'{PROJ_PATH}/{ML_DATA_FOLDER_PATH}/indices_daily_ohlcv_{today}.pkl'
indices_daily_ohlcv.to_pickle(indices_pickle_path)

In [41]:
%reset -f