In [31]:
import json
import requests
from datetime import datetime, timedelta

#load API key
config_path = "../config.json"

with open(config_path, "r") as file:
    config = json.load(file)

api_key = config["STOCK_API_KEY"]

#load bronze file
bronze_file = "bronze_data.json"

#read file to collect dates and symbols, and store a key to avoid data duplication
existing_data = {}
most_recent_date = None
symbols = []

with open(bronze_file, 'r') as file:
    file_data = [json.loads(line) for line in file]
    for dataset in file_data:
        symbol = dataset['symbol']
        if symbol not in symbols:
            symbols.append(symbol) 
        for entry in dataset['data']:
            date = datetime.fromisoformat(entry['date'][:-1]).date()
            avoid_duplicates_key = (symbol, date) 
            existing_data[avoid_duplicates_key] = True
            if most_recent_date is None or date > most_recent_date:
                most_recent_date = date 

#print results
if most_recent_date:
    print("Most recent date:", most_recent_date)
    
else:
    print("No dates found in the data.")

Most recent date: 2024-11-29


In [33]:
#get new dates from the API
if most_recent_date:
    start_date = (most_recent_date + timedelta(days=1)).strftime("%Y-%m-%d")
    with open(bronze_file, "a") as file:
        for symbol in symbols:
            params={'api_token':api_key, 'symbols':symbol, 'date_from':start_date}
            r = requests.get('https://api.stockdata.org/v1/data/eod', params=params)
            print(f'Loading data from {symbol}')
            if r.status_code == 200:
                (print(f'Successfully fetched data from {symbol}!'))
                #first, check if there is new data
                new_data = r.json()
                if not new_data.get('data'):
                    print(f"No new data available for {symbol} starting at {start_date}.")
                filtered_data = []
                for entry in new_data.get('data', []):
                    date = datetime.fromisoformat(entry['date'][:-1]).date()
                    avoid_duplicates_key = (symbol, date)
                    if avoid_duplicates_key not in existing_data:
                        filtered_data.append(entry)
                        existing_data[avoid_duplicates_key] = True
                    
                    #only append new data
                if filtered_data:
                    new_data['data'] = filtered_data
                    new_data['symbol'] = symbol
                    file.write(json.dumps(new_data) + "\n")
                    print(f"{symbol} data saved successfully.")
                    print(new_data)
            else:
                print(f"Error loading data for {symbol}: {r.status_code}")
                break
else:
    print('There is no recent date.')

Loading data from ABEV
Successfully fetched data from ABEV!
Loading data from AFYA
Successfully fetched data from AFYA!
Loading data from AMPL
Successfully fetched data from AMPL!
Loading data from ARCO
Successfully fetched data from ARCO!
Loading data from BZFD
Successfully fetched data from BZFD!
Loading data from BSBR
Successfully fetched data from BSBR!
Loading data from BYND
Successfully fetched data from BYND!
Loading data from DNUT
Successfully fetched data from DNUT!
Loading data from FOX
Successfully fetched data from FOX!
Loading data from ERIC
Successfully fetched data from ERIC!
