In [0]:
#%python
#%pip install requests
#%pip install azure-storage-blob

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import json
from io import StringIO
from azure.storage.blob import BlobServiceClient
import io

def get_yesterday_date(today=None):
    """Return the calendar day before ``today`` formatted as ``YYYYMMDD``.

    Args:
        today: Optional ``datetime`` (or ``date``) to count back from.
            Defaults to ``datetime.now()``, i.e. the local wall-clock time
            of the machine running the notebook. Passing an explicit value
            makes the function deterministic, which is useful for tests and
            for back-filling a specific day.

    Returns:
        str: The previous day's date as an 8-digit string, e.g. ``"20240229"``.
    """
    if today is None:
        today = datetime.now()
    return (today - timedelta(days=1)).strftime("%Y%m%d")

def make_gapi_request(symbol="XAU", curr="USD", date=None, api_key=None, timeout=30):
    """Fetch one day's quote from goldapi.io and return the decoded JSON.

    Called with no arguments this requests ``XAU``/``USD`` for yesterday,
    exactly as before; the keyword parameters only make the previously
    hard-coded values overridable.

    Args:
        symbol: Metal symbol placed in the URL path (default ``"XAU"``).
        curr: Currency code placed in the URL path (default ``"USD"``).
        date: Day to request as a ``YYYYMMDD`` string. Defaults to the value
            returned by ``get_yesterday_date()``.
        api_key: Value sent in the ``x-access-token`` header. When omitted the
            placeholder literal below is used; pass the real key in from a
            secret store instead of editing it into the notebook.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The parsed JSON body on success, or ``None`` if the request failed,
        timed out, returned an HTTP error status, or the body was not valid
        JSON (the error is printed in that case).
    """
    if api_key is None:
        api_key = "goldapi-KEY-io"
    if date is None:
        date = get_yesterday_date()

    url = f"https://www.goldapi.io/api/{symbol}/{curr}/{date}"

    headers = {
        "x-access-token": api_key,
        "Content-Type": "application/json"
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decode failures on requests versions whose
    # decode error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print("Error:", str(e))
        return None

def upload_to_datalake(data, container_name, folder_name, file_name, connection_string=None):
    """Serialise ``data`` as CSV and upload it to ``<folder_name>/<file_name>``.

    Any existing blob at that path is overwritten.

    Args:
        data: Anything ``pd.DataFrame`` accepts (a DataFrame, a list of
            record dicts, ...). It is written without the index column.
        container_name: Name of the target blob container.
        folder_name: Path prefix inside the container.
        file_name: Blob name under ``folder_name``.
        connection_string: Storage-account connection string. When omitted
            the placeholder literal below is used; pass the real value in
            from a secret store rather than hard-coding it here.
    """
    if connection_string is None:
        connection_string = "CONNECTION STRING TO ADL"

    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)

    # to_csv() with no target returns the CSV text directly, so no
    # intermediate StringIO buffer is needed.
    csv_text = pd.DataFrame(data).to_csv(index=False)

    blob_path = f"{folder_name}/{file_name}"
    blob_client = container_client.get_blob_client(blob_path)
    blob_client.upload_blob(csv_text, overwrite=True)

    print(f"Data uploaded to {container_name}/{blob_path}")

def check_if_file_exists(container_name, folder_name, file_name, connection_string=None):
    """Report whether the blob ``<folder_name>/<file_name>`` exists.

    Uses ``BlobClient.exists()`` instead of starting a download, and no
    longer hides failures behind a bare ``except``. Previously any error
    (bad credentials, network outage, throttling) was reported as "file does
    not exist", which made the caller create a fresh file and overwrite the
    month's accumulated data.

    Args:
        container_name: Name of the blob container to look in.
        folder_name: Path prefix inside the container.
        file_name: Blob name under ``folder_name``.
        connection_string: Storage-account connection string. When omitted
            the placeholder literal below is used; pass the real value in
            from a secret store rather than hard-coding it here.

    Returns:
        bool: ``True`` if the blob exists, ``False`` if it does not.

    Raises:
        Whatever the Azure SDK raises for problems other than a missing blob
        (authentication, connectivity, ...). These now propagate on purpose
        so the caller does not mistake an outage for an absent file.
    """
    if connection_string is None:
        connection_string = "CONNECTION STRING TO ADL"

    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    blob_client = container_client.get_blob_client(f"{folder_name}/{file_name}")

    return blob_client.exists()

def main():
    """Fetch yesterday's quote and append it to that month's CSV in the data lake.

    Steps:
      1. Call ``make_gapi_request()``; stop if it returned nothing.
      2. Wrap a single-record dict in a list so it becomes one DataFrame row.
      3. If the monthly blob already exists, download it and append the new
         row(s); otherwise start a new file.
      4. Write the result to a local CSV of the same name (kept from the
         original behaviour) and upload it with ``upload_to_datalake``.

    The monthly file is named after the month of the *record* (yesterday),
    not the month in which the notebook runs. Otherwise a run on the 1st
    would file the previous month's last day under the new month.
    """
    data = make_gapi_request()

    # None (request failed) or an empty payload: nothing to store.
    if not data:
        print("No data retrieved from the API.")
        return

    print("API Response:", data)

    if isinstance(data, dict):
        print("Data is a single record, wrapping it in a list.")
        data = [data]

    print("Data after wrapping:", data)

    # The API is queried for yesterday, so derive the file month from
    # yesterday as well.
    record_day = datetime.now() - timedelta(1)
    year_month = record_day.strftime("%Y%m")
    file_name = f"{year_month}.csv"

    container_name = "gold-forecasting-container"
    folder_name = "raw-data"

    new_data = pd.DataFrame(data)

    if check_if_file_exists(container_name, folder_name, file_name):
        print(f"File {file_name} exists, appending new data.")
        blob_client = (
            BlobServiceClient.from_connection_string("CONNECTION STRING TO ADL")
            .get_container_client(container_name)
            .get_blob_client(f"{folder_name}/{file_name}")
        )
        existing_text = blob_client.download_blob().readall().decode()
        try:
            existing_data = pd.read_csv(io.StringIO(existing_text))
        except pd.errors.EmptyDataError:
            # The blob exists but has no content; treat it as having no rows
            # instead of aborting the run.
            existing_data = pd.DataFrame()
        output_data = pd.concat([existing_data, new_data], ignore_index=True)
    else:
        print(f"File {file_name} doesn't exist, creating new file.")
        output_data = new_data

    # Local copy in the working directory, as the original code produced.
    output_data.to_csv(file_name, index=False)

    upload_to_datalake(output_data, container_name, folder_name, file_name)

# Run the fetch-and-upload pipeline once when this cell is executed.
main()
