In [0]:
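#%python
# Dependency setup (disabled): uncomment the two %pip lines below if `requests`
# or `azure-storage-blob` is not already installed, then restart the Python
# kernel before running the next cell.
#%pip install requests
#%pip install azure-storage-blob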

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import json
from io import StringIO
from azure.storage.blob import BlobServiceClient
import io

# Function to get yesterday's date
def get_yesterday_date(reference=None):
    """Return the day before ``reference`` formatted as ``YYYYMMDD``.

    Args:
        reference: Optional ``datetime`` to count back from. When omitted,
            ``datetime.now()`` is used, which matches the original behaviour.
            Passing an explicit value makes the function deterministic, which
            is useful for tests and for re-running the job for a past day.

    Returns:
        str: The previous calendar day, e.g. ``"20241130"``.
    """
    if reference is None:
        reference = datetime.now()
    return (reference - timedelta(days=1)).strftime("%Y%m%d")

# API request function to fetch gold price data
def make_gapi_request():
    """Request the XAU/USD record for yesterday's date from goldapi.io.

    The access token is taken from the ``GOLDAPI_KEY`` environment variable
    when it is set; otherwise the literal below is used, so existing setups
    that edited the literal keep working.

    Returns:
        The decoded JSON body on success, or ``None`` when the request fails,
        times out, returns an HTTP error status, or the body is not valid
        JSON. The error is printed in every failure case.
    """
    import os  # function-scope import so this block does not depend on the import cell

    api_key = os.environ.get("GOLDAPI_KEY", "goldapi-KEY-io")
    symbol = "XAU"
    curr = "USD"
    date = get_yesterday_date()

    url = f"https://www.goldapi.io/api/{symbol}/{curr}/{date}"

    headers = {
        "x-access-token": api_key,
        "Content-Type": "application/json"
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions where the
        # JSON decode error is not a RequestException subclass.
        print("Error:", str(e))
        return None

# Function to upload the data to Azure Data Lake (Blob Storage)
def upload_to_datalake(data, container_name, folder_name, file_name):
    """Serialise ``data`` as CSV and write it to ``folder_name/file_name``.

    Args:
        data: Anything accepted by ``pd.DataFrame(...)``.
        container_name: Name of the target storage container.
        folder_name: Path prefix inside the container.
        file_name: Name of the blob under ``folder_name``.

    An existing blob at the same path is replaced (``overwrite=True``).
    """
    # Resolve the target blob: service client -> container client -> blob client.
    target_blob = (
        BlobServiceClient.from_connection_string("CONNECTION STRING TO ADL")
        .get_container_client(container_name)
        .get_blob_client(f"{folder_name}/{file_name}")
    )

    # With no buffer argument, to_csv returns the CSV text directly.
    csv_text = pd.DataFrame(data).to_csv(index=False)

    target_blob.upload_blob(csv_text, overwrite=True)

    print(f"Data uploaded to {container_name}/{folder_name}/{file_name}")

# Function to check if the file already exists in the container
def check_if_file_exists(container_name, folder_name, file_name):
    """Report whether the blob ``folder_name/file_name`` exists in the container.

    Args:
        container_name: Name of the storage container to look in.
        folder_name: Path prefix inside the container.
        file_name: Name of the blob under ``folder_name``.

    Returns:
        bool: The result of ``BlobClient.exists()`` for that blob path.

    This function deliberately does not catch exceptions. The previous bare
    ``except`` turned every failure (bad credentials, network error, ...) into
    "file does not exist", after which the caller would replace an existing
    monthly file with a single new row. Any error the SDK raises now surfaces
    to the caller instead.
    """
    connection_string = "CONNECTION STRING TO ADL"

    blob_client = (
        BlobServiceClient.from_connection_string(connection_string)
        .get_container_client(container_name)
        .get_blob_client(f"{folder_name}/{file_name}")
    )

    # exists() answers the question directly instead of starting a download
    # just to see whether it fails.
    return blob_client.exists()

# Main execution function
def main():
    """Fetch yesterday's gold price record and add it to the monthly CSV blob.

    Steps:
        1. Call ``make_gapi_request()``; stop with a message if nothing came back.
        2. Wrap a single dict response in a list so it becomes one DataFrame row.
        3. If ``raw-data/<YYYYMM>.csv`` already exists in the container, download
           it and append the new row(s); otherwise start a new frame.
        4. Write the result to a local CSV of the same name (kept from the
           original behaviour) and upload it via ``upload_to_datalake``.

    The ``<YYYYMM>`` part of the file name is taken from *yesterday's* date,
    because that is the date the API was queried for. Using today's month would
    file the last day of each month under the following month when the job
    runs on the 1st.
    """
    data = make_gapi_request()

    # Guard clause: None (request failed) or an empty payload means nothing to store.
    if not data:
        print("No data retrieved from the API.")
        return

    print("API Response:", data)  # Debug: inspect the raw response structure

    # pd.DataFrame needs a list of records; a lone dict is treated as one record.
    if isinstance(data, dict):
        print("Data is a single record, wrapping it in a list.")
        data = [data]

    print("Data after wrapping:", data)

    # First six characters of YYYYMMDD -> YYYYMM of the day the data belongs to.
    file_name = f"{get_yesterday_date()[:6]}.csv"

    container_name = "gold-forecasting-container"  # Azure Data Lake container name
    folder_name = "raw-data"  # Folder in Data Lake

    new_data = pd.DataFrame(data)

    if check_if_file_exists(container_name, folder_name, file_name):
        print(f"File {file_name} exists, appending new data.")
        blob_client = (
            BlobServiceClient.from_connection_string("CONNECTION STRING TO ADL")
            .get_container_client(container_name)
            .get_blob_client(f"{folder_name}/{file_name}")
        )
        existing_data = pd.read_csv(
            io.StringIO(blob_client.download_blob().readall().decode())
        )
        result = pd.concat([existing_data, new_data], ignore_index=True)
    else:
        print(f"File {file_name} doesn't exist, creating new file.")
        result = new_data

    # Local copy in the current working directory, as the original code produced.
    result.to_csv(file_name, index=False)

    # Upload the new or updated file (replaces the blob if it already exists).
    upload_to_datalake(result, container_name, folder_name, file_name)

# Entry point: executing this cell runs the fetch-and-upload job once.
main()
