# Data Transformation for the listings.csv files for Prague and Barcelona

In [0]:
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from io import BytesIO, StringIO

import pandas as pd
import requests
# For Azure connection:
from azure.storage.blob import BlobServiceClient

## Conversion rate from CZK to EUR

The currency exchange rates are obtained from the free API of the [National Bank of Poland](https://api.nbp.pl/en.html). A simple API call specifying the target currency returns that currency's exchange rate relative to the Polish złoty (PLN). Dividing the CZK-to-PLN rate by the EUR-to-PLN rate then yields the CZK-to-EUR rate.

In [0]:
def fetch_exchange_rate(currency_code, timeout=10):
    '''Fetch the latest exchange rate of a currency against the Polish zloty (PLN).

    Calls the open API of the National Bank of Poland (NBP), asks for an XML
    reply and extracts the rate together with the date it applies to.

    Args:
        currency_code: Currency code inserted into the request URL,
            e.g. "czk" or "eur".
        timeout: Seconds to wait for the HTTP response. Without a timeout
            `requests` may block forever on an unresponsive server.

    Returns:
        A tuple `(date, rate)`: the text of the `EffectiveDate` element and
        the `Mid` element converted to `float`.

    Raises:
        ValueError: If the HTTP status is not 200, the body is not
            well-formed XML, or the expected elements are missing or
            non-numeric.
        requests.RequestException: Network-level failures (including the
            timeout) are propagated unchanged.
    '''

    url = f"https://api.nbp.pl/api/exchangerates/rates/a/{currency_code}/"
    response = requests.get(url, headers={"Accept": "application/xml"}, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"Failed to fetch exchange rate for {currency_code}")

    # Report a broken payload as ValueError so callers need only one except clause.
    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        raise ValueError(f"Malformed XML in exchange rate response for {currency_code}") from exc

    mid_node = root.find(".//Mid")
    date_node = root.find(".//EffectiveDate")
    # find() returns None for an absent element; check before touching .text.
    if mid_node is None or date_node is None or not mid_node.text or not date_node.text:
        raise ValueError(f"Exchange rate response for {currency_code} lacks Mid or EffectiveDate")

    # float() itself raises ValueError when the Mid text is not numeric.
    return date_node.text, float(mid_node.text)

In [0]:
# Fetch the CZK and EUR exchange rates, each paired with the date it applies to.
# Failures are deliberately NOT caught here: if a fetch fails, the cell must stop
# the run, otherwise later cells would hit an undefined (or stale, from an earlier
# run) df_conversion_rates and possibly upload outdated rates.
czk_date, czk_to_pln = fetch_exchange_rate("czk")
eur_date, eur_to_pln = fetch_exchange_rate("eur")

# Both quotes must refer to the same date for the cross rate to be consistent
if czk_date != eur_date:
    raise ValueError("Timestamps for CZK and EUR rates do not match.")

# Cross rate via PLN: (CZK -> PLN) / (EUR -> PLN) = CZK -> EUR
czk_to_eur = czk_to_pln / eur_to_pln

# Single-row table holding the date, both PLN rates and the derived CZK to EUR rate
data = {
    "Date": [czk_date],
    "CZK_to_PLN": [czk_to_pln],
    "EUR_to_PLN": [eur_to_pln],
    "CZK_to_EUR": [czk_to_eur]
}

df_conversion_rates = pd.DataFrame(data)

The rates, as well as the date of extraction, are the following:

In [0]:
# Bare last expression so the notebook renders the conversion-rate table as cell output.
df_conversion_rates

## Uploading the data to the Azure Blob Storage

In [0]:
# Azure Blob Storage
# The connection string embeds the storage account key, so it is read from the
# environment instead of being stored in the notebook.
# NOTE(review): an account key was previously hardcoded in this cell and must be
# treated as leaked - rotate it in the Azure portal.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    print("AZURE_STORAGE_CONNECTION_STRING is not set; connecting to Azure Blob Storage will fail.")
container_name = "bnpapi-rentscape-blob"

# Blob name (new file name)
blob_name = "conversion_rates.json"

In [0]:
# Convert the DataFrame to JSON format in memory (a JSON array with one object per row)
json_buffer = StringIO()
df_conversion_rates.to_json(json_buffer, orient="records", lines=False)
json_buffer.seek(0)  # Reset the buffer position to the beginning

try:
    # Create a BlobServiceClient and ContainerClient
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    print("Connected to Azure Blob Storage.")

    # Ask the service about this one blob instead of listing the whole container
    blob_client = container_client.get_blob_client(blob=blob_name)
    if blob_client.exists():
        print(f"A blob with the name '{blob_name}' already exists in the container '{container_name}'.")
    else:
        # Upload the file if it doesn't exist; overwrite=False keeps an existing
        # blob untouched even if one appears between the check and the upload
        blob_client.upload_blob(json_buffer.getvalue(), overwrite=False)
        print(f"File '{blob_name}' uploaded successfully!")
except Exception as e:
    print("An error occurred:", e)
    # Re-raise so the run stops here with the real traceback instead of failing
    # later on names this cell could not define.
    raise

In [0]:
# Show the container contents as a simple tree: the container name first,
# then one "+---<blob name>" line per blob returned by list_blobs().
# Uses the container_client created in the upload cell.
blob_list = container_client.list_blobs()
print(container_name)
for blob in blob_list:
    print(f"+---{blob.name}")