# Setup notebooks

### General libraries

In [None]:
import os
import sys
import numpy as np
import pandas as pd

from os.path import join

# Notebook-wide pandas display settings: never truncate the column list and
# render floats with thousands separators and two decimal places.
pd.options.display.max_columns = None
pd.set_option("display.float_format", "{:,.2f}".format)

### Logging

In [None]:
import logging

# Configure root logging once: timestamp followed by the message, INFO and up.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
)

# Logger used by the notebook cells, named after the running module.
logger = logging.getLogger(__name__)

### Configuration of the connection to the DB

In [None]:
import logging
import os
import pathlib
from io import BytesIO
from azure.identity import AzureCliCredential
from azure.storage.filedatalake import DataLakeServiceClient, DataLakeDirectoryClient, DataLakeFileClient


class AzureDataLakeGen2:
    """
    Thin wrapper around one Azure Data Lake Gen2 file system (container).

    Authenticates with ``AzureCliCredential`` and offers helpers to upload raw
    data to, and download raw data from, files in the container. File paths are
    interpreted as ``<first directory>/<remaining path>``: the first component
    selects the directory client, the rest identifies the file inside it.
    """

    # Proxy mapping used when the caller does not provide one.
    DEFAULT_PROXIES = {"https": "http://gdp-squid-proxy.dsp1.we1.azure.aztec.cloud.allianz"}

    # Logger for this class's own messages. ``self.logger`` cannot be used for
    # that purpose: it is the Azure HTTP logging-policy logger, which __init__
    # raises to WARNING, so INFO records sent through it are always dropped.
    _log = logging.getLogger(__name__)

    def __init__(self, container_name: str = "advanced-analytics", storage_account_name: str = "prodazesqubi",
                 proxies=None):
        """
        Initializes an instance of AzureDataLakeGen2 for the given container and storage account.

        :param container_name: The name of the container (file system) in Azure Data Lake.
        :param storage_account_name: The name of the storage account in Azure Data Lake.
        :param proxies: Optional mapping of protocol to proxy URL, passed to the service client as
            ``proxies``. When ``None`` (the default), ``DEFAULT_PROXIES`` is used.
        """
        self.account_url = f"https://{storage_account_name}.dfs.core.windows.net/"
        self.cli_credential = AzureCliCredential()
        self.datalake_service_client = DataLakeServiceClient(
            account_url=self.account_url,
            credential=self.cli_credential,
            # Copy the default so the class-level mapping is never shared with the client.
            proxies=dict(self.DEFAULT_PROXIES) if proxies is None else proxies,
        )
        self.file_system_client = self.datalake_service_client.get_file_system_client(file_system=container_name)

        # Raise the Azure HTTP logging-policy logger to WARNING so its INFO records are not emitted.
        # The attribute is kept under its original name for backward compatibility.
        self.logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy")
        self.logger.setLevel(logging.WARNING)

    @staticmethod
    def _get_file_name(file_path: str) -> str:
        """
        Returns the given path without its first component.

        The path is handled as a POSIX path so the result always uses ``/`` as separator,
        independently of the operating system the code runs on.
        A path with a single component yields ``"."``.

        :param file_path: The ``/``-separated file path.
        :return: The path relative to its first component.
        """
        parts = pathlib.PurePosixPath(file_path).parts
        return str(pathlib.PurePosixPath(*parts[1:]))

    def _get_directory_client(self, file_path: str) -> DataLakeDirectoryClient:
        """
        Gets the directory client for the first component of the given file path.

        :param file_path: The ``/``-separated file path.
        :return: The DataLakeDirectoryClient of the first path component.
        """
        top_level_directory = file_path.split("/")[0]
        return self.file_system_client.get_directory_client(top_level_directory)

    def _create_file_client(self, file_path: str) -> DataLakeFileClient:
        """
        Creates the file at the given path and returns its DataLakeFileClient.

        :param file_path: The ``/``-separated file path to create.
        :return: The DataLakeFileClient of the created file.
        """
        return self._get_directory_client(file_path).create_file(self._get_file_name(file_path))

    def _get_file_client(self, file_path: str) -> DataLakeFileClient:
        """
        Gets the DataLakeFileClient for the given file path without creating the file.

        :param file_path: The ``/``-separated file path.
        :return: The DataLakeFileClient.
        """
        return self._get_directory_client(file_path).get_file_client(self._get_file_name(file_path))

    def write_file_to_blob(self, file_path: str, data: bytes) -> None:
        """
        Writes the provided data to the specified file in Azure Data Lake, overwriting existing content.

        :param file_path: The file path to write the data to; backslashes are converted to ``/``.
        :param data: The data to write; its length is passed explicitly to the upload call.
        """
        normalized_path = file_path.replace("\\", "/")
        # Use the client as a context manager, like the other read/write helpers do.
        with self._create_file_client(normalized_path) as file_write_client:
            file_write_client.upload_data(data, length=len(data), overwrite=True)
        self._log.info("Wrote data to file %s", file_path)

    def write_model_to_blob(self, file_path: str, data=None):
        """
        Uploads the given data to the specified file path in the Data Lake file system, overwriting
        existing content.

        :param file_path: str - The file path to upload the data to; backslashes are converted to ``/``.
        :param data: The data to upload. Defaults to ``None``.
            NOTE(review): callers are presumably expected to always pass a value - confirm.
        """
        normalized_path = file_path.replace("\\", "/")
        with self._create_file_client(normalized_path) as file_write_client:
            file_write_client.upload_data(data, overwrite=True)
        self._log.info("Wrote data to file %s", file_path)

    def read_raw_file(self, file_path: str):
        """
        Downloads the full contents of the file at the given path in the Data Lake file system.

        :param file_path: str - The file path to download; backslashes are converted to ``/``.
        :return: BytesIO - A BytesIO object containing the downloaded file contents.
        """
        normalized_path = file_path.replace("\\", "/")
        with self._get_file_client(normalized_path) as file_client:
            download_bytes = file_client.download_file().readall()
        return BytesIO(download_bytes)

In [None]:
# IPython shell escape: runs the Azure CLI login command from the notebook.
# NOTE(review): presumably required so the AzureCliCredential used by
# AzureDataLakeGen2 can obtain a token - confirm against the Azure docs.
!az login

In [None]:
# Data lake client bound to the 'gensop' container of the 'prodazesbisas' account.
dl = AzureDataLakeGen2(
    container_name="gensop",
    storage_account_name="prodazesbisas",
)

In [None]:
# Download the parquet file from the container into an in-memory buffer.
byte_io = dl.read_raw_file(
    "AGENTES/HISTORIC/AGENTES_202304.parquet"
)

In [None]:
import pandas as pd

# Parse the downloaded in-memory parquet buffer into a DataFrame.
df = pd.read_parquet(path=byte_io)

In [None]:
# Bare expression as the last statement of the cell: the notebook renders the DataFrame.
df