In [289]:
# Importar Librerias
import requests
import pandas as pd
from lxml import etree
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [290]:
class CustomError(Exception):
    pass

In [291]:
URL_DATA = "https://sanctionslistservice.ofac.treas.gov/changes/612"
NAMESPACE = {"ns": "https://www.treasury.gov/ofac/DeltaFile/1.0"}

In [292]:
def fetch_data(url):
    """Solicita el contenido XML desde el servidor."""
    response = requests.get(url, verify=False)
    if response.status_code == 200:
        return response.content
    else:
        raise CustomError("Error al solicitar información del servidor")
    
def parse_xml(content):
    """Parsea el contenido XML y devuelve el elemento raíz."""
    return etree.fromstring(content)

In [293]:
def extract_publication_date(root):
    """Extrae la fecha de publicación del XML."""
    date_element = root.find("ns:publicationInfo/ns:datePublished", NAMESPACE)
    return date_element.text.split("T")[0] if date_element is not None else "unknown_date"

def extract_entity_data(entity):
    """Extrae la información de una entidad del XML."""
    action = entity.get("action", "N/A")

    alias_text = []
    full_name_text = None

    for name in entity.findall(".//ns:name", NAMESPACE):
        alias_type = name.find("ns:aliasType", NAMESPACE)  # Verificar si existe aliasType
        full_name = name.find(".//ns:translation[ns:script='Latin']/ns:formattedFullName", NAMESPACE)
        full_name = full_name.text if full_name is not None else "N/A"

        # Si existe aliasType, lo consideramos un alias, si no, es el nombre principal
        if alias_type is not None:
            alias_text.append(full_name)
        else:
            if full_name is not "N/A":
                full_name_text = full_name

    print(alias_text)
    identity_doc = entity.find("ns:identityDocuments/ns:identityDocument", NAMESPACE)
    if identity_doc is not None:
        doc_type = identity_doc.find("ns:type", NAMESPACE)
        doc_type_text = doc_type.text if doc_type is not None else "N/A"

        doc_id = identity_doc.find("ns:documentNumber", NAMESPACE)
        doc_id_text = doc_id.text if doc_id is not None else "N/A"
    else:
        doc_type_text = "N/A"
        doc_id_text = "N/A"

    return {
        "Nombre Completo": full_name_text,
        "Tipo de Documento": doc_type_text,
        "ID de Documento": doc_id_text,
        "Acción": action,
        "Alias": alias_text,
    }

  if full_name is not "N/A":


In [294]:
def transform_data(content):
    """Transforma el XML en un DataFrame."""
    root = parse_xml(content)
    entities = root.findall("ns:entities/ns:entity", NAMESPACE)
    data = [extract_entity_data(entity) for entity in entities]
    return pd.DataFrame(data), extract_publication_date(root)+"Prueba"

In [295]:
def save_to_excel(df, filename):
    """Guarda el DataFrame en un archivo Excel y ajusta el ancho de las columnas."""
    df.to_excel(filename, index=False)

    wb = load_workbook(filename)
    ws = wb.active

    for col in ws.columns:
        max_length = 0
        col_letter = get_column_letter(col[0].column)

        for cell in col:
            try:
                if cell.value:
                    max_length = max(max_length, len(str(cell.value)))
            except:
                pass

        adjusted_width = max_length + 2
        ws.column_dimensions[col_letter].width = adjusted_width

    wb.save(filename)

In [296]:
def main():
    """Función principal que ejecuta el proceso completo."""
    try:
        content = fetch_data(URL_DATA)
        df, pub_date = transform_data(content)

        filename = f"{pub_date}.xlsx"
        save_to_excel(df, filename)

        print(f"Archivo guardado como: {filename}")

    except CustomError as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    main()

[]
['MISHIN, Aleksandr Igorvich', 'PIPPIN, James', 'KLICHKO, Ivan P', 'TRIPLEX560', 'ALEX560560', 'JAMES1789', 'SASHA-BRN']
["SERGEEVICH, Aleksandr Bol'shakov", 'WTLFNT', 'AAELBAS']
Archivo guardado como: 2025-02-11Prueba.xlsx
