<a href="https://colab.research.google.com/github/luciajimenezc/luciajimenezc.github.io/blob/main/pruebatfmfunciona.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install jsondiff
import requests
import json
import jsondiff
from datetime import datetime, timedelta


def date_parser(date: datetime) -> str:
    """Return *date* rendered as a compact ``YYYYMMDD`` string (year, month, day)."""
    compact_format = "%Y%m%d"
    return date.strftime(compact_format)


def get_data(date=None):
    """
    Retrieve the BOE summary ("sumario") for a specific date from the open-data API.

    Args:
        date: Date as a ``YYYYMMDD`` string. When omitted (``None``), today's
            date is used, resolved at call time.

    Returns:
        A dictionary:
        - the decoded JSON body when the API answers 200 with valid JSON;
        - ``{"Error": ...}`` when a 200 response does not contain valid JSON
          or when any other unexpected status code is returned;
        - ``{"No data": ...}`` when the API answers 404.

    Raises:
        requests.exceptions.RequestException: propagated unchanged from
            ``requests.get`` (connection problems, timeout, ...).
    """
    if date is None:
        # A default written in the signature is evaluated only once, when the
        # function is defined, so a long-running session would keep asking
        # for a stale "today". Resolve it on every call instead.
        date = date_parser(datetime.now())

    print(f"Getting data for {date} ...")

    headers = {
        "Accept": "application/json",
        "Referer": "https://www.boe.es/datosabiertos/api/api.php",
    }
    # requests applies no timeout by default; without one, an unresponsive
    # server would block this call indefinitely.
    response = requests.get(
        f"https://www.boe.es/datosabiertos/api/boe/sumario/{date}",
        headers=headers,
        timeout=30,
    )

    if response.status_code == 200:
        try:
            data = response.json()
        except ValueError:
            data = {"Error": "Invalid JSON in response"}
    elif response.status_code == 404:
        data = {"No data": f"No data available for {date}"}
    else:
        data = {"Error": f"Error retrieving data for {date}: {response.status_code}"}

    return data


def sanitize_keys(data):
    """
    Build a copy of *data* in which every dictionary key has been passed through str().

    Dictionaries and lists are walked recursively; any other value
    (str, int, None, tuples, ...) is handed back untouched.
    """
    if isinstance(data, list):
        return list(map(sanitize_keys, data))

    if not isinstance(data, dict):
        # Leaf value: nothing to convert.
        return data

    cleaned = {}
    for raw_key, child in data.items():
        cleaned[str(raw_key)] = sanitize_keys(child)
    return cleaned


def _lookup_child(container, key, default=None):
    """Return the child of *container* stored under *key*, or *default* if unavailable."""
    if isinstance(container, dict):
        return container.get(key, default)
    # bool is a subclass of int but is never a meaningful list position here.
    is_position = isinstance(key, int) and not isinstance(key, bool)
    if isinstance(container, (list, tuple)) and is_position and 0 <= key < len(container):
        return container[key]
    # Scalars, None, or a key that does not exist in the container.
    return default


def normalize_diff(diff, original, updated):
    """
    Normalizes a jsondiff.diff output to a consistent structure.

    Every entry of *diff* becomes either a nested normalized dictionary (for
    nested diffs) or a record ``{"action", "before", "after"}`` where action
    is one of "replace", "delete", "add" or "modify". Keys of the result are
    always strings.

    Args:
        diff: The diff to normalize. Anything that is not a dict yields ``{}``.
        original: The value the diff was computed from. May be a dict, a
            list/tuple, or any other value (in which case "before" is None).
        updated: The value the diff leads to; same accepted types as *original*.

    Returns:
        A dictionary with string keys describing each change.
    """
    if not isinstance(diff, dict):
        # Ensure diff is a dictionary; if not, return an empty structure.
        return {}

    normalized = {}

    for key, value in diff.items():
        # Ensure the key is a string
        str_key = str(key)

        # Safe lookups: the values being compared can be lists or scalars once
        # we recurse, and those have no .get().
        # NOTE(review): for list children an integer key is used as a plain
        # position in both sides; confirm this matches the position semantics
        # of the diff producer when items were inserted or deleted.
        before = _lookup_child(original, key)
        after = _lookup_child(updated, key)

        # The operation marker may be a plain string or an object that merely
        # stringifies to "$replace" (jsondiff uses symbol objects as keys), so
        # compare on the string form instead of testing dict membership.
        is_replace = isinstance(value, dict) and any(
            str(marker) == "$replace" for marker in value
        )

        if is_replace:
            # Handle $replace operation
            normalized[str_key] = {
                "action": "replace",
                "before": before,
                "after": after,
            }
        elif str_key.startswith("$delete"):
            # Handle $delete operation
            normalized[str_key] = {
                "action": "delete",
                "before": before,
                "after": None,
            }
        elif str_key.startswith("$add"):
            # Handle $add operation
            normalized[str_key] = {
                "action": "add",
                "before": None,
                "after": after,
            }
        elif isinstance(value, dict):
            # Recursive handling for nested diffs
            normalized[str_key] = normalize_diff(
                value,
                _lookup_child(original, key, {}),
                _lookup_child(updated, key, {}),
            )
        else:
            # For other cases (e.g., scalar values in the diff), record them directly.
            normalized[str_key] = {
                "action": "modify",
                "before": before,
                "after": value,
            }
    return normalized


def write_data(data, fname):
    """
    Serialize *data* as indented JSON into the file *fname*.

    All dictionary keys are converted to strings first (via sanitize_keys) so
    that non-string keys do not break serialization. Progress is reported with
    print(); a TypeError raised while dumping is reported and then re-raised.
    """
    print(f"Writing data to {fname} ...")
    serializable = sanitize_keys(data)

    with open(fname, "w") as out_file:
        try:
            json.dump(serializable, out_file, indent=4)
        except TypeError as exc:
            print(f"Error writing JSON: {exc}")
            raise
        else:
            print(f"Data successfully written to {fname}")


def get_year_comparison(year, num_days=4):
    """
    Retrieves data for the first days of a year, calculates the difference
    between each day and the day before it, and saves the result.

    The first day is stored whole under a "$replace" entry; every later day
    is stored only when its normalized diff against the previous day's data
    is non-empty. The report is written to ``yearly_report_{year}.json``.

    Args:
        year: Year to process.
        num_days: How many days to process, starting at January 1st.
            Defaults to 4 (a small sample for testing); pass 365 or 366 to
            cover the full year.

    Raises:
        ValueError: from ``datetime.strptime`` if *num_days* exceeds the
            number of days in *year*.
    """
    yearly_report = {}
    # Raw (key-sanitized) API data of the previous day. The diff must be
    # computed against this, not against the previous day's *report entry*
    # (which wraps the changes in {"date", "changes"} and may not even exist
    # when that day produced no changes).
    previous_data = None

    for day_of_year in range(1, num_days + 1):
        current = datetime.strptime(f"{year} {day_of_year}", "%Y %j")
        date = date_parser(current)
        # Sanitize keys before comparing
        data = sanitize_keys(get_data(date))

        if previous_data is None:
            # Nothing to compare the first day with: record it in full.
            yearly_report[date] = {"date": date, "changes": {"$replace": data}}
        else:
            # Compare data with the previous day and store differences.
            diff = jsondiff.diff(previous_data, data)
            normalized_diff = normalize_diff(diff, previous_data, data)

            if normalized_diff:
                yearly_report[date] = {"date": date, "changes": normalized_diff}

        previous_data = data

    # Save yearly report to file.
    write_data(yearly_report, f"yearly_report_{year}.json")


# Script entry point: build the comparison report for the year 2024.
# Any failure is reported with print() instead of propagating a traceback.
if __name__ == "__main__":
    try:
        get_year_comparison(2024)
    except Exception as err:
        print(f"An error occurred: {err}")



Getting data for 20240101 ...
Getting data for 20240102 ...
Getting data for 20240103 ...
Getting data for 20240104 ...
Writing data to yearly_report_2024.json ...
Data successfully written to yearly_report_2024.json


comparar datos

In [6]:
!ls
import json

# Read the generated report back from disk and parse it as JSON.
with open("yearly_report_2024.json") as file:
    data = json.loads(file.read())

# Pretty-print the parsed report so it can be inspected in the cell output.
print(json.dumps(data, indent=4))



Se han truncado las últimas 5000 líneas del flujo de salida.
                                                                    "identificador": "BOE-A-2024-138",
                                                                    "control": "2024/2",
                                                                    "titulo": "Real Decreto 1167/2023, de 19 de diciembre, por el que se nombra Decano de los Juzgados de Arenys de Mar al Magistrado don Jorge Langarita Cerrada.",
                                                                    "url_pdf": {
                                                                        "szBytes": "191632",
                                                                        "szKBytes": "187",
                                                                        "pagina_inicial": "558",
                                                                        "pagina_final": "558",
                                               