### Step 2: Utility Functions and Configurations
This step defines utility helpers for directory and file management and for base64 image encoding, suppresses noisy warning categories, and selects the spaCy model used for a given language. It also includes the operator configurations used for anonymization.

Cells included:

- Utility functions for directory and file handling, input payload construction, and image encoding.
- Warning filters.
- NLP model definitions.
- Operator configurations for Presidio.


In [None]:
# Cell 5 - Utility functions
import os
import shutil
import cv2 as cv
import base64
from typing import List

def CREATEDIRECTORY(x):
    """Create directory ``x``, including missing parents, unless the path exists.

    An already existing path (file or directory) is left untouched.
    Always returns ``None``.
    """
    if not os.path.exists(x):
        # exist_ok covers the window between the check above and the creation.
        os.makedirs(x, exist_ok=True)


def DELETEDIRECTORY(x):
    """Recursively remove directory ``x`` when it holds more than one entry.

    A path that is not an existing directory is ignored instead of letting
    ``os.listdir`` raise, which matches the missing-path tolerance of
    ``CREATEDIRECTORY`` and ``DELETEFILE``. Always returns ``None``.
    """
    # NOTE(review): directories with zero or one entry are kept, exactly as
    # before. If the intent was "remove whenever it exists", the threshold
    # below is wrong - confirm with the callers before changing it.
    if os.path.isdir(x) and len(os.listdir(x)) > 1:
        shutil.rmtree(x)


def DELETEFILE(x):
    """Remove ``x`` if the path exists; do nothing otherwise.

    Always returns ``None``.
    """
    if os.path.exists(x):
        os.remove(x)
def SORTDIRECTORY(x):
    """Sort key: the integer found after the last "_" and before the next ".".

    For example ``"frame_12.jpg"`` yields ``12``. When ``x`` has no "_" the
    whole string is considered. Raises ``ValueError`` when the extracted piece
    is not an integer literal.
    """
    # Everything after the last underscore (or the full string if none).
    _, _, tail = x.rpartition("_")
    # Cut at the first dot so multi-part extensions are dropped as well.
    digits, _, _ = tail.partition(".")
    return int(digits)
def CREATEINPUTURL(x):
    """Build an ``image_url`` payload whose URL is a JPEG data URL around ``x``.

    ``x`` is converted with ``str`` and appended after the
    ``data:image/jpeg;base64,`` prefix.
    """
    dataUrl = f"data:image/jpeg;base64,{str(x)}"
    imagePart = {"url": dataUrl}
    return {"type": "image_url", "image_url": imagePart}


def CREATEINPUTTEXT(x):
    """Build a ``text`` payload carrying ``str(x)``."""
    textValue = str(x)
    return {"type": "text", "text": textValue}

def TRANSFORMBASE64(initImage:IMAGE)->PROCESS:
    """Encode ``initImage`` as JPEG and return the bytes as base64 text.

    Args:
        initImage: Image handed to ``cv.imencode`` - presumably an OpenCV
            (NumPy) image array; confirm against the ``IMAGE`` alias.

    Returns:
        The base64 encoding of the JPEG buffer, decoded as a UTF-8 string.

    Raises:
        ValueError: If ``cv.imencode`` reports that encoding failed.
    """
    isEncoded, jpegBuffer = cv.imencode(".jpg", initImage)
    if isEncoded:
        return base64.b64encode(jpegBuffer).decode("utf-8")
    raise ValueError("COULD NOT ENCODE - ERROR JPEG FORMAT")

In [None]:
# Cell 6 - Warning filters
from warnings import filterwarnings
# Silence deprecation, user and future warnings for the rest of the session.
# The categories are registered in the same order as before so the resulting
# filter list is unchanged.
for _ignoredCategory in (DeprecationWarning, UserWarning, FutureWarning):
    filterwarnings(action="ignore", category=_ignoredCategory)
del _ignoredCategory  # keep the module namespace free of the loop variable

In [None]:
# Cell 7 - NLP configurations
def DEFINELANGUAGE(initLanguage:str)->CONFIGURATION:
    """Build the spaCy NLP-engine configuration for a language code.

    Args:
        initLanguage: Language code; matched case-insensitively.

    Returns:
        A dict with ``"nlp_engine_name": "spacy"`` and a single-entry
        ``"models"`` list holding the lower-cased language code and the
        spaCy model name chosen for it. Codes without a dedicated entry
        fall back to ``"xx_sent_ud_sm"``.
    """
    languageCode = initLanguage.lower()

    # Dedicated spaCy model per supported language code.
    modelByLanguage = {
        "es": "es_core_news_md",
        "en": "en_core_web_lg",
        "fr": "fr_core_news_sm",
        "de": "de_core_news_sm",
        "ru": "ru_core_news_sm",
        "nl": "nl_core_news_sm",
    }
    modelEntry = {
        "lang_code": languageCode,
        "model_name": modelByLanguage.get(languageCode, "xx_sent_ud_sm"),
    }
    return {"nlp_engine_name": "spacy", "models": [modelEntry]}


In [None]:
# Cell 8 - Operator configurations
from presidio_anonymizer.entities import OperatorConfig

# Entity type -> "replace" operator that substitutes a fixed placeholder.
# NOTE(review): "DEFAULT" is presumably the fallback Presidio applies to entity
# types without their own entry - confirm against the anonymizer docs.
OPERATORGENERAL = {
    entityType: OperatorConfig("replace", {"new_value": placeholder})
    for entityType, placeholder in {
        "DEFAULT": "-DETECTED_CONCERN-",
        "PHONE_NUMBER": "-MASKED_PHONENUMBER_RELATED-",
        "TITLE": "-MASKED_TITLE_RELATED-",
        "EMAIL_ADDRESS": "-MASKED_EMAIL_RELATED-",
        "CREDIT_CARD": "-MASKED_CREDITCARD_RELATED-",
        "LOCATION": "-MASKED_LOCATION_RELATED-",
        "URL": "-MASKED_URL_RELATED-",
        "DATE_TIME": "-MASKED_DATETIME_RELATED-",
        "CRYPTO": "-MASKED_CRYPTO_RELATED-",
        "NRP": "-MASKED_NRP_RELATED-",
        "IBAN_CODE": "-MASKED_IBAN_RELATED-",
        "PERSON": "-MASKED_PERSON_RELATED-",
        "MEDICAL_LICENSE": "-MASKED_MEDICAL_RELATED-",
    }.items()
}