### Instalar librerías y dependencias necesarias para el proyecto

In [1]:
!pip install python-dotenv --upgrade
!pip install boto3 --upgrade
!pip install pyaml --upgrade
!pip install Jinja2 --upgrade
!pip install opensearch-py

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
  Attempting uninstall: python-dotenv
    Found existing installation: python-dotenv 1.0.0
    Uninstalling python-dotenv-1.0.0:
      Successfully uninstalled python-dotenv-1.0.0
Successfully installed python-dotenv-1.0.1
Collecting boto3
  Downloading boto3-1.34.149-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<1.35.0,>=1.34.149 (from boto3)
  Downloading botocore-1.34.149-py3-none-any.whl.metadata (5.7 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.2-py3-none-any.whl.metadata (1.7 kB)
Downloading boto3-1.34.149-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading botocore-1.34.149-py3-none-any.whl (12.4 MB)
[2K   [90

### Importar librerías y dependencias

In [12]:
import os
import sys
import logging
import time
import boto3
import botocore
import yaml
import json
from jinja2 import Template, Environment
from dotenv import load_dotenv, find_dotenv
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

# Configure logging: records at DEBUG level and above are sent to two handlers,
# a file handler writing to "drugs.log" and a stream handler.
log_handlers = [logging.FileHandler("drugs.log"), logging.StreamHandler()]
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    handlers=log_handlers,
)

### Cargar variables de entorno y archivos de configuración

In [13]:
# Load the environment variables; abort when no .env file could be loaded
if not(load_dotenv(find_dotenv())):
    sys.exit('No se puede continuar porque no se han definido las variables de entorno para el  proyecto!')

# Reset the configuration first, so that a failed (re)load never leaves a stale or
# half-processed (un-rendered) configuration behind for the following cells.
yaml_data = {}


def get_env_variable(name):
    """Return the value of environment variable `name`, or None when it is not set."""
    return os.getenv(name)


# Load the .yaml configuration file
try:
    # Jinja2 environment whose templates can call env('NAME') to read environment variables
    env = Environment()
    env.globals['env'] = get_env_variable

    with open('config.yaml', 'r') as file:
        template_content = file.read()

    # First pass: parse the raw file so its own top-level keys can be used as the
    # template context. An empty file parses to None, hence the `or {}`.
    template_context = yaml.safe_load(template_content) or {}

    # Render the template with that context
    template = env.from_string(template_content)
    rendered_content = template.render(**template_context)

    # Second pass: parse the rendered YAML. `yaml_data` is only assigned here,
    # once every previous step has succeeded.
    yaml_data = yaml.safe_load(rendered_content) or {}

    print("Archivo de configuracion cargado exitosamente.")
except FileNotFoundError:
    print("Error: El archivo de configuración .yaml no se encuentra disponible")
except yaml.YAMLError as exc:
    print(f"Error al analizar el archivo YAML: {exc}")
except Exception as e:
    print(f"Ocurrió un error inesperado: {e}")
    

Archivo de configuracion cargado exitosamente.


In [14]:
# Validate that the required environment variables and configuration values are
# defined, then build the AWS session, the OpenSearch Serverless client and the signer.
try:
    # Environment variables. `.get` returns None for a missing name (indexing
    # os.environ would raise KeyError before the descriptive checks below could run).
    aws_account_id = os.environ.get('aws_account_id')
    aws_region = os.environ.get('aws_region')
    aws_user_name = os.environ.get('aws_user_name')
    aws_access_key_id = os.environ.get('aws_access_key_id')
    aws_secret_access_key = os.environ.get('aws_secret_access_key')

    # Configuration values. `or {}` also covers sections that exist in the YAML
    # but are empty (parsed as None).
    open_search = yaml_data.get('OpenSearch') or {}
    service = open_search.get('service')
    port = open_search.get('port')
    use_ssl = open_search.get('use_ssl')
    verify_certs = open_search.get('verify_certs')
    pool_maxsize = open_search.get('pool_maxsize')
    timeout = open_search.get('timeout')
    collection_settings = open_search.get('Collection') or {}
    collection_name = collection_settings.get('name')
    collection_type = collection_settings.get('type')
    collection_description = collection_settings.get('description')
    collection_index = collection_settings.get('index')
    data_access_policy = open_search.get('DataAccessPolicy') or {}
    encryption_policy = open_search.get('EncryptionPolicy') or {}
    network_policy = open_search.get('NetworkPolicy') or {}

    # (value, message raised when the value is missing), checked in this order
    required_settings = [
        (aws_account_id, 'No se ha definido la variable de entorno: aws_account_id'),
        (aws_region, 'No se ha definido la variable de entorno: aws_region'),
        (aws_user_name, 'No se ha definido la variable de entorno: aws_user_name'),
        (aws_access_key_id, 'No se ha definido la variable de entorno: aws_access_key_id'),
        (aws_secret_access_key, 'No se ha definido la variable de entorno: aws_secret_access_key'),
        (service, 'No se ha definido la variable de configuración de openSearch-AWS : service'),
        (port, 'No se ha definido la variable de configuración de openSearch-AWS : port'),
        (use_ssl, 'No se ha definido la variable de configuración de openSearch-AWS : use_ssl'),
        (verify_certs, 'No se ha definido la variable de configuración de openSearch-AWS : verify_certs'),
        (pool_maxsize, 'No se ha definido la variable de configuración de openSearch-AWS : pool_maxsize'),
        (collection_name, 'No se ha definido la variable de configuración de openSearch-AWS : collection_name'),
        (collection_index, 'No se ha definido la variable de configuración de openSearch-AWS : collection_index'),
        (data_access_policy, 'No se ha definido la política de acceso a los datos en openSearchAWS'),
        (encryption_policy, 'No se ha definido la política de encriptación de los datos en openSearch-AWS'),
        (network_policy, 'No se ha definido la política de acceso a la red en openSearch-AWS'),
    ]
    for setting_value, missing_message in required_settings:
        # Only None, '' and {} count as "not defined"; legitimate falsy values
        # such as use_ssl=False must pass the validation.
        if setting_value in (None, '', {}):
            raise Exception(missing_message)

    # Build the boto3 session, the OpenSearch Serverless client and the SigV4 signer
    session = boto3.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, region_name=aws_region)
    client = session.client('opensearchserverless')
    credentials = session.get_credentials()
    awsauth = AWSV4SignerAuth(credentials, aws_region, service)

    print('Conexion AWS sucessfull')

except Exception as ex:
    print(ex)
    print(f'Error en la linea No. {ex.__traceback__.tb_lineno}')

Conexion AWS sucessfull


### Definición de Funciones

In [15]:
# Function that creates the data encryption policy
def createEncryptionPolicy(client):
    """Create the encryption security policy described by the module-level `encryption_policy`.

    The `name`, `description` and `type` entries of `encryption_policy` are passed
    through as-is; its `policy` entry (default `{}`) is serialized to JSON.

    Args:
        client: object exposing `create_security_policy`; the notebook passes the
            boto3 'opensearchserverless' client.

    Returns:
        The response of `create_security_policy`, or None when the call failed with
        a ConflictException (which is only reported on stdout).

    Raises:
        botocore.exceptions.ClientError: any client error other than ConflictException.
    """
    try:
        response = client.create_security_policy(description=encryption_policy.get('description'),
                                                 name=encryption_policy.get('name'),
                                                 policy=json.dumps(encryption_policy.get('policy',{})),
                                                 type=encryption_policy.get('type')
                                                )
        print('\nEncryption policy created:')
        print(response)
        return response
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print('[ConflictException] The policy name or rules conflict with an existing policy.')
            return None
        # Anything else is unexpected: propagate it with its original traceback
        raise

# Function that creates the network access policy
def createNetworkPolicy(client):
    """Create the network security policy described by the module-level `network_policy`.

    The `name`, `description` and `type` entries of `network_policy` are passed
    through as-is; its `policy` entry (default `{}`) is serialized to JSON.

    Args:
        client: object exposing `create_security_policy`; the notebook passes the
            boto3 'opensearchserverless' client.

    Returns:
        The response of `create_security_policy`, or None when the call failed with
        a ConflictException (which is only reported on stdout).

    Raises:
        botocore.exceptions.ClientError: any client error other than ConflictException.
    """
    try:
        response = client.create_security_policy(description=network_policy.get('description'),
                                                 name=network_policy.get('name'),
                                                 policy=json.dumps(network_policy.get('policy',{})),
                                                 type=network_policy.get('type')
                                                )
        print('\nNetwork policy created:')
        print(response)
        return response
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print('[ConflictException] A network policy with this name already exists.')
            return None
        # Anything else is unexpected: propagate it with its original traceback
        raise

# Function that creates the data access policy
def createAccessPolicy(client):
    """Create the data access policy described by the module-level `data_access_policy`.

    The `name`, `description` and `type` entries of `data_access_policy` are passed
    through as-is; its `policy` entry (default `{}`) is serialized to JSON.

    Args:
        client: object exposing `create_access_policy`; the notebook passes the
            boto3 'opensearchserverless' client.

    Returns:
        The response of `create_access_policy`, or None when the call failed with
        a ConflictException (which is only reported on stdout).

    Raises:
        botocore.exceptions.ClientError: any client error other than ConflictException.
    """
    try:
        response = client.create_access_policy(description=data_access_policy.get('description'),
                                               name=data_access_policy.get('name'),
                                               policy=json.dumps(data_access_policy.get('policy',{})),
                                               type=data_access_policy.get('type')
                                              )
        print('\nAccess policy created:')
        print(response)
        return response
    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print('[ConflictException] An access policy with this name already exists.')
            return None
        # Anything else is unexpected: propagate it with its original traceback
        raise
            
# Function that creates the collection
def createCollection(client, poll_interval=10):
    """Create the configured collection, wait until it leaves 'CREATING' and index it.

    Uses the module-level `collection_name`, `collection_type` and
    `collection_description`. Once the collection is reported as 'ACTIVE', its
    endpoint (without the "https://" prefix) is handed to `indexData`.

    Args:
        client: object exposing `create_collection` and `batch_get_collection`;
            the notebook passes the boto3 'opensearchserverless' client.
        poll_interval: seconds to sleep between two status checks (default 10).

    Raises:
        RuntimeError: the collection is not listed by `batch_get_collection`, or it
            left the 'CREATING' state with a status other than 'ACTIVE'.
        botocore.exceptions.ClientError: any client error other than
            ConflictException (which is only reported on stdout).
    """
    try:
        client.create_collection(name=collection_name,
                                 type=collection_type,
                                 description=collection_description
                                 )

        response = client.batch_get_collection(names=[collection_name])
        details = response.get('collectionDetails') or []

        # Periodically check collection status
        while details and details[0]['status'] == 'CREATING':
            print('Creating collection...')
            time.sleep(poll_interval)
            response = client.batch_get_collection(names=[collection_name])
            details = response.get('collectionDetails') or []

        if not details:
            raise RuntimeError(f"Collection '{collection_name}' was not returned by batch_get_collection.")

        # Leaving 'CREATING' does not imply success: only an ACTIVE collection
        # can be used for indexing.
        final_status = details[0]['status']
        if final_status != 'ACTIVE':
            raise RuntimeError(f"Collection '{collection_name}' finished with status '{final_status}' instead of 'ACTIVE'.")

        print('\nCollection successfully created:')
        print(details)

        # Extract the collection endpoint from the response
        host = details[0]['collectionEndpoint']
        aws_hostname = host.replace("https://", "")
        indexData(aws_hostname)

    except botocore.exceptions.ClientError as error:
        if error.response['Error']['Code'] == 'ConflictException':
            print('[ConflictException] A collection with this name already exists. Try another name.')
        else:
            # Anything else is unexpected: propagate it with its original traceback
            raise


def indexData(host):
    """Create the configured index on the collection endpoint `host`.

    Builds an OpenSearch client from the module-level connection settings (`port`,
    `awsauth`, `use_ssl`, `verify_certs`, `pool_maxsize`, `timeout`) and creates
    the index named `collection_index` with 5 shards and 1 replica.

    Args:
        host: host name of the collection endpoint, without the "https://" prefix.
    """
    # Build the OpenSearch client. `pool_maxsize` is a mandatory configuration
    # value, so it is forwarded to the connection instead of being ignored.
    search_client = OpenSearch(hosts=[{'host': host, 'port': port}],
                               http_auth=awsauth,
                               use_ssl=use_ssl,
                               verify_certs=verify_certs,
                               connection_class=RequestsHttpConnection,
                               pool_maxsize=pool_maxsize,
                               timeout=timeout
                               )
    # It can take up to a minute for data access rules to be enforced
    # NOTE(review): the wait below is only 10 seconds - confirm it is long enough.
    time.sleep(10)

    # Create index
    body_index = {
        'settings': {
            'number_of_shards': 5,
            'number_of_replicas': 1
        }
    }

    response = search_client.indices.create(index=collection_index, body=body_index)
    print('\nCreating index:')
    time.sleep(5)
    print(response)
    print('\nCollection-Index successfully created.')

    # Example (disabled): add a sample document to the index.
    # response = search_client.index(
    #     index=collection_index,
    #     body={
    #         'title': 'Seinfeld',
    #         'creator': 'Larry David',
    #         'year': 1989
    #     }
    # )
    # print('\nDocument added:')
    # print(response)

### Creación de políticas y colección de datos en OpenSearch Serverless AWS

In [16]:
def main():
    """Run the provisioning steps in order: encryption policy, network policy,
    data access policy and, finally, the collection.

    Every step receives the module-level `client`.
    """
    provisioning_steps = (
        createEncryptionPolicy,
        createNetworkPolicy,
        createAccessPolicy,
        createCollection,
    )
    for run_step in provisioning_steps:
        run_step(client)

if __name__ == "__main__":
    main()


Encryption policy created:
{'securityPolicyDetail': {'createdDate': 1722031916638, 'description': 'Encryption policy for digital-drugs collections', 'lastModifiedDate': 1722031916638, 'name': 'digital-drugs-policy', 'policy': {'Rules': [{'Resource': ['collection/digital-drugs'], 'ResourceType': 'collection'}], 'AWSOwnedKey': True}, 'policyVersion': 'MTcyMjAzMTkxNjYzOF8x', 'type': 'encryption'}, 'ResponseMetadata': {'RequestId': '03ee3259-74d2-4063-8c92-5acc9bf56b76', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '03ee3259-74d2-4063-8c92-5acc9bf56b76', 'date': 'Fri, 26 Jul 2024 22:11:56 GMT', 'content-type': 'application/x-amz-json-1.0', 'content-length': '350', 'connection': 'keep-alive'}, 'RetryAttempts': 0}}

Network policy created:
{'securityPolicyDetail': {'createdDate': 1722031916921, 'description': 'NetworkPolicy for digital-drugs collections', 'lastModifiedDate': 1722031916921, 'name': 'digital-drugs-policy', 'policy': [{'Rules': [{'Resource': ['collection/digital-