# S3 Event notifications using VAST Event Broker and Database

> [VAST DB Python SDK](https://vastdb-sdk.readthedocs.io/en/v1.1.0/index.html)<br>
> [VAST DB SDK Github](https://github.com/vast-data/vastdb_sdk/tree/main)<br>
> [Apache Arrow Documentation](https://arrow.apache.org/docs/python/api.html)<br>
> [SE LAB Cluster 204](https://10.143.11.204/)

>  Date: 2025-06-02<br>
<h2>Prerequisites</h2>
> VAST Cluster running version 5.3 or later.<br>
> VIP Pool configured and accessible from docker host<br>
> VMS IP accessible from docker host<br>


## Install and Import required libraries

In [None]:
!pip install vastpy
!pip install vastdb
!pip install boto3==1.35.0

In [None]:
import vastdb
from vastpy import VASTClient

import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.parquet as pq
import pandas as pd
import json
from datetime import datetime, timedelta
import pytz
import random
import time
import psutil
import os
import re
import io
import sys
import uuid
import logging
import ftplib
from IPython.display import display, HTML
import ipaddress
import boto3
from botocore.config import Config

## Define Variables

### Required VMS Information

In [None]:
# VMS connection settings. Replace the bracketed placeholders (and the
# credentials) with the values for your cluster before running.
vastvms_endpoint = "[VMS IP]"
vms_username = "admin"
vms_password = "123456"
vip_pool_name = "[VIP POOL NAME]"

# Tenant used for every object created below (1 = Default).
tenant_id = 1

# Unique ID for this run of the notebook: the last 12 hex digits of a random
# UUID (identical to the final dash-separated group of its string form).
demo_suffix = uuid.uuid4().hex[-12:]
print(f"All VMS Objects will be created with a suffix of {demo_suffix }")

### SDK Logging

In [None]:
# Configure logging for the notebook: records at INFO and above are appended
# to vastdb_sdk.log and also echoed to the console.
logging.basicConfig(
    handlers=[
        logging.FileHandler('vastdb_sdk.log', mode='a'),
        logging.StreamHandler(),  # Log to console
    ],
    format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s',
    level=logging.INFO,
)
# `log` is the logger the notebook cells write to; `logger` is the root logger.
log = logging.getLogger(__name__)
logger = logging.getLogger()


## Define Functions

## Setup VAST Cluster and Database

<h3>Create VMS Client Connection to Cluster</h3>

[VASTPY 0.3.13 Documentation](https://pypi.org/project/vastpy/)

In [None]:
# Open a VMS REST client with VASTPY, using the endpoint and credentials
# from the "Required VMS Information" cell.
client = VASTClient(
    user=vms_username,
    password=vms_password,
    address=vastvms_endpoint,
    version='latest',
)

### Verify VIP Pool is Available

In [None]:
#
# Query for VIP Pool
#
# Look up the VIP pool by name and record its ID, first IP and IP ranges.
# vip_pool_id stays None when the lookup fails (VMS error or no pool with
# that name), so the checks below are skipped in that case.
vip_pool_id = None
try:
    vip_pool = client.vippools.get(name=vip_pool_name)
    vip_pool_id = vip_pool[0]['id']
    vip_pool_ip = vip_pool[0]['start_ip']
    vip_pool_range = vip_pool[0]['ip_ranges']
    vastdb_endpoint = f"http://{vip_pool_ip}"
except Exception as e:
    log.critical(f"Error during VMS client Query of VIP POOLS: {e}")

if vip_pool_id is not None:
    # .get() so a pool record without a 'kafka_view_id' field is treated as
    # "not associated" instead of raising KeyError.
    attached_view_id = vip_pool[0].get('kafka_view_id')
    if attached_view_id:
        # Report the ID of the view the pool is attached to (not the pool's own ID).
        log.critical(f"The VIP Pool {vip_pool_name} is already associated with View ID {attached_view_id}, correct before continuing.")
    else:
        log.info(f"The VIP Pool {vip_pool_name} will be used for S3, Database and Kafka access.")
    
    
    

### Create Tabular Identity Policy

In [None]:
# Name of the identity policy created for this run; the run suffix is appended to it.
identitypolicy_name = f"S3-Events-Ident-{demo_suffix}"

def create_identity_policy(client, name, tenant_id):
    """Create an identity policy that allows all ``s3:Tabular*`` actions.

    Args:
        client: VMS client exposing ``s3policies.post``.
        name: Name of the policy to create.
        tenant_id: Tenant the policy is created for.

    Returns:
        The value returned by ``client.s3policies.post``.

    Raises:
        Exception: Any error raised by the VMS call is logged at CRITICAL
            level and re-raised.
    """
    allow_tabular = {
        "Sid": "Read_write_ALL_DB",
        "Action": "s3:Tabular*",
        "Effect": "Allow",
        "Resource": "arn:aws:s3:::*",
    }
    policy_document = {"Version": "2012-10-17", "Statement": [allow_tabular]}
    # The policy is sent as a JSON string, not as a nested object.
    policy_json = json.dumps(policy_document, indent=2)

    try:
        return client.s3policies.post(
            name=name,
            policy=policy_json,
            tenant_id=tenant_id,
        )
    except Exception as err:
        log.critical(err)
        raise


# Create the policy on the cluster and keep its ID for the user created later.
identity_policy = create_identity_policy(
    client, identitypolicy_name, tenant_id
)
identitypolicy_id = identity_policy['id']
log.info(f"Identity Policy ID: {identitypolicy_id}")

### Create Local User and Secret / Access Keys

In [None]:
# Name of the demo user; the run suffix is appended to it.
local_user_name = f"S3-Events-Usr-{demo_suffix}"
# UID string: the fixed prefix "2147483" followed by a random integer in 100..600.
local_user_id = f"2147483{random.randint(100, 600)}"

def create_local_user(client, LocalUser, local_user_id, IdentityPolicy_id, tenant_id):
    """Create a local user on the cluster and attach an identity policy.

    The user may create buckets but not delete them.

    Args:
        client: VMS client exposing ``users.post``.
        LocalUser: Name of the user to create.
        local_user_id: UID to assign to the user.
        IdentityPolicy_id: ID of the identity policy to attach.
        tenant_id: Accepted for signature symmetry; not sent in the request.

    Returns:
        The value returned by ``client.users.post``.

    Raises:
        Exception: Any error raised by the VMS call is logged at CRITICAL
            level and re-raised.
    """
    user_spec = dict(
        name=LocalUser,
        uid=local_user_id,
        local_provider_id=1,
        allow_create_bucket="true",
        allow_delete_bucket="false",
        s3_policies_ids=[IdentityPolicy_id],
    )
    try:
        return client.users.post(**user_spec)
    except Exception as err:
        log.critical(err)
        raise

#
# Call VMS API
#
# Create the user, then request an S3 access/secret key pair for it.
local_user = create_local_user(
    client, local_user_name, local_user_id, identitypolicy_id, tenant_id
)
if local_user:
    # From here on local_user_id holds the 'id' returned for the created
    # user, replacing the UID string that was passed in.
    local_user_id = local_user['id']
    try:
        response = client.users[local_user_id].access_keys.post(
            id=local_user_id,
            tenant_id=tenant_id,
        )
        S3_ACCESS_KEY = response['access_key']
        S3_SECRET_KEY = response['secret_key']
        log.info(f"Local User '{local_user_name}' now has S3 Keys.")
    except Exception as err:
        log.critical(err)
        raise

### Create Native S3 View Policy for: Database, S3 Bucket, and Event Broker

In [None]:
# Name of the view policy created for this run; the run suffix is appended to it.
view_policy_name = f"s3-events-vp-{demo_suffix}"

def create_view_policy(client, LocalUser, name, vip_pool_id, tenant_id):
    """Create an ``S3_NATIVE`` view policy with RW permission on one VIP pool.

    Args:
        client: VMS client exposing ``viewpolicies.post``.
        LocalUser: User name placed in the policy's ``s3_visibility`` list.
        name: Name of the view policy to create.
        vip_pool_id: ID of the VIP pool granted "RW" permission.
        tenant_id: Tenant the policy is created for.

    Returns:
        The value returned by ``client.viewpolicies.post``.

    Raises:
        Exception: Any error raised by the VMS call is logged at CRITICAL
            level and re-raised.
    """
    policy_spec = {
        "name": name,
        "flavor": "S3_NATIVE",
        "permission_per_vip_pool": {vip_pool_id: "RW"},
        "tenant_id": tenant_id,
        "s3_visibility": [LocalUser],
    }
    try:
        return client.viewpolicies.post(**policy_spec)
    except Exception as err:
        log.critical(err)
        raise
#
# Query for VIP Pool
#
# Re-resolve the VIP pool so this cell can be run on its own, then create
# the view policy that grants access through that pool.
vip_pool_id = None
# Bound up front so the result check below logs a failure, rather than
# raising NameError, when the VIP pool lookup fails and nothing is created.
view_policy = None
try:
    vip_pool = client.vippools.get(name=vip_pool_name)
    vip_pool_id = vip_pool[0]['id']
    vip_pool_ip = vip_pool[0]['start_ip']
    vastdb_endpoint = f"http://{vip_pool_ip}"
except Exception as e:
    log.critical(f"Error during VMS client Query of VIP POOLS: {e}")

if vip_pool_id:
    print(f"The '{vip_pool_name}' VIP Pool was found and has an ID of {vip_pool_id}.")
    print(f"The S3 Bucket, VAST Database and Kafka Event Broker will be accessed using the VIP of {vastdb_endpoint}.")
    log.info(f"Creating View Policy: {view_policy_name}.")
    view_policy = create_view_policy(client, local_user_name, view_policy_name, vip_pool_id, tenant_id)

if view_policy:
    # Only read the ID once a policy object is known to exist.
    viewpolicy_id = view_policy['id']
    log.info(f"View Policy ID {viewpolicy_id} created.")
else:
    log.critical("Failed to create View Policy.")



### Create Kafka Event Broker (View)

In [None]:
# Name shared by the Kafka view's bucket and path; the run suffix is appended.
kafka_name = f"s3-events-kafka-{demo_suffix}"


def create_kafka_view(client, LocalUser, name, tenant_id):
    """Create the view that serves as the Kafka event broker.

    The view is created at ``/<name>`` with the KAFKA, S3 and DATABASE
    protocols. The view policy ID and the Kafka VIP pool are read from the
    notebook-level variables ``viewpolicy_id`` and ``vip_pool_id``.

    Args:
        client: VMS client exposing ``views.post``.
        LocalUser: User name set as the bucket owner.
        name: Bucket name; also used to build the view path.
        tenant_id: Tenant the view is created for.

    Returns:
        The value returned by ``client.views.post``.

    Raises:
        Exception: Any error is logged at CRITICAL level and re-raised.
    """
    view_spec = {
        "path": f"/{name}",
        "bucket": name,
        "bucket_owner": LocalUser,
        "policy_id": viewpolicy_id,
        "tenant_id": tenant_id,
        "protocols": ["KAFKA", "S3", "DATABASE"],
        "kafka_vip_pools": [vip_pool_id],
        "s3_object_ownership_rule": "ObjectWriter",
        "create_dir": True,
    }
    try:
        created_view = client.views.post(**view_spec)
        log.info(f"Kafka view {name} created.")
        return created_view
    except Exception as err:
        log.critical(err)
        raise
        
# Create the Kafka view and keep its ID for the cleanup section.
kafka_view = create_kafka_view(client, local_user_name, kafka_name, tenant_id)

if not kafka_view:
    log.critical("Failed to create Kafka View.")
else:
    kafka_view_id = kafka_view['id']
    log.info(f"Kafka View ID {kafka_view_id} created.")

### Create Kafka Topic

In [None]:
# Name of the Kafka topic that receives the S3 events; the run suffix is appended.
kafka_topic = f"s3-events-topic-{demo_suffix}"


def create_kafka_topic(client, eb_name, name, tenant_id):
    """Create a single-partition Kafka topic on an event broker view.

    Args:
        client: VMS client exposing ``topics.post``.
        eb_name: Name of the event broker, sent as ``database_name``.
        name: Name of the topic to create.
        tenant_id: Tenant the topic is created for.

    Returns:
        The value returned by ``client.topics.post``.

    Raises:
        Exception: Any error is logged at CRITICAL level and re-raised.
    """
    topic_spec = dict(
        database_name=eb_name,
        schema_name="kafka_topics",
        name=name,
        topic_partitions=1,
        retention_ms=21600000,  # 6 hours
        tenant_id=tenant_id,
    )
    try:
        created_topic = client.topics.post(**topic_spec)
        log.info(f"Kafka topic {name} created for Event Broker {eb_name}.")
        return created_topic
    except Exception as err:
        log.critical(err)
        raise

# Create the topic on the Kafka view. create_kafka_topic raises on a VMS
# error, so this check only covers a call that returned without a result.
kafka_topic_obj = create_kafka_topic(client, kafka_name, kafka_topic, tenant_id)

# Report failure only when NO topic object came back (the original test was
# inverted and logged a failure on success).
# NOTE(review): assumes topics.post returns a non-empty value on success - confirm.
if not kafka_topic_obj:
    log.critical("Failed to create Kafka Topic.")


### Configure Cluster Event Notification Broker

In [None]:
# Name of the cluster-level event notification broker; the run suffix is appended.
cluster_eb_name = f"s3-events-eb-{demo_suffix}"


def create_eb(client, eb_name, vip_pool_range):
    """Register a Kafka broker with one address per IP of the VIP pool range.

    Every IPv4 address from the start to the end of the FIRST range in
    ``vip_pool_range`` (both inclusive) is listed with port 9092.

    Args:
        client: VMS client exposing ``kafkabrokers.post``.
        eb_name: Name of the broker configuration to create.
        vip_pool_range: Sequence of ``[start_ip, end_ip]`` pairs; only the
            first pair is used.

    Returns:
        The value returned by ``client.kafkabrokers.post``.

    Raises:
        Exception: Any error from the VMS call is logged at CRITICAL level
            and re-raised. A malformed IP or an empty ``vip_pool_range``
            raises before the call is made.
    """
    # Convert to integers so the inclusive range can be enumerated.
    start_ip = int(ipaddress.IPv4Address(vip_pool_range[0][0]))
    end_ip = int(ipaddress.IPv4Address(vip_pool_range[0][1]))

    addresses = [
        {"host": str(ipaddress.IPv4Address(ip)), "port": 9092}
        for ip in range(start_ip, end_ip + 1)
    ]

    try:
        eb = client.kafkabrokers.post(name=eb_name, addresses=addresses)
        log.info(f"Event Broker {eb_name} created.")
        return eb
    except Exception as e:
        log.critical(e)
        raise

# Register the broker and keep its ID for the bucket notification settings.
eb = create_eb(client, cluster_eb_name, vip_pool_range)

if not eb:
    log.critical("Failed to create Notification Broker.")
else:
    eb_id = eb['id']
    log.info(f"Notification Broker ID {eb_id} created.")

### Create S3 Bucket and Configure Notifications

In [None]:
# Name of the S3 bucket whose events are published; the run suffix is appended.
S3_bucket_name = f"s3-events-bkt-{demo_suffix}"


def create_S3_bucket(client, LocalUser, name, tenant_id):
    """Create an S3 view (bucket) with event notifications enabled.

    The notification entry sends object created, tagging and removed events
    to the Kafka topic, with empty prefix and suffix filters. The view policy
    ID, broker name, broker ID and topic are read from the notebook-level
    variables ``viewpolicy_id``, ``cluster_eb_name``, ``eb_id`` and
    ``kafka_topic``.

    Args:
        client: VMS client exposing ``views.post``.
        LocalUser: User name set as the bucket owner.
        name: Bucket name; also used to build the view path.
        tenant_id: Tenant the view is created for.

    Returns:
        The value returned by ``client.views.post``.

    Raises:
        Exception: Any error is logged at CRITICAL level and re-raised.
    """
    notification = {
        "name": cluster_eb_name,
        "topic": kafka_topic,
        "broker_id": eb_id,
        "triggers": [
            "S3_OBJECT_CREATED_ALL",
            "S3_OBJECT_TAGGING_ALL",
            "S3_OBJECT_REMOVED_ALL",
        ],
        "prefix_filter": "",
        "suffix_filter": "",
    }
    view_spec = {
        "path": f"/{name}",
        "bucket": name,
        "bucket_owner": LocalUser,
        "policy_id": viewpolicy_id,
        "tenant_id": tenant_id,
        "protocols": ["S3"],
        "s3_object_ownership_rule": "ObjectWriter",
        "event_notifications": [notification],
        "create_dir": True,
    }

    try:
        created_view = client.views.post(**view_spec)
        log.info(f"S3 bucket {name} created.")
        return created_view
    except Exception as err:
        log.critical(err)
        raise
        
# Create the bucket view and keep its ID for the cleanup section.
S3_view = create_S3_bucket(client, local_user_name, S3_bucket_name, tenant_id)

if not S3_view:
    log.critical("Failed to create S3 View / bucket.")
else:
    S3_view_id = S3_view['id']
    log.info(f"S3 View ID {S3_view_id} created.")

### Create VAST Database

In [None]:
# Bucket name of the VAST Database view (run suffix appended) and the
# matching view path directly under the root.
vastdb_bucket = f"s3-events-db-{demo_suffix}"
vastdb_path = "/" + vastdb_bucket

def create_database_view(client, vastdb_bucket, viewpolicy_id, vastdb_path,  LocalUser, tenant_id):
    """Create a view with the DATABASE and S3 protocols (a VAST Database).

    The request asks for the directory at ``vastdb_path`` to be created.

    Args:
        client: VMS client exposing ``views.post``.
        vastdb_bucket: Bucket name of the database view.
        viewpolicy_id: View policy ID, sent as both ``policy`` and ``policy_id``.
        vastdb_path: Path of the view.
        LocalUser: User name set as the bucket owner.
        tenant_id: Tenant the view is created for.

    Returns:
        The value returned by ``client.views.post``.

    Raises:
        Exception: Any error raised by the VMS call is logged at CRITICAL
            level and re-raised.
    """
    view_spec = {
        "bucket": vastdb_bucket,
        "path": vastdb_path,
        "policy": viewpolicy_id,
        "bucket_owner": LocalUser,
        "policy_id": viewpolicy_id,
        "protocols": ["DATABASE", "S3"],
        "share_acl": {"acl": [], "enabled": "false"},
        "create_dir": "true",
        "tenant_id": tenant_id,
    }
    try:
        return client.views.post(**view_spec)
    except Exception as err:
        log.critical(err)
        raise
        
# Create the database view and keep its ID for the cleanup section.
db = create_database_view(
    client,
    vastdb_bucket,
    viewpolicy_id,
    vastdb_path,
    local_user_name,
    tenant_id,
)
if not db:
    log.critical("Failed to created Database view.")
else:
    vastdb_id = db['id']
    log.info(f"The Database was created at {db['created']}")

## Export Demo State to Disk

In [None]:
# Names of the notebook variables that the cleanup section (and the companion
# VAST DB notebook) need after a kernel restart.
# "vastdb_id" is the name the database cell actually assigns and the
# "Delete Database" cell reads; the previously listed "database_id" is never
# defined, so the database view ID was silently left out of the state file.
var_names = [
    "vastvms_endpoint", "vms_username", "vms_password",
    "vip_pool_name", "vip_pool_ip", "tenant_id", "identitypolicy_name",
    "identitypolicy_id", "local_user_name", "local_user_id",
    "S3_ACCESS_KEY", "S3_SECRET_KEY", "view_policy_name",
    "viewpolicy_id", "kafka_name", "kafka_view_id",
    "kafka_topic", "cluster_eb_name", "eb_id",
    "S3_bucket_name", "S3_view_id", "vastdb_bucket",
    "vastdb_path", "vastdb_id", "demo_suffix"
]


# Build a dictionary of {variable_name: value}; names that are not defined
# (for example because an earlier cell failed) are skipped.
data_to_save = {name: globals()[name] for name in var_names if name in globals()}

# Make skipped names visible so an incomplete state file is noticed now
# rather than during cleanup.
missing_vars = [name for name in var_names if name not in data_to_save]
if missing_vars:
    print(f"Warning: not saved to demo_state.json (undefined): {', '.join(missing_vars)}")

# Write to a JSON file.
# NOTE: the file contains the VMS password and the S3 secret key in plain
# text; protect or delete it after the demo.
with open("demo_state.json", "w") as f:
    json.dump(data_to_save, f, indent=2)


## Login to VMS and review configuration.

* Local User with S3 keys and Identity Policy
* 3 Views Created (DB, Bucket, Kafka)
* Cluster Notification Broker configured
* Review S3 Notification configuration and review options (multiple topics for prefix/suffix)


## Setup VAST DB 

> Switch to the S3 Events VAST DB Notebook to complete the setup.

## Cleanup Cluster

### Load Demo State and define variables

In [None]:
# Load the variables saved by the "Export Demo State to Disk" cell.
with open("demo_state.json", "r") as f:
    data = json.load(f)
# Publish every saved entry as a notebook-level variable of the same name.
globals().update(data)

### Delete Tables

In [None]:
#
# Establish Session with VAST database
#
endpoint_url = f"http://{vip_pool_ip}"
# Empty placeholder so the check below reports a failure if connect() raises.
session = {}
try:
    session = vastdb.connect(
        endpoint=endpoint_url,
        access=S3_ACCESS_KEY,
        secret=S3_SECRET_KEY,
    )
except Exception as err:
    log.critical(err)

if not session:
    log.critical("Unable to connect to VAST DB.")
else:
    log.info("VAST DB Session started")

In [None]:
# Drop every table in this run's schema, then list whatever is left.
with session.transaction() as tx:
    bucket = tx.bucket(vastdb_bucket)
    schema = bucket.schema(demo_suffix)
    for table in schema.tables():
        print(f"Deleting table '{table.name}' in schema '{schema.name}' for database '{vastdb_bucket}'.")
        table.drop()
    # Query the schema again for any remaining tables
    tables = schema.tables()

if not tables:
    print("No Tables in Schema")
else:
    print(f"The following Tables are in the Schema '{schema.name}':")
    for table in tables:
        print(table.name)

### Delete Schema

In [None]:
# Drop this run's schema (named after the run suffix) and, inside the same
# transaction, list the schemas that remain in the database bucket.
with session.transaction() as tx:
    bucket = tx.bucket(vastdb_bucket)
    schema = bucket.schema(demo_suffix)
    schema.drop()
    # Listed after the drop so the printout below reflects the result.
    schemas = bucket.schemas()

print(f"The schemas in the database {vastdb_bucket} are:")
for schema in schemas:
    print(schema.name)
       

### Delete Database

In [None]:
# Best-effort removal of the database view: an error is printed, not raised,
# so the remaining cleanup cells can still run.
try:
    client.views[vastdb_id].delete()
except Exception as err:
    print(err)
    

### Delete S3 Bucket

In [None]:
#
# Purge Objects from Bucket
#
# Objects are deleted through the S3 API with the demo user's keys, using
# path-style addressing against the VIP.
endpoint_url = f"http://{vip_pool_ip}"
s3_client = boto3.client(
    's3',
    endpoint_url=endpoint_url,
    aws_access_key_id=S3_ACCESS_KEY,
    aws_secret_access_key=S3_SECRET_KEY,
    config=Config(signature_version='s3v4',
                  parameter_validation=False,
                  s3={'payload_signing_enabled': False, 'addressing_style': 'path', 'checksum_algorithm': None}
                 ),
    verify=False  # Disables TLS certificate verification; the endpoint here is plain http
)
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=S3_bucket_name)

objects_to_delete = []

for page in pages:
    # A page has no 'Contents' key when it lists no objects.
    for obj in page.get('Contents', []):
        objects_to_delete.append({'Key': obj['Key']})

        # Delete in batches of 1000 (API limit)
        if len(objects_to_delete) == 1000:
            # Must go through s3_client; the name `s3` is not defined anywhere.
            s3_client.delete_objects(Bucket=S3_bucket_name, Delete={'Objects': objects_to_delete})
            objects_to_delete = []

# Delete any remaining objects
if objects_to_delete:
    s3_client.delete_objects(Bucket=S3_bucket_name, Delete={'Objects': objects_to_delete})

print(f"All objects deleted from bucket '{S3_bucket_name}'.")

#
# Remove S3 View
#
# Best-effort: an error is printed, not raised.
try:
    client.views[S3_view_id].delete()
except Exception as err:
    print(err)

### Remove Event Notification Broker Configuration

In [None]:
# Best-effort removal of the cluster notification broker: an error is
# printed, not raised.
try:
    client.kafkabrokers[eb_id].delete()
except Exception as err:
    print(err)

### Remove Kafka View

In [None]:
#
# Remove Topic
#
# The topic is identified by the event broker (database) name, the
# "kafka_topics" schema and the topic name. Unlike the view deletions, a
# failure here is logged and re-raised.
payload = dict(
    database_name=kafka_name,
    schema_name="kafka_topics",
    name=kafka_topic,
    tenant_id=tenant_id,
)

try:
    client.topics.delete(**payload)
except Exception as err:
    log.critical(err)
    raise

In [None]:
#
# Remove Kafka Schema
#
# Delete the "kafka_topics" schema of the event broker view; a failure is
# logged and re-raised.
payload = dict(
    database_name=kafka_name,
    name="kafka_topics",
    tenant_id=tenant_id,
)
try:
    client.schemas.delete(**payload)
except Exception as err:
    log.critical(err)
    raise

#
# Remove Kafka View
#
# Best-effort: an error is printed, not raised.
try:
    client.views[kafka_view_id].delete()
except Exception as err:
    print(err)

### Delete View Policy

In [None]:
# Best-effort removal of the view policy: an error is printed, not raised.
try:
    client.viewpolicies[viewpolicy_id].delete()
except Exception as err:
    print(err)

### Delete SDK Demo User

In [None]:
# Best-effort removal of the demo user: an error is printed, not raised.
try:
    client.users[local_user_id].delete()
except Exception as err:
    print(err)

### Delete Identity Policy

In [None]:
# Best-effort removal of the identity policy: an error is printed, not raised.
try:
    client.s3policies[identitypolicy_id].delete()
except Exception as err:
    print(err)

In [None]:
# End-of-notebook marker: a bare string expression with no other effect.
"""
      END of Script
"""