In [2]:
import os
import boto3
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup
from botocore.exceptions import ClientError
from pyathena import connect
import time

# Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()

# Use SageMaker's default bucket
bucket = sagemaker_session.default_bucket()

# Set up Athena connection with the default bucket
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# Define variables
ATHENA_DATABASE = "db_airline_delay_cause"
ATHENA_TABLES_TO_DROP = ["development_data", "production_data"]
FEATURE_GROUP_NAME = "airline_delay_features"

# Initialize AWS clients
s3_client = boto3.client("s3")
athena_client = boto3.client("athena")
sagemaker_client = boto3.client("sagemaker", region_name=region)

# Remove local files
def remove_local_files():
    files_to_remove = ["development_data.csv", "production_data.csv"]
    for file in files_to_remove:
        if os.path.exists(file):
            os.remove(file)
            print(f"Removed {file} from local storage.")
        else:
            print(f"{file} not found in local storage.")

# Remove files from S3
def remove_s3_files():
    for file in ["development_data.csv", "production_data.csv"]:
        try:
            s3_client.delete_object(Bucket=bucket, Key=file)
            print(f"Deleted {file} from S3 bucket {bucket}.")
        except ClientError as e:
            print(f"Error deleting {file} from S3: {e}")

# Drop Athena tables
def drop_athena_tables():
    for table in ATHENA_TABLES_TO_DROP:
        query = f"DROP TABLE IF EXISTS {ATHENA_DATABASE}.{table}"
        try:
            response = athena_client.start_query_execution(
                QueryString=query,
                QueryExecutionContext={"Database": ATHENA_DATABASE},
                ResultConfiguration={"OutputLocation": f"s3://{bucket}/athena-logs/"},
            )
            print(f"Athena table {table} drop request sent.")
        except ClientError as e:
            print(f"Error dropping Athena table {table}: {e}")

# Delete feature store feature group
def delete_feature_group():
    try:
        print(f"🔍 Checking if Feature Group '{FEATURE_GROUP_NAME}' exists...")
        existing_groups = sagemaker_client.list_feature_groups()['FeatureGroupSummaries']
        existing_group_names = [fg['FeatureGroupName'] for fg in existing_groups]

        if FEATURE_GROUP_NAME in existing_group_names:
            print(f"🚀 Feature Group '{FEATURE_GROUP_NAME}' found. Deleting...")
            sagemaker_client.delete_feature_group(FeatureGroupName=FEATURE_GROUP_NAME)
            
            while True:
                existing_groups = sagemaker_client.list_feature_groups()['FeatureGroupSummaries']
                existing_group_names = [fg['FeatureGroupName'] for fg in existing_groups]
                if FEATURE_GROUP_NAME not in existing_group_names:
                    print(f"✅ Feature Group '{FEATURE_GROUP_NAME}' deleted successfully.")
                    break
                print("⏳ Waiting for Feature Group deletion...")
                time.sleep(5)
        else:
            print(f"✅ Feature Group '{FEATURE_GROUP_NAME}' does not exist. No deletion needed.")
    except Exception as e:
        print(f"❌ Error deleting Feature Group: {e}")

# Run all cleanup operations
def clean_state():
    print("Starting cleanup...")
    remove_local_files()
    remove_s3_files()
    drop_athena_tables()
    delete_feature_group()
    print("Cleanup completed successfully.")

# Execute the cleanup function
clean_state()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Starting cleanup...
Removed development_data.csv from local storage.
Removed production_data.csv from local storage.
Deleted development_data.csv from S3 bucket sagemaker-us-east-1-607916531205.
Deleted production_data.csv from S3 bucket sagemaker-us-east-1-607916531205.
Athena table development_data drop request sent.
Athena table production_data drop request sent.
🔍 Checking if Feature Group 'airline_delay_features' exists...
🚀 Feature Group 'airline_delay_features' found. Deleting...
⏳ Waiting for Feature Group deletion...
✅ Feature Group 'airline_delay_features' deleted successfully.
Cleanup completed successfully.


In [3]:
import boto3
from botocore.exceptions import ClientError
from pyathena import connect

# Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
sagemaker_session = boto3.Session()

# Use SageMaker's default bucket
bucket = sagemaker_session.client("s3").list_buckets()["Buckets"][0]["Name"]

# Set up Athena connection
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# Initialize AWS clients
s3_client = boto3.client("s3", region_name=region)
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# Function to list AWS resources
def list_aws_resources():
    print("\n--- AWS Resource Overview ---")
    
    # List Athena databases
    try:
        databases = athena_client.list_databases(CatalogName="AwsDataCatalog")["DatabaseList"]
        print("Athena Databases:", [db["Name"] for db in databases])
    except ClientError as e:
        print("Error listing Athena databases:", e)
    
    # List Athena tables
    try:
        tables = athena_client.list_table_metadata(CatalogName="AwsDataCatalog", DatabaseName="default")["TableMetadataList"]
        print("Athena Tables:", [table["Name"] for table in tables])
    except ClientError as e:
        print("Error listing Athena tables:", e)
    
    # List Feature Store groups
    try:
        feature_groups = sagemaker_client.list_feature_groups()["FeatureGroupSummaries"]
        print("Feature Store Groups:", [fg["FeatureGroupName"] for fg in feature_groups])
    except ClientError as e:
        print("Error listing Feature Groups:", e)
    
    # List S3 files
    try:
        objects = s3_client.list_objects_v2(Bucket=bucket)
        s3_files = [obj["Key"] for obj in objects.get("Contents", [])]
        print("S3 Files:", s3_files if s3_files else "No files found.")
    except ClientError as e:
        print("Error listing S3 files:", e)
    
    # List AWS Glue databases
    try:
        glue_databases = glue_client.get_databases()["DatabaseList"]
        print("Glue Databases:", [db["Name"] for db in glue_databases])
    except ClientError as e:
        print("Error listing Glue databases:", e)
    
# Run AWS resource listing
list_aws_resources()



--- AWS Resource Overview ---
Athena Databases: ['db_airline_delay_cause', 'default', 'dsoaws', 'hw2_db', 'sagemaker_featurestore']
Athena Tables: []
Feature Store Groups: ['hw3-neighborhood-feature-group-25-07-28-14']
S3 Files: ['tables/0558bba1-37f0-4b3f-b603-96d849275bf6-manifest.csv', 'tables/0558bba1-37f0-4b3f-b603-96d849275bf6.metadata', 'tables/0a110ce8-6d7d-417b-9dfa-86e82a1fc0bd-manifest.csv', 'tables/0a110ce8-6d7d-417b-9dfa-86e82a1fc0bd.metadata', 'tables/0f50cace-32ad-44e5-a0bb-e9379cb785ba-manifest.csv', 'tables/0f50cace-32ad-44e5-a0bb-e9379cb785ba.metadata', 'tables/11f95b20-292f-4630-b8d9-c02d4903c1dc-manifest.csv', 'tables/11f95b20-292f-4630-b8d9-c02d4903c1dc.metadata', 'tables/14194dc5-0d0d-48d5-9985-67d21baaebf6-manifest.csv', 'tables/14194dc5-0d0d-48d5-9985-67d21baaebf6.metadata', 'tables/1ec6bcda-76c4-412e-b558-59f79a8eb941-manifest.csv', 'tables/1ec6bcda-76c4-412e-b558-59f79a8eb941.metadata', 'tables/234b483a-9c36-4f0a-b203-0960496bc346-manifest.csv', 'tables/234b4