In [2]:
import os
import boto3
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup
from botocore.exceptions import ClientError
from pyathena import connect
import time

# ✅ Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()

# ✅ Use SageMaker's default bucket
bucket = sagemaker_session.default_bucket()

# ✅ Set up Athena connection
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# ✅ Define variables
ATHENA_DATABASE = "sagemaker_featurestore"
ATHENA_TABLES_TO_DROP = ["development_data", "production_data"]
FEATURE_GROUP_NAME = "airline_delay_features"
GLUE_DATABASE_TO_DROP = "db_airline_delay_cause"

# ✅ Initialize AWS clients
s3_client = boto3.client("s3")
athena_client = boto3.client("athena")
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Remove local files
def remove_local_files():
    files_to_remove = ["development_data.csv", "production_data.csv"]
    for file in files_to_remove:
        if os.path.exists(file):
            os.remove(file)
            print(f"🗑️ Removed {file} from local storage.")
        else:
            print(f"ℹ️ {file} not found in local storage.")

# ✅ Remove files from S3
def remove_s3_files():
    for file in ["development_data.csv", "production_data.csv"]:
        try:
            s3_client.delete_object(Bucket=bucket, Key=file)
            print(f"🗑️ Deleted {file} from S3 bucket {bucket}.")
        except ClientError as e:
            print(f"❌ Error deleting {file} from S3: {e}")

# ✅ Drop specific Athena tables
def drop_athena_tables():
    for table in ATHENA_TABLES_TO_DROP:
        query = f"DROP TABLE IF EXISTS {ATHENA_DATABASE}.{table}"
        try:
            response = athena_client.start_query_execution(
                QueryString=query,
                QueryExecutionContext={"Database": ATHENA_DATABASE},
                ResultConfiguration={"OutputLocation": f"s3://{bucket}/athena-logs/"},
            )
            print(f"🗑️ Athena table `{table}` drop request sent.")
        except ClientError as e:
            print(f"❌ Error dropping Athena table `{table}`: {e}")

# ✅ Delete old `airline_delay_features_*` tables from Athena
def drop_old_airline_feature_tables():
    try:
        tables = athena_client.list_table_metadata(CatalogName="AwsDataCatalog", DatabaseName=ATHENA_DATABASE)["TableMetadataList"]
        table_names = [table["Name"] for table in tables]

        tables_to_delete = [table for table in table_names if table.startswith("airline_delay_features")]

        if tables_to_delete:
            print(f"\n🚀 **Deleting {len(tables_to_delete)} outdated `airline_delay_features_*` tables...**")

            for table in tables_to_delete:
                try:
                    glue_client.delete_table(DatabaseName=ATHENA_DATABASE, Name=table)
                    print(f"✅ Successfully deleted `{table}` from Athena.")
                except ClientError as e:
                    print(f"❌ Error deleting `{table}`:", e)

        else:
            print("\n✅ No outdated `airline_delay_features_*` tables found.")

    except ClientError as e:
        print("❌ Error listing Athena tables:", e)

# ✅ Delete Feature Store Feature Group
def delete_feature_group():
    try:
        print(f"🔍 Checking if Feature Group `{FEATURE_GROUP_NAME}` exists...")
        existing_groups = sagemaker_client.list_feature_groups()['FeatureGroupSummaries']
        existing_group_names = [fg['FeatureGroupName'] for fg in existing_groups]

        if FEATURE_GROUP_NAME in existing_group_names:
            print(f"🚀 Feature Group `{FEATURE_GROUP_NAME}` found. Deleting...")
            sagemaker_client.delete_feature_group(FeatureGroupName=FEATURE_GROUP_NAME)
            
            while True:
                existing_groups = sagemaker_client.list_feature_groups()['FeatureGroupSummaries']
                existing_group_names = [fg['FeatureGroupName'] for fg in existing_groups]
                if FEATURE_GROUP_NAME not in existing_group_names:
                    print(f"✅ Feature Group `{FEATURE_GROUP_NAME}` deleted successfully.")
                    break
                print("⏳ Waiting for Feature Group deletion...")
                time.sleep(5)
        else:
            print(f"✅ Feature Group `{FEATURE_GROUP_NAME}` does not exist. No deletion needed.")
    except Exception as e:
        print(f"❌ Error deleting Feature Group: {e}")

# ✅ Delete AWS Glue database
def delete_glue_database():
    try:
        glue_client.delete_database(Name=GLUE_DATABASE_TO_DROP)
        print(f"✅ Glue Database `{GLUE_DATABASE_TO_DROP}` deleted successfully.")
    except ClientError as e:
        print(f"❌ Error deleting Glue Database `{GLUE_DATABASE_TO_DROP}`: {e}")

# ✅ Clear Jupyter Notebook %store
def clear_ipython_store():
    try:
        from IPython.core.interactiveshell import InteractiveShell
        ip = InteractiveShell.instance()
        ip.db.clear()
        print("✅ Jupyter Notebook %store cleared successfully.")
    except Exception as e:
        print(f"❌ Error clearing Jupyter Notebook %store: {e}")

# ✅ Clean all monitors (SageMaker Model Monitor schedules)
def delete_all_monitors():
    try:
        response = sagemaker_client.list_monitoring_schedules()
        schedules = response.get("MonitoringScheduleSummaries", [])
        if schedules:
            print(f"\n🚀 **Deleting {len(schedules)} monitoring schedules...**")
            for schedule in schedules:
                name = schedule["MonitoringScheduleName"]
                try:
                    sagemaker_client.delete_monitoring_schedule(MonitoringScheduleName=name)
                    print(f"✅ Deleted monitoring schedule: {name}")
                except Exception as e:
                    print(f"❌ Error deleting monitoring schedule {name}: {e}")
        else:
            print("✅ No monitoring schedules found.")
    except Exception as e:
        print(f"❌ Error listing monitoring schedules: {e}")

# ✅ Run all cleanup operations
def clean_state():
    print("\n🚀 **Starting Full Cleanup...**\n")
    remove_local_files()
    remove_s3_files()
    drop_athena_tables()
    drop_old_airline_feature_tables()
    delete_feature_group()
    delete_glue_database()
    clear_ipython_store()
    delete_all_monitors()  # <-- New function to clean all monitors
    print("\n✅ **Cleanup completed successfully!**")

# ✅ Execute the cleanup function
clean_state()



🚀 **Starting Full Cleanup...**

ℹ️ development_data.csv not found in local storage.
ℹ️ production_data.csv not found in local storage.
🗑️ Deleted development_data.csv from S3 bucket sagemaker-us-east-1-607916531205.
🗑️ Deleted production_data.csv from S3 bucket sagemaker-us-east-1-607916531205.
🗑️ Athena table `development_data` drop request sent.
🗑️ Athena table `production_data` drop request sent.

✅ No outdated `airline_delay_features_*` tables found.
🔍 Checking if Feature Group `airline_delay_features` exists...
✅ Feature Group `airline_delay_features` does not exist. No deletion needed.
❌ Error deleting Glue Database `db_airline_delay_cause`: An error occurred (EntityNotFoundException) when calling the DeleteDatabase operation: Database db_airline_delay_cause not found.
✅ Jupyter Notebook %store cleared successfully.
✅ No monitoring schedules found.

✅ **Cleanup completed successfully!**


# Use code below to check what else is on your system and whether something was left behind

In [9]:
import boto3
from botocore.exceptions import ClientError
from pyathena import connect

# ✅ Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
sagemaker_session = boto3.Session()

# ✅ Use SageMaker's default bucket
bucket = sagemaker_session.client("s3").list_buckets()["Buckets"][0]["Name"]

# ✅ Set up Athena connection
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# ✅ Initialize AWS clients
s3_client = boto3.client("s3", region_name=region)
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Function to list AWS resources
def list_aws_resources():
    print("\n--- 📌 AWS Resource Overview ---")
    
    # ✅ List Athena Databases
    try:
        databases = athena_client.list_databases(CatalogName="AwsDataCatalog")["DatabaseList"]
        database_names = [db["Name"] for db in databases]
        print("\n📌 **Athena Databases:**")
        for db in database_names:
            print(f"   - {db}")
    except ClientError as e:
        print("❌ Error listing Athena databases:", e)
        database_names = []  # Ensure it doesn't break the next step

    # ✅ List Athena Tables Per Database
    for database in database_names:
        try:
            tables = athena_client.list_table_metadata(CatalogName="AwsDataCatalog", DatabaseName=database)["TableMetadataList"]
            table_names = [table["Name"] for table in tables]

            print(f"\n📌 **Tables in Athena Database: `{database}`**")
            if table_names:
                for table in table_names:
                    print(f"   - {table}")
            else:
                print("   ❌ No tables found in this database.")

        except ClientError as e:
            print(f"❌ Error listing tables in `{database}`:", e)

    # ✅ List Feature Store Groups
    try:
        feature_groups = sagemaker_client.list_feature_groups()["FeatureGroupSummaries"]
        feature_group_names = [fg["FeatureGroupName"] for fg in feature_groups]
        print("\n📌 **Feature Store Groups:**")
        if feature_group_names:
            for fg in feature_group_names:
                print(f"   - {fg}")
        else:
            print("   ❌ No Feature Groups found.")
    except ClientError as e:
        print("❌ Error listing Feature Groups:", e)

    # ✅ List S3 Files
    try:
        objects = s3_client.list_objects_v2(Bucket=bucket)
        s3_files = [obj["Key"] for obj in objects.get("Contents", [])]
        print("\n📌 **S3 Files in Bucket `{bucket}`:**")
        if s3_files:
            for file in s3_files[:10]:  # Show only first 10 files for readability
                print(f"   - {file}")
            if len(s3_files) > 10:
                print(f"   ... ({len(s3_files)} total files)")
        else:
            print("   ❌ No files found.")
    except ClientError as e:
        print("❌ Error listing S3 files:", e)

    # ✅ List Glue Databases
    try:
        glue_databases = glue_client.get_databases()["DatabaseList"]
        glue_db_names = [db["Name"] for db in glue_databases]
        print("\n📌 **Glue Databases:**")
        if glue_db_names:
            for db in glue_db_names:
                print(f"   - {db}")
        else:
            print("   ❌ No Glue Databases found.")
    except ClientError as e:
        print("❌ Error listing Glue databases:", e)

# ✅ Run AWS resource listing
list_aws_resources()



--- 📌 AWS Resource Overview ---

📌 **Athena Databases:**

📌 **Feature Store Groups:**
   ❌ No Feature Groups found.

📌 **S3 Files in Bucket `{bucket}`:**
   ❌ No files found.

📌 **Glue Databases:**
   ❌ No Glue Databases found.
