In [23]:
import os
import boto3
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup
from botocore.exceptions import ClientError
from pyathena import connect
import time

# ✅ Retrieve stored variables from previous notebooks
# Names this cleanup cell tries to restore from IPython's `%store`. The
# feature-store names feed the Athena/Feature Group constants defined below;
# the model paths and endpoint name are read by the delete_* functions.
stored_variables = ["dev_feature_store_table", "prod_feature_store_table", 
                    "dev_feature_group_name", "prod_feature_group_name",
                    "baseline_model_path", "baseline_model_logistic_path",
                    "endpoint_name_single_request"]

# Restore one name at a time so that a missing entry does not prevent the
# remaining names from being loaded.
for var in stored_variables:
    try:
        # IPython line magic (not plain Python); `{var}` is replaced with the
        # loop variable's value before the magic runs.
        %store -r {var}
    except KeyError:
        # NOTE(review): `%store -r` may report a missing name by printing a
        # message instead of raising KeyError — confirm this branch is reachable.
        print(f"⚠️ Warning: `{var}` is not stored. Skipping...")

print("✅ Stored variables loaded (if available).")

# ✅ Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()

# ✅ Use SageMaker's default bucket
bucket = sagemaker_session.default_bucket()
prefix = "flight-delay-prediction-xgboost"  # ✅ Ensure this matches what was used in training

# ✅ Set up Athena connection
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# ✅ Define variables (check if they exist before using)
ATHENA_DATABASE = "sagemaker_featurestore"
# Look the table names up by *name* so that a variable that was never restored
# from %store is skipped instead of raising NameError while building the list.
ATHENA_TABLES_TO_DROP = [
    globals()[name]
    for name in ("dev_feature_store_table", "prod_feature_store_table")
    if name in globals()
]
GLUE_DATABASE_TO_DROP = "db_airline_delay_cause"

# ✅ Initialize AWS clients
# Every client is pinned to the session's region so they all target the same
# region as the Athena connection above.
s3_client = boto3.client("s3", region_name=region)
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Feature Group Names (if they exist)
# `None` marks a Feature Group whose name was not restored; delete_feature_groups
# skips those entries.
DEV_FEATURE_GROUP_NAME = globals().get("dev_feature_group_name")
PROD_FEATURE_GROUP_NAME = globals().get("prod_feature_group_name")

# ✅ Delete Feature Groups
def delete_feature_groups(poll_seconds=5, timeout_seconds=600):
    """Delete the dev and prod Feature Groups and wait until they are gone.

    Existence is checked with ``describe_feature_group`` for the specific
    name rather than by scanning ``list_feature_groups``, whose response is
    paginated and can therefore miss a group.

    Args:
        poll_seconds: Delay between status checks while waiting for deletion.
        timeout_seconds: Maximum time to wait per Feature Group before giving
            up, so a stuck deletion cannot hang the notebook forever.

    Errors are reported per Feature Group and never propagate, so one failure
    does not stop the remaining cleanup.
    """
    for feature_group_name in [DEV_FEATURE_GROUP_NAME, PROD_FEATURE_GROUP_NAME]:
        if feature_group_name is None:
            print("⚠️ Warning: Skipping Feature Group deletion (not defined).")
            continue

        try:
            print(f"🔍 Checking if Feature Group `{feature_group_name}` exists...")
            if _feature_group_status(feature_group_name) is None:
                print(f"✅ Feature Group `{feature_group_name}` does not exist. No deletion needed.")
                continue

            print(f"🚀 Feature Group `{feature_group_name}` found. Deleting...")
            sagemaker_client.delete_feature_group(FeatureGroupName=feature_group_name)

            # Deletion is asynchronous: poll until the group disappears, fails,
            # or the deadline passes.
            deadline = time.monotonic() + timeout_seconds
            while True:
                status = _feature_group_status(feature_group_name)
                if status is None:
                    print(f"✅ Feature Group `{feature_group_name}` deleted successfully.")
                    break
                if status == "DeleteFailed":
                    print(f"❌ Feature Group `{feature_group_name}` entered `DeleteFailed`. Giving up.")
                    break
                if time.monotonic() >= deadline:
                    print(
                        f"❌ Timed out after {timeout_seconds}s waiting for "
                        f"Feature Group `{feature_group_name}` to be deleted."
                    )
                    break
                print("⏳ Waiting for Feature Group deletion...")
                time.sleep(poll_seconds)
        except Exception as e:
            # Best-effort cleanup: report and move on to the next group.
            print(f"❌ Error deleting Feature Group `{feature_group_name}`: {e}")


def _feature_group_status(feature_group_name):
    """Return the Feature Group's status string, or None if it does not exist."""
    try:
        response = sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
    except ClientError as e:
        if e.response.get("Error", {}).get("Code") == "ResourceNotFound":
            return None
        raise
    return response["FeatureGroupStatus"]

# ✅ Delete Model Files
def delete_model_files():
    """Delete the locally saved baseline model files, if their paths are known.

    The paths are read from the module-level variables ``baseline_model_path``
    and ``baseline_model_logistic_path``. They are looked up by name so that a
    variable which was never restored from ``%store`` is skipped with a
    warning instead of raising ``NameError``.

    Failures are reported per file and never propagate.
    """
    for var_name in ("baseline_model_path", "baseline_model_logistic_path"):
        model_path = globals().get(var_name)
        if not model_path:
            print(f"⚠️ Warning: Model path `{var_name}` is not defined. Skipping...")
            continue

        # EAFP: attempt the removal and treat "already gone" as a non-error.
        try:
            os.remove(model_path)
        except FileNotFoundError:
            print(f"✅ Model file `{model_path}` does not exist. No deletion needed.")
        except (OSError, TypeError) as e:
            print(f"❌ Error deleting model file `{model_path}`: {e}")
        else:
            print(f"🗑️ Deleted model file: {model_path}")

# ✅ Delete SageMaker Endpoints
def delete_sagemaker_endpoints(poll_seconds=5, timeout_seconds=600):
    """Delete the stored single-request SageMaker endpoint and wait for removal.

    The endpoint name is read from the module-level variable
    ``endpoint_name_single_request``. It is looked up in ``globals()``:
    ``locals()`` inside a function only contains the function's own variables,
    so checking it would always report the name as missing.

    Args:
        poll_seconds: Delay between existence checks while waiting.
        timeout_seconds: Maximum time to wait for the endpoint to disappear.

    Errors are reported and never propagate.
    """
    endpoint_name = globals().get("endpoint_name_single_request")
    if not endpoint_name:
        print("⚠️ Warning: No stored SageMaker endpoint name found. Skipping...")
        return

    print(f"🔍 Checking if SageMaker endpoint `{endpoint_name}` exists...")
    try:
        response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    except ClientError:
        print(f"⚠️ SageMaker endpoint `{endpoint_name}` not found. Skipping...")
        return

    # Any endpoint that still exists should be removed, including ones in
    # `Failed` or `OutOfService`; only skip the call if deletion is already
    # in progress.
    if response["EndpointStatus"] != "Deleting":
        print(f"🚀 Deleting SageMaker endpoint `{endpoint_name}`...")
        try:
            sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
        except ClientError as e:
            print(f"❌ Error deleting SageMaker endpoint `{endpoint_name}`: {e}")
            return

    # Wait until describe_endpoint can no longer find the endpoint.
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        try:
            sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
        except ClientError as e:
            if "Could not find endpoint" in str(e):
                print(f"✅ SageMaker endpoint `{endpoint_name}` deleted successfully.")
            else:
                print(f"⚠️ Unexpected error: {e}")
            return
        print("⏳ Waiting for endpoint deletion...")
        time.sleep(poll_seconds)

    print(
        f"❌ Timed out after {timeout_seconds}s waiting for SageMaker endpoint "
        f"`{endpoint_name}` to be deleted."
    )

# ✅ Delete Batch Transform Files in S3
def delete_s3_batch_files():
    """Delete every batch transform output object under ``{prefix}/batch-output/``.

    Objects are listed with a paginator so that prefixes holding more than one
    listing page are fully cleaned. Each page is deleted with a single
    ``delete_objects`` call, and per-key failures returned in the response are
    counted and reported instead of being ignored.

    Errors are reported and never propagate.
    """
    batch_s3_path = f"{prefix}/batch-output/"

    try:
        print(f"🔍 Checking S3 for batch transform files in `{batch_s3_path}`...")
        paginator = s3_client.get_paginator("list_objects_v2")

        found_any = False
        failed_keys = []
        for page in paginator.paginate(Bucket=bucket, Prefix=batch_s3_path):
            objects_to_delete = [{"Key": obj["Key"]} for obj in page.get("Contents", [])]
            if not objects_to_delete:
                continue
            if not found_any:
                print(f"🚀 Deleting all batch transform files in `{batch_s3_path}`...")
                found_any = True
            response = s3_client.delete_objects(Bucket=bucket, Delete={"Objects": objects_to_delete})
            # delete_objects reports individual failures in the response body
            # rather than by raising.
            failed_keys.extend(err.get("Key") for err in response.get("Errors", []))

        if not found_any:
            print("✅ No batch transform files found. Skipping deletion.")
        elif failed_keys:
            print(f"❌ Failed to delete {len(failed_keys)} batch transform file(s): {failed_keys[:10]}")
        else:
            print("✅ Batch transform files deleted successfully.")
    except Exception as e:
        print(f"❌ Error deleting batch files: {e}")

# ✅ Remove Stored Variables
def clear_stored_variables():
    """Remove the model-path and endpoint-name entries from IPython's `%store`.

    Only the three names listed in the loop are removed; the feature-store
    names restored at the top of this cell are not in the list and are left
    in `%store`. Each removal is attempted independently and any exception is
    reported as "not in %store" rather than propagated.
    """
    for var in ["baseline_model_path", "baseline_model_logistic_path", "endpoint_name_single_request"]:
        try:
            # IPython line magic (not plain Python); `{var}` is replaced with
            # the loop variable's value before the magic runs.
            %store -d {var}
            print(f"🧹 Removed `{var}` from %store.")
        except Exception:
            # Any failure is treated as "nothing to remove" so cleanup continues.
            print(f"⚠️ `{var}` was not in %store. Skipping...")

# ✅ Run cleanup
def clean_state():
    """Run every cleanup step in a fixed order, framed by start/finish banners.

    The closing banner is printed whenever no step raises; it does not inspect
    what the individual steps reported.
    """
    print("\n🚀 **Starting Full Cleanup...**\n")

    # Order matters only for readability of the log: cloud resources first,
    # local files and endpoints next, stored notebook variables last.
    cleanup_steps = (
        delete_feature_groups,
        delete_model_files,
        delete_sagemaker_endpoints,
        delete_s3_batch_files,
        clear_stored_variables,
    )
    for run_step in cleanup_steps:
        run_step()

    print("\n✅ **Cleanup completed successfully!**")

# ✅ Execute the cleanup function
# Runs as soon as this cell executes: the steps delete Feature Groups, local
# model files, the SageMaker endpoint, S3 batch output and %store entries.
clean_state()


✅ Stored variables loaded (if available).

🚀 **Starting Full Cleanup...**

🔍 Checking if Feature Group `airline_delay_features_dev` exists...
🚀 Feature Group `airline_delay_features_dev` found. Deleting...
⏳ Waiting for Feature Group deletion...
✅ Feature Group `airline_delay_features_dev` deleted successfully.
🔍 Checking if Feature Group `airline_delay_features_prod` exists...
🚀 Feature Group `airline_delay_features_prod` found. Deleting...
⏳ Waiting for Feature Group deletion...
✅ Feature Group `airline_delay_features_prod` deleted successfully.
🗑️ Deleted model file: baseline_model.pkl
🗑️ Deleted model file: baseline_model_logistic.pkl
🧹 Removed `baseline_model_path` from %store.
🧹 Removed `baseline_model_logistic_path` from %store.

✅ **Cleanup completed successfully!**


# Use code below to check what else is on your system and whether something was left behind

In [22]:
import boto3
from botocore.exceptions import ClientError
from pyathena import connect

# ✅ Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
# NOTE: despite its name this is a plain boto3 session, not a `sagemaker.Session`.
sagemaker_session = boto3.Session()

# ✅ Use SageMaker's default bucket
# Derive the default bucket name (`sagemaker-{region}-{account id}`) instead of
# taking whichever bucket `list_buckets()` happens to return first, which may
# be unrelated to SageMaker and fails outright when the account has no buckets.
# Building the name does not create the bucket, so this cell stays read-only.
# NOTE(review): assumes the default bucket name was not overridden in the
# SageMaker SDK configuration — confirm for your account.
account_id = sagemaker_session.client("sts").get_caller_identity()["Account"]
bucket = f"sagemaker-{region}-{account_id}"

# ✅ Set up Athena connection
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# ✅ Initialize AWS clients
s3_client = boto3.client("s3", region_name=region)
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Function to list AWS resources
def list_aws_resources():
    """Print an overview of AWS resources that may have been left behind.

    Lists, in order: Athena databases, the tables in each of those databases,
    SageMaker Feature Groups, objects in the module-level ``bucket`` (first 10
    keys only) and Glue databases. Each section catches ``ClientError`` on its
    own, so a failure in one listing does not prevent the others from running.
    """
    print("\n--- 📌 AWS Resource Overview ---")

    # ✅ List Athena Databases
    try:
        databases = athena_client.list_databases(CatalogName="AwsDataCatalog")["DatabaseList"]
        database_names = [db["Name"] for db in databases]
        print("\n📌 **Athena Databases:**")
        for db in database_names:
            print(f"   - {db}")
    except ClientError as e:
        print("❌ Error listing Athena databases:", e)
        database_names = []  # Ensure it doesn't break the next step

    # ✅ List Athena Tables Per Database
    for database in database_names:
        try:
            tables = athena_client.list_table_metadata(
                CatalogName="AwsDataCatalog", DatabaseName=database
            )["TableMetadataList"]
            table_names = [table["Name"] for table in tables]

            print(f"\n📌 **Tables in Athena Database: `{database}`**")
            if table_names:
                for table in table_names:
                    print(f"   - {table}")
            else:
                print("   ❌ No tables found in this database.")

        except ClientError as e:
            print(f"❌ Error listing tables in `{database}`:", e)

    # ✅ List Feature Store Groups
    try:
        feature_groups = sagemaker_client.list_feature_groups()["FeatureGroupSummaries"]
        feature_group_names = [fg["FeatureGroupName"] for fg in feature_groups]
        print("\n📌 **Feature Store Groups:**")
        if feature_group_names:
            for fg in feature_group_names:
                print(f"   - {fg}")
        else:
            print("   ❌ No Feature Groups found.")
    except ClientError as e:
        print("❌ Error listing Feature Groups:", e)

    # ✅ List S3 Files
    try:
        objects = s3_client.list_objects_v2(Bucket=bucket)
        s3_files = [obj["Key"] for obj in objects.get("Contents", [])]
        # f-string so the actual bucket name is shown instead of a literal "{bucket}".
        print(f"\n📌 **S3 Files in Bucket `{bucket}`:**")
        if s3_files:
            for file in s3_files[:10]:  # Show only first 10 files for readability
                print(f"   - {file}")
            if len(s3_files) > 10:
                # A single listing call returns one page only; mark the count
                # as a lower bound when S3 reports that more keys exist.
                more = "+" if objects.get("IsTruncated") else ""
                print(f"   ... ({len(s3_files)}{more} total files)")
        else:
            print("   ❌ No files found.")
    except ClientError as e:
        print("❌ Error listing S3 files:", e)

    # ✅ List Glue Databases
    try:
        glue_databases = glue_client.get_databases()["DatabaseList"]
        glue_db_names = [db["Name"] for db in glue_databases]
        print("\n📌 **Glue Databases:**")
        if glue_db_names:
            for db in glue_db_names:
                print(f"   - {db}")
        else:
            print("   ❌ No Glue Databases found.")
    except ClientError as e:
        print("❌ Error listing Glue databases:", e)

# ✅ Run AWS resource listing
# Read-only check: prints what still exists after cleanup; nothing is modified.
list_aws_resources()



--- 📌 AWS Resource Overview ---

📌 **Athena Databases:**
   - default

📌 **Tables in Athena Database: `default`**
   ❌ No tables found in this database.

📌 **Feature Store Groups:**
   ❌ No Feature Groups found.

📌 **S3 Files in Bucket `{bucket}`:**
   ❌ No files found.

📌 **Glue Databases:**
   - default
