In [29]:
import os
import time
import boto3
import sagemaker
from botocore.exceptions import ClientError
from pyathena import connect

# ✅ Retrieve stored variables safely, defaulting to None
stored_variables = [
    "dev_feature_store_table", "prod_feature_store_table", 
    "dev_feature_group_name", "prod_feature_group_name",
    "baseline_model_path", "baseline_model_logistic_path",
    "endpoint_name_single_request", "endpoint_name_batch_transform"
]

# ✅ Ensure all variables exist, setting them to None if not found in %store
for var in stored_variables:
    try:
        %store -r {var}
    except KeyError:
        print(f"⚠️ Warning: `{var}` is not stored. Defaulting to None.")
        globals()[var] = None  # Explicitly set missing variables to None

print("✅ Stored variables loaded (if available).")

# ✅ Initialize AWS Session
session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()

# ✅ Use SageMaker's default bucket
bucket = sagemaker_session.default_bucket()
prefix = "flight-delay-prediction-xgboost"

# ✅ Initialize AWS clients
s3_client = boto3.client("s3")
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Define Athena Database
ATHENA_DATABASE = "sagemaker_featurestore"


no stored variable or alias dev_feature_store_table
no stored variable or alias prod_feature_store_table
no stored variable or alias dev_feature_group_name
no stored variable or alias prod_feature_group_name
no stored variable or alias baseline_model_path
no stored variable or alias baseline_model_logistic_path
no stored variable or alias endpoint_name_single_request
no stored variable or alias endpoint_name_batch_transform
✅ Stored variables loaded (if available).


In [47]:
# ✅ Delete Feature Groups
def delete_feature_groups():
    """Delete the dev and prod SageMaker Feature Groups when they exist.

    Reads the globals ``dev_feature_group_name`` and
    ``prod_feature_group_name``; falsy names are skipped. Any error is printed
    and the loop moves on to the next name, so nothing is raised to the caller.
    """
    for feature_group_name in [dev_feature_group_name, prod_feature_group_name]:
        if not feature_group_name:
            continue  # Skip if no feature group name

        try:
            # The listing is paginated; walk every page so a group that is not
            # on the first page is still found.
            paginator = sagemaker_client.get_paginator("list_feature_groups")
            existing_names = {
                summary["FeatureGroupName"]
                for page in paginator.paginate()
                for summary in page["FeatureGroupSummaries"]
            }
            if feature_group_name in existing_names:
                print(f"🚀 Deleting Feature Group `{feature_group_name}`...")
                sagemaker_client.delete_feature_group(FeatureGroupName=feature_group_name)
                # Fixed grace period only; the deletion is not polled for completion.
                time.sleep(5)
                print(f"✅ Feature Group `{feature_group_name}` deleted.")
        except Exception as e:
            print(f"❌ Error deleting `{feature_group_name}`: {e}")

# ✅ Delete Model Files
def delete_model_files():
    """Remove the locally saved baseline model files, if present.

    Looks at the globals ``baseline_model_path`` and
    ``baseline_model_logistic_path``. Falsy or non-existent paths are ignored;
    a failure to remove a file is printed rather than raised.
    """
    candidates = (baseline_model_path, baseline_model_logistic_path)
    for local_file in candidates:
        if not local_file:
            continue
        if not os.path.exists(local_file):
            continue
        try:
            os.remove(local_file)
            print(f"🗑️ Deleted model file: {local_file}")
        except Exception as err:
            print(f"❌ Error deleting `{local_file}`: {err}")

# ✅ Delete SageMaker Endpoints (Single Request & Batch Transform)
def delete_sagemaker_endpoints():
    """Delete the single-request and batch-transform SageMaker endpoints.

    Reads the globals ``endpoint_name_single_request`` and
    ``endpoint_name_batch_transform``; None or empty names are ignored.
    Every existing endpoint is deleted regardless of its status, except one
    that already reports ``Deleting``. Errors are printed, never raised.
    """
    # Truthiness also filters out empty strings, which would otherwise be
    # passed to describe_endpoint as an endpoint name.
    endpoint_names = [
        name
        for name in [endpoint_name_single_request, endpoint_name_batch_transform]
        if name
    ]

    if not endpoint_names:
        print("✅ No SageMaker endpoints to delete.")
        return

    for endpoint_name in endpoint_names:
        try:
            response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
            if response["EndpointStatus"] == "Deleting":
                print(f"ℹ️ Endpoint `{endpoint_name}` is already being deleted. Skipping...")
                continue

            # Failed / OutOfService endpoints still exist and must be removed
            # too, so the status is not used as an allow-list.
            print(f"🚀 Deleting `{endpoint_name}`...")
            sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
            # Fixed grace period only; the deletion is not polled for completion.
            time.sleep(5)
            print(f"✅ Endpoint `{endpoint_name}` deleted.")
        except ClientError as e:
            error = e.response.get("Error", {})
            # A missing endpoint is reported as a ValidationException whose
            # message starts with "Could not find"; anything else (throttling,
            # access denied, ...) is a real failure and is reported as such.
            not_found = (
                error.get("Code") == "ValidationException"
                and "Could not find" in error.get("Message", "")
            )
            if not_found:
                print(f"⚠️ Endpoint `{endpoint_name}` not found. Skipping...")
            else:
                print(f"❌ Error deleting `{endpoint_name}`: {e}")

# ✅ Delete SageMaker Models
def delete_sagemaker_models():
    """Delete every SageMaker model visible to ``sagemaker_client``.

    NOTE: no name filter is applied, so this removes all models the listing
    returns, not only the ones created by this project. Listing and deletion
    errors are printed, never raised; a failed deletion does not stop the
    remaining models from being processed.
    """
    try:
        # The listing is paginated; collect the names from every page.
        paginator = sagemaker_client.get_paginator("list_models")
        model_names = [
            model["ModelName"]
            for page in paginator.paginate()
            for model in page["Models"]
        ]
    except ClientError as e:
        print(f"❌ Error deleting models: {e}")
        return

    if not model_names:
        print("✅ No SageMaker models found.")
        return

    print(f"🚀 Deleting {len(model_names)} SageMaker models...")
    for model_name in model_names:
        try:
            sagemaker_client.delete_model(ModelName=model_name)
            print(f"✅ Deleted model: {model_name}")
        except ClientError as e:
            print(f"❌ Error deleting model `{model_name}`: {e}")

# ✅ Stop Ongoing SageMaker Batch Transform Jobs (Completed jobs remain in history)
def stop_sagemaker_batch_jobs():
    """Stop SageMaker batch transform jobs that are still running.

    Jobs with status ``InProgress`` or ``Stopping`` are sent a stop request;
    ``Completed`` jobs are only counted and reported. Listing and stop errors
    are printed, never raised; a failed stop does not prevent the remaining
    jobs from being processed.
    """
    try:
        # The listing is paginated; a single call returns only the first page.
        paginator = sagemaker_client.get_paginator("list_transform_jobs")
        batch_jobs = [
            job
            for page in paginator.paginate()
            for job in page["TransformJobSummaries"]
        ]
    except ClientError as e:
        print(f"❌ Error stopping batch jobs: {e}")
        return

    running_jobs = [job["TransformJobName"] for job in batch_jobs if job["TransformJobStatus"] in ["InProgress", "Stopping"]]
    completed_jobs = [job["TransformJobName"] for job in batch_jobs if job["TransformJobStatus"] == "Completed"]

    if running_jobs:
        print(f"🚀 Stopping {len(running_jobs)} running SageMaker batch jobs...")
        for job_name in running_jobs:
            try:
                sagemaker_client.stop_transform_job(TransformJobName=job_name)
                print(f"✅ Stopped batch job: {job_name}")
            except ClientError as e:
                print(f"❌ Error stopping batch job `{job_name}`: {e}")
    else:
        print("✅ No running batch jobs to stop.")

    if completed_jobs:
        print(f"ℹ️ {len(completed_jobs)} completed batch jobs remain in history (cannot be deleted).")



# ✅ Delete All Remaining S3 Files
def delete_s3_files():
    """Delete every object in the global ``bucket``.

    NOTE: no prefix is applied, so everything stored in the bucket is removed,
    not only this project's files. ``ClientError`` is printed, never raised.
    """
    try:
        print(f"🔍 Checking S3 for files to delete in `{bucket}`...")
        # One list_objects_v2 call returns at most 1000 keys; paginate so the
        # whole bucket is covered.
        paginator = s3_client.get_paginator("list_objects_v2")
        keys = [
            obj["Key"]
            for page in paginator.paginate(Bucket=bucket)
            for obj in page.get("Contents", [])
        ]

        if keys:
            print(f"🚀 Deleting all {len(keys)} files in `{bucket}`...")
            failures = []
            # delete_objects accepts at most 1000 keys per request.
            for start in range(0, len(keys), 1000):
                batch = [{"Key": key} for key in keys[start:start + 1000]]
                response = s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
                # Per-key failures come back in the response, not as an exception.
                failures.extend(response.get("Errors", []))
            if failures:
                print(f"❌ {len(failures)} S3 files could not be deleted.")
            else:
                print(f"✅ All S3 files deleted.")
        else:
            print(f"✅ No files found in S3 bucket.")
    except ClientError as e:
        print(f"❌ Error deleting S3 files: {e}")



# ✅ Delete Batch Transform Files in S3
def delete_s3_batch_files():
    """Delete the batch transform output stored under ``{prefix}/batch-output/``.

    Uses the globals ``bucket`` and ``prefix``. Nothing is printed when the
    prefix is empty. Any exception is printed, never raised.
    """
    batch_s3_path = f"{prefix}/batch-output/"
    try:
        # One list_objects_v2 call returns at most 1000 keys; paginate so the
        # whole prefix is covered.
        paginator = s3_client.get_paginator("list_objects_v2")
        keys = [
            obj["Key"]
            for page in paginator.paginate(Bucket=bucket, Prefix=batch_s3_path)
            for obj in page.get("Contents", [])
        ]
        if keys:
            print(f"🚀 Deleting batch files in `{batch_s3_path}`...")
            failures = []
            # delete_objects accepts at most 1000 keys per request.
            for start in range(0, len(keys), 1000):
                batch = [{"Key": key} for key in keys[start:start + 1000]]
                response = s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
                # Per-key failures come back in the response, not as an exception.
                failures.extend(response.get("Errors", []))
            if failures:
                print(f"❌ {len(failures)} batch files could not be deleted.")
            else:
                print("✅ Batch files deleted.")
    except Exception as e:
        print(f"❌ Error deleting batch files: {e}")

# ✅ Drop Feature Store Tables in Athena
def drop_feature_store_tables():
    """Drop Athena tables in ``ATHENA_DATABASE`` whose name contains
    ``airline_delay_features``.

    Each table is dropped with its own ``DROP TABLE IF EXISTS`` query, and the
    query is polled until it leaves the QUEUED/RUNNING states so the printed
    result reflects the real outcome. ``ClientError`` is printed, never raised.
    """
    try:
        # The table listing is paginated; collect matching names from every page.
        paginator = athena_client.get_paginator("list_table_metadata")
        pages = paginator.paginate(CatalogName="AwsDataCatalog", DatabaseName=ATHENA_DATABASE)
        table_names = [
            table["Name"]
            for page in pages
            for table in page["TableMetadataList"]
            if "airline_delay_features" in table["Name"]
        ]

        if table_names:
            print(f"🚀 Dropping {len(table_names)} tables from Athena...")
            for table in table_names:
                execution = athena_client.start_query_execution(
                    # Backticks keep the DDL valid for names that are not plain identifiers.
                    QueryString=f"DROP TABLE IF EXISTS `{ATHENA_DATABASE}`.`{table}`;",
                    QueryExecutionContext={"Database": ATHENA_DATABASE},
                    ResultConfiguration={"OutputLocation": f"s3://{bucket}/athena-query-results/"}
                )

                # start_query_execution only submits the query; poll (bounded
                # to ~30 s) for the terminal state before reporting a result.
                status = {"State": "QUEUED"}
                for _ in range(30):
                    status = athena_client.get_query_execution(
                        QueryExecutionId=execution["QueryExecutionId"]
                    )["QueryExecution"]["Status"]
                    if status["State"] not in ("QUEUED", "RUNNING"):
                        break
                    time.sleep(1)

                if status["State"] == "SUCCEEDED":
                    print(f"✅ Dropped `{table}`.")
                else:
                    reason = status.get("StateChangeReason", "no reason reported")
                    print(f"❌ Could not drop `{table}` (query state: {status['State']}): {reason}")
        else:
            print("✅ No old feature store tables found in Athena.")
    except ClientError as e:
        print(f"❌ Error dropping Athena tables: {e}")

# ✅ Delete Glue Databases
def delete_glue_databases():
    """Delete Glue databases whose name contains ``airline_delay_features``.

    Listing and deletion errors are printed, never raised; a failed deletion
    does not stop the remaining databases from being processed.
    """
    try:
        # The listing is paginated; collect matching names from every page.
        paginator = glue_client.get_paginator("get_databases")
        glue_db_names = [
            db["Name"]
            for page in paginator.paginate()
            for db in page["DatabaseList"]
            if "airline_delay_features" in db["Name"]
        ]
    except ClientError as e:
        print(f"❌ Error deleting Glue databases: {e}")
        return

    if not glue_db_names:
        print("✅ No unnecessary Glue databases found.")
        return

    print(f"🚀 Deleting {len(glue_db_names)} Glue databases...")
    for db in glue_db_names:
        try:
            glue_client.delete_database(Name=db)
            print(f"✅ Deleted Glue database: {db}")
        except ClientError as e:
            print(f"❌ Error deleting Glue database `{db}`: {e}")

# ✅ Remove Stored Variables
def clear_stored_variables():
    for var in stored_variables:
        try:
            %store -d {var}
            print(f"🧹 Removed `{var}` from %store.")
        except Exception:
            print(f"⚠️ `{var}` was not in %store. Skipping...")

# ✅ Delete Feature Store S3 Files
def delete_s3_feature_store_files():
    """Delete every object under the ``feature-store/`` prefix of ``bucket``.

    ``ClientError`` is printed, never raised.
    """
    feature_store_prefix = "feature-store/"
    try:
        # One list_objects_v2 call returns at most 1000 keys; paginate so the
        # whole prefix is covered.
        paginator = s3_client.get_paginator("list_objects_v2")
        keys = [
            obj["Key"]
            for page in paginator.paginate(Bucket=bucket, Prefix=feature_store_prefix)
            for obj in page.get("Contents", [])
        ]
        if keys:
            print(f"🚀 Deleting feature store files in `{feature_store_prefix}`...")
            failures = []
            # delete_objects accepts at most 1000 keys per request.
            for start in range(0, len(keys), 1000):
                batch = [{"Key": key} for key in keys[start:start + 1000]]
                response = s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
                # Per-key failures come back in the response, not as an exception.
                failures.extend(response.get("Errors", []))
            if failures:
                print(f"❌ {len(failures)} feature store files could not be deleted.")
            else:
                print("✅ Feature store files deleted.")
        else:
            print("✅ No feature store files found.")
    except ClientError as e:
        print(f"❌ Error deleting feature store files: {e}")

# ✅ Delete SageMaker Training & Debugger Output Files
def delete_s3_training_output():
    """Delete every object under ``{prefix}/output/`` in ``bucket``.

    Uses the globals ``bucket`` and ``prefix``. ``ClientError`` is printed,
    never raised.
    """
    # Built from the shared `prefix` global, like the batch-output path,
    # instead of repeating the project name as a literal.
    training_output_prefix = f"{prefix}/output/"

    try:
        print(f"🔍 Checking S3 for training output files in `{training_output_prefix}`...")
        # One list_objects_v2 call returns at most 1000 keys; paginate so the
        # whole prefix is covered.
        paginator = s3_client.get_paginator("list_objects_v2")
        keys = [
            obj["Key"]
            for page in paginator.paginate(Bucket=bucket, Prefix=training_output_prefix)
            for obj in page.get("Contents", [])
        ]

        if keys:
            print(f"🚀 Deleting all {len(keys)} training output files in `{training_output_prefix}`...")
            failures = []
            # delete_objects accepts at most 1000 keys per request.
            for start in range(0, len(keys), 1000):
                batch = [{"Key": key} for key in keys[start:start + 1000]]
                response = s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
                # Per-key failures come back in the response, not as an exception.
                failures.extend(response.get("Errors", []))
            if failures:
                print(f"❌ {len(failures)} training output files could not be deleted.")
            else:
                print(f"✅ Training output files deleted.")
        else:
            print(f"✅ No training output files found in S3 bucket.")
    except ClientError as e:
        print(f"❌ Error deleting training output files: {e}")




In [48]:
# ✅ Run Full Cleanup
def clean_state():
    """Run every cleanup step once, in a fixed order, framed by a start and a
    finish banner.

    The steps print their own errors, so the finish banner is printed
    whenever all of them return.
    """
    print("\n🚀 **Starting Full Cleanup...**\n")

    cleanup_steps = (
        delete_feature_groups,
        delete_model_files,
        delete_sagemaker_endpoints,
        delete_sagemaker_models,
        stop_sagemaker_batch_jobs,
        delete_s3_files,
        delete_s3_batch_files,
        drop_feature_store_tables,
        delete_glue_databases,
        clear_stored_variables,
        delete_s3_feature_store_files,
        delete_s3_training_output,
    )
    for run_step in cleanup_steps:
        run_step()

    print("\n✅ **Cleanup completed successfully!**")

# ✅ Execute cleanup
clean_state()


🚀 **Starting Full Cleanup...**

⚠️ Endpoint `flight-delay-xgboost-endpoint-single-request` not found. Skipping...
✅ No SageMaker models found.
✅ No running batch jobs to stop.
ℹ️ 10 completed batch jobs remain in history (cannot be deleted).
🔍 Checking S3 for files to delete in `sagemaker-us-east-1-607916531205`...
🚀 Deleting all 637 files in `sagemaker-us-east-1-607916531205`...
✅ All S3 files deleted.
✅ No old feature store tables found in Athena.
✅ No unnecessary Glue databases found.
🧹 Removed `dev_feature_store_table` from %store.
🧹 Removed `prod_feature_store_table` from %store.
🧹 Removed `dev_feature_group_name` from %store.
🧹 Removed `prod_feature_group_name` from %store.
🧹 Removed `baseline_model_path` from %store.
🧹 Removed `baseline_model_logistic_path` from %store.
🧹 Removed `endpoint_name_single_request` from %store.
🧹 Removed `endpoint_name_batch_transform` from %store.
✅ No feature store files found.
🔍 Checking S3 for training output files in `flight-delay-prediction-xg

# Use the code below to check which AWS resources still exist and whether anything was left behind

In [49]:
import boto3
from botocore.exceptions import ClientError
from pyathena import connect

# Region comes from the default boto3 session; the bucket is SageMaker's default one.
session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

# Athena connection that stages its query results in the default bucket.
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# One regional client per service, created in the same order as before.
s3_client, athena_client, sagemaker_client, glue_client = (
    boto3.client(service_name, region_name=region)
    for service_name in ("s3", "athena", "sagemaker", "glue")
)

# ✅ Function to list AWS resources
def _collect_names(client, operation, result_key, name_key, **request_kwargs):
    """Return ``name_key`` of every item under ``result_key`` across all pages of ``operation``."""
    paginator = client.get_paginator(operation)
    return [
        item[name_key]
        for page in paginator.paginate(**request_kwargs)
        for item in page.get(result_key, [])
    ]


def _print_section(title, names, empty_message=None, limit=None):
    """Print ``title`` and a bullet per name; show ``empty_message`` when empty and a total when cut at ``limit``."""
    print(title)
    if not names:
        if empty_message is not None:
            print(empty_message)
        return
    for name in (names if limit is None else names[:limit]):
        print(f"   - {name}")
    if limit is not None and len(names) > limit:
        print(f"   ... ({len(names)} total files)")


def list_aws_resources():
    """Print an overview of the AWS resources that currently exist.

    Covers Athena databases and their tables, SageMaker feature groups,
    endpoints, models and batch transform jobs, the objects in ``bucket``
    (first 10 keys plus the total), and Glue databases. Every listing is
    paginated so the overview is complete. A ``ClientError`` in one section is
    printed and the remaining sections still run.
    """
    print("\n--- 📌 AWS Resource Overview ---")

    # Athena databases; the names also drive the per-database table listing.
    try:
        database_names = _collect_names(
            athena_client, "list_databases", "DatabaseList", "Name",
            CatalogName="AwsDataCatalog",
        )
        _print_section("\n📌 **Athena Databases:**", database_names)
    except ClientError as e:
        print("❌ Error listing Athena databases:", e)
        database_names = []  # Ensure it doesn't break the next step

    # Athena tables, one section per database.
    for database in database_names:
        try:
            table_names = _collect_names(
                athena_client, "list_table_metadata", "TableMetadataList", "Name",
                CatalogName="AwsDataCatalog", DatabaseName=database,
            )
            _print_section(
                f"\n📌 **Tables in Athena Database: `{database}`**",
                table_names,
                empty_message="   ❌ No tables found in this database.",
            )
        except ClientError as e:
            print(f"❌ Error listing tables in `{database}`:", e)

    # SageMaker resources share one shape:
    # (operation, result key, name key, title, empty message, error label).
    sagemaker_sections = [
        ("list_feature_groups", "FeatureGroupSummaries", "FeatureGroupName",
         "\n📌 **Feature Store Groups:**",
         "   ❌ No Feature Groups found.",
         "❌ Error listing Feature Groups:"),
        ("list_endpoints", "Endpoints", "EndpointName",
         "\n📌 **SageMaker Endpoints:**",
         "   ❌ No SageMaker Endpoints found.",
         "❌ Error listing SageMaker Endpoints:"),
        ("list_models", "Models", "ModelName",
         "\n📌 **SageMaker Models:**",
         "   ❌ No SageMaker Models found.",
         "❌ Error listing SageMaker Models:"),
        ("list_transform_jobs", "TransformJobSummaries", "TransformJobName",
         "\n📌 **SageMaker Batch Transform Jobs:**",
         "   ❌ No Batch Transform Jobs found.",
         "❌ Error listing Batch Transform Jobs:"),
    ]
    for operation, result_key, name_key, title, empty_message, error_label in sagemaker_sections:
        try:
            names = _collect_names(sagemaker_client, operation, result_key, name_key)
            _print_section(title, names, empty_message=empty_message)
        except ClientError as e:
            print(error_label, e)

    # S3 objects: only the first 10 keys are shown, but the total is exact.
    try:
        s3_files = _collect_names(s3_client, "list_objects_v2", "Contents", "Key", Bucket=bucket)
        _print_section(
            f"\n📌 **S3 Files in Bucket `{bucket}`:**",
            s3_files,
            empty_message="   ❌ No files found.",
            limit=10,
        )
    except ClientError as e:
        print("❌ Error listing S3 files:", e)

    # Glue databases.
    try:
        glue_db_names = _collect_names(glue_client, "get_databases", "DatabaseList", "Name")
        _print_section(
            "\n📌 **Glue Databases:**",
            glue_db_names,
            empty_message="   ❌ No Glue Databases found.",
        )
    except ClientError as e:
        print("❌ Error listing Glue databases:", e)

# ✅ Run AWS resource listing
list_aws_resources()



--- 📌 AWS Resource Overview ---

📌 **Athena Databases:**
   - db_airline_delay_cause
   - default
   - sagemaker_featurestore

📌 **Tables in Athena Database: `db_airline_delay_cause`**
   - airline_delay_cause_csv_raw
   - development_data
   - production_data

📌 **Tables in Athena Database: `default`**
   ❌ No tables found in this database.

📌 **Tables in Athena Database: `sagemaker_featurestore`**
   ❌ No tables found in this database.

📌 **Feature Store Groups:**
   ❌ No Feature Groups found.

📌 **SageMaker Endpoints:**
   ❌ No SageMaker Endpoints found.

📌 **SageMaker Models:**
   ❌ No SageMaker Models found.

📌 **SageMaker Batch Transform Jobs:**
   - sagemaker-xgboost-2025-02-19-05-26-04-620
   - pipelines-lum39tvg22zt-AbaloneTransform-SdJZQXw6zR
   - pipelines-b9q304vfzej0-AbaloneTransform-hpPnA9nR73
   - pipelines-xw5ukyv44cda-AbaloneTransform-rvefu7xYmu
   - sagemaker-xgboost-2025-01-31-08-39-00-505
   - sagemaker-xgboost-2025-01-31-08-31-25-431
   - sagemaker-xgboost-2025-01