In [7]:
import os
import boto3
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup
from botocore.exceptions import ClientError
from pyathena import connect
import time

# ✅ Initialize AWS session
# The boto3 session supplies the region; the SageMaker session supplies the bucket.
session = boto3.session.Session()
region = session.region_name
sagemaker_session = sagemaker.Session()

# ✅ Use SageMaker's default bucket
bucket = sagemaker_session.default_bucket()

# ✅ Set up Athena connection (query results are staged under this S3 prefix)
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# ✅ Define variables
# Names of the resources the cleanup functions below operate on.
ATHENA_DATABASE = "sagemaker_featurestore"
ATHENA_TABLES_TO_DROP = ["development_data", "production_data"]
FEATURE_GROUP_NAME = "airline_delay_features"
GLUE_DATABASE_TO_DROP = "db_airline_delay_cause"

# ✅ Initialize AWS clients
# Every client is created with the same explicit region so they all talk to
# the region reported by the session above.
s3_client = boto3.client("s3", region_name=region)
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Remove local files
def remove_local_files():
    """Delete the exported CSV files from the current working directory.

    One status line is printed per file, whether it was removed or was
    not there to begin with.
    """
    for csv_name in ("development_data.csv", "production_data.csv"):
        if not os.path.exists(csv_name):
            print(f"‚ÑπÔ∏è {csv_name} not found in local storage.")
            continue
        os.remove(csv_name)
        print(f"üóëÔ∏è Removed {csv_name} from local storage.")

# ✅ Remove files from S3
def remove_s3_files():
    """Delete the exported CSV objects from the top level of the S3 bucket.

    A ``ClientError`` raised for one key is printed and does not prevent
    the remaining key from being processed.
    """
    object_keys = ("development_data.csv", "production_data.csv")
    for object_key in object_keys:
        try:
            s3_client.delete_object(Bucket=bucket, Key=object_key)
        except ClientError as err:
            print(f"‚ùå Error deleting {object_key} from S3: {err}")
        else:
            print(f"üóëÔ∏è Deleted {object_key} from S3 bucket {bucket}.")

# ✅ Drop specific Athena tables
def drop_athena_tables():
    """Start a ``DROP TABLE IF EXISTS`` Athena query for each configured table.

    The queries are only submitted; this function does not wait for them
    to finish or inspect their outcome. A ``ClientError`` while submitting
    one query is printed and the loop moves on to the next table.
    """
    for table_name in ATHENA_TABLES_TO_DROP:
        drop_statement = f"DROP TABLE IF EXISTS {ATHENA_DATABASE}.{table_name}"
        try:
            athena_client.start_query_execution(
                QueryString=drop_statement,
                QueryExecutionContext={"Database": ATHENA_DATABASE},
                ResultConfiguration={"OutputLocation": f"s3://{bucket}/athena-logs/"},
            )
        except ClientError as err:
            print(f"‚ùå Error dropping Athena table `{table_name}`: {err}")
        else:
            print(f"üóëÔ∏è Athena table `{table_name}` drop request sent.")

# ✅ Delete old `airline_delay_features_*` tables from Athena
def drop_old_airline_feature_tables():
    """Delete every ``airline_delay_features*`` table in ``ATHENA_DATABASE``.

    Table names are read from Athena's ``list_table_metadata`` and each match
    is removed through the Glue ``delete_table`` call. The listing is
    paginated, so every page is read; reading only the first response would
    leave tables behind in a database with many tables.

    Errors are printed rather than raised: a listing failure ends the
    function, while a failure to delete one table does not stop the others.
    """
    tables_to_delete = []
    try:
        paginator = athena_client.get_paginator("list_table_metadata")
        pages = paginator.paginate(
            CatalogName="AwsDataCatalog", DatabaseName=ATHENA_DATABASE
        )
        for page in pages:
            for table in page["TableMetadataList"]:
                if table["Name"].startswith("airline_delay_features"):
                    tables_to_delete.append(table["Name"])
    except ClientError as e:
        print("‚ùå Error listing Athena tables:", e)
        return

    if not tables_to_delete:
        print("\n‚úÖ No outdated `airline_delay_features_*` tables found.")
        return

    print(f"\nüöÄ **Deleting {len(tables_to_delete)} outdated `airline_delay_features_*` tables...**")
    for table in tables_to_delete:
        try:
            glue_client.delete_table(DatabaseName=ATHENA_DATABASE, Name=table)
            print(f"‚úÖ Successfully deleted `{table}` from Athena.")
        except ClientError as e:
            print(f"‚ùå Error deleting `{table}`:", e)

# ✅ Delete Feature Store Feature Group
def _feature_group_exists(name):
    """Return True when a Feature Group named exactly *name* is listed.

    All result pages are read, so the group is found even when the account
    holds more Feature Groups than fit in a single response.
    """
    paginator = sagemaker_client.get_paginator("list_feature_groups")
    # NameContains narrows the listing server-side; the exact comparison
    # below rejects groups that merely contain the name as a substring.
    for page in paginator.paginate(NameContains=name):
        for summary in page["FeatureGroupSummaries"]:
            if summary["FeatureGroupName"] == name:
                return True
    return False


def delete_feature_group():
    """Delete the ``FEATURE_GROUP_NAME`` Feature Group and wait until it is gone.

    If the group is not listed, nothing is deleted. After requesting the
    deletion the listing is polled every 5 seconds; the wait gives up after
    10 minutes so a deletion that never completes cannot hang the cleanup.

    Any exception is printed rather than raised, so later cleanup steps
    still run.
    """
    max_wait_seconds = 600
    poll_seconds = 5
    try:
        print(f"üîç Checking if Feature Group `{FEATURE_GROUP_NAME}` exists...")
        if not _feature_group_exists(FEATURE_GROUP_NAME):
            print(f"‚úÖ Feature Group `{FEATURE_GROUP_NAME}` does not exist. No deletion needed.")
            return

        print(f"üöÄ Feature Group `{FEATURE_GROUP_NAME}` found. Deleting...")
        sagemaker_client.delete_feature_group(FeatureGroupName=FEATURE_GROUP_NAME)

        deadline = time.monotonic() + max_wait_seconds
        while _feature_group_exists(FEATURE_GROUP_NAME):
            if time.monotonic() >= deadline:
                print(
                    f"‚ùå Error deleting Feature Group: `{FEATURE_GROUP_NAME}` "
                    f"is still listed after {max_wait_seconds} seconds."
                )
                return
            print("‚è≥ Waiting for Feature Group deletion...")
            time.sleep(poll_seconds)
        print(f"‚úÖ Feature Group `{FEATURE_GROUP_NAME}` deleted successfully.")
    except Exception as e:
        # Deliberately broad: cleanup is best-effort and must not abort the run.
        print(f"‚ùå Error deleting Feature Group: {e}")

# ✅ Delete AWS Glue database
def delete_glue_database():
    """Delete the Glue database named by ``GLUE_DATABASE_TO_DROP``.

    A ``ClientError`` from Glue is printed instead of being raised.
    """
    database_name = GLUE_DATABASE_TO_DROP
    try:
        glue_client.delete_database(Name=database_name)
    except ClientError as err:
        print(f"‚ùå Error deleting Glue Database `{database_name}`: {err}")
        return
    print(f"‚úÖ Glue Database `{database_name}` deleted successfully.")

# ✅ Clear Jupyter Notebook %store
def clear_ipython_store():
    """Remove every variable saved with the ``%store`` magic.

    Only the ``autorestore/*`` entries of the IPython profile database are
    deleted (the same set ``%store -z`` removes). Clearing the whole database
    would also wipe unrelated IPython state kept in the same store.

    Any exception (including IPython being unavailable) is printed rather
    than raised.
    """
    try:
        # Imported lazily so the rest of the cleanup works outside IPython.
        from IPython.core.interactiveshell import InteractiveShell
        ip = InteractiveShell.instance()
        # Snapshot the keys first: entries are deleted while walking them.
        for key in list(ip.db.keys("autorestore/*")):
            del ip.db[key]
        print("‚úÖ Jupyter Notebook %store cleared successfully.")
    except Exception as e:
        # Deliberately broad: cleanup is best-effort and must not abort the run.
        print(f"‚ùå Error clearing Jupyter Notebook %store: {e}")

# ✅ Run all cleanup operations
def clean_state():
    """Run every cleanup step in order, framed by start and finish banners.

    Each step prints its own errors instead of raising, so the closing
    banner is printed whenever all steps return.
    """
    print("\nüöÄ **Starting Full Cleanup...**\n")
    cleanup_steps = (
        remove_local_files,
        remove_s3_files,
        drop_athena_tables,
        drop_old_airline_feature_tables,
        delete_feature_group,
        delete_glue_database,
        clear_ipython_store,
    )
    for run_step in cleanup_steps:
        run_step()
    print("\n‚úÖ **Cleanup completed successfully!**")

# ✅ Execute the cleanup function
# Runs the full teardown as soon as this cell/script is executed.
clean_state()



üöÄ **Starting Full Cleanup...**

üóëÔ∏è Removed development_data.csv from local storage.
üóëÔ∏è Removed production_data.csv from local storage.
üóëÔ∏è Deleted development_data.csv from S3 bucket sagemaker-us-east-1-607916531205.
üóëÔ∏è Deleted production_data.csv from S3 bucket sagemaker-us-east-1-607916531205.
üóëÔ∏è Athena table `development_data` drop request sent.
üóëÔ∏è Athena table `production_data` drop request sent.

üöÄ **Deleting 7 outdated `airline_delay_features_*` tables...**
‚úÖ Successfully deleted `airline_delay_features_1739141540` from Athena.
‚úÖ Successfully deleted `airline_delay_features_1739161187` from Athena.
‚úÖ Successfully deleted `airline_delay_features_1739161465` from Athena.
‚úÖ Successfully deleted `airline_delay_features_1739161738` from Athena.
‚úÖ Successfully deleted `airline_delay_features_1739162223` from Athena.
‚úÖ Successfully deleted `airline_delay_features_1739162671` from Athena.
‚úÖ Successfully deleted `airline_delay_features_173

# Use code below to check what else is on your system and whether something was left behind

In [6]:
import boto3
from botocore.exceptions import ClientError
from pyathena import connect

# ✅ Initialize AWS session
session = boto3.session.Session()
region = session.region_name
# NOTE: despite its name this is a plain boto3 session, not a sagemaker.Session.
sagemaker_session = boto3.Session()

# ✅ Use SageMaker's default bucket
# The SageMaker SDK names its default bucket "sagemaker-{region}-{account_id}".
# Build that name from the caller's account instead of taking whichever
# bucket happens to be listed first, which may be unrelated (and fails with
# an IndexError when the account has no buckets).
account_id = sagemaker_session.client("sts").get_caller_identity()["Account"]
bucket = f"sagemaker-{region}-{account_id}"

# ✅ Set up Athena connection (query results are staged under this S3 prefix)
s3_staging_dir = f's3://{bucket}/athena-query-results/'
conn = connect(s3_staging_dir=s3_staging_dir, region_name=region)

# ✅ Initialize AWS clients
s3_client = boto3.client("s3", region_name=region)
athena_client = boto3.client("athena", region_name=region)
sagemaker_client = boto3.client("sagemaker", region_name=region)
glue_client = boto3.client("glue", region_name=region)

# ✅ Function to list AWS resources
def _collect_names(client, operation, result_key, name_key, **params):
    """Return ``item[name_key]`` for every item across all pages of *operation*.

    *result_key* is the response field holding the item list; a page without
    that field contributes nothing.
    """
    names = []
    for page in client.get_paginator(operation).paginate(**params):
        names.extend(item[name_key] for item in page.get(result_key, []))
    return names


def _print_names(names, empty_message=None):
    """Print *names* as an indented bullet list, or *empty_message* if there are none."""
    if not names and empty_message is not None:
        print(empty_message)
    for name in names:
        print(f"   - {name}")


def list_aws_resources():
    """Print an overview of the Athena, Feature Store, S3 and Glue resources.

    Sections, in order: Athena databases, tables of each Athena database,
    SageMaker Feature Groups, objects in ``bucket`` (first 10 keys plus the
    total count), and Glue databases. Every listing reads all result pages.

    A ``ClientError`` in one section is printed and the remaining sections
    still run; a section's header is printed only after its listing succeeds.
    """
    print("\n--- üìå AWS Resource Overview ---")

    # ✅ List Athena databases
    try:
        database_names = _collect_names(
            athena_client, "list_databases", "DatabaseList", "Name",
            CatalogName="AwsDataCatalog",
        )
        print("\nüìå **Athena Databases:**")
        _print_names(database_names)
    except ClientError as e:
        print("‚ùå Error listing Athena databases:", e)
        database_names = []  # Lets the per-database loop below run as a no-op

    # ✅ List Athena tables per database
    for database in database_names:
        try:
            table_names = _collect_names(
                athena_client, "list_table_metadata", "TableMetadataList", "Name",
                CatalogName="AwsDataCatalog", DatabaseName=database,
            )
            print(f"\nüìå **Tables in Athena Database: `{database}`**")
            _print_names(table_names, "   ‚ùå No tables found in this database.")
        except ClientError as e:
            print(f"‚ùå Error listing tables in `{database}`:", e)

    # ✅ List Feature Store groups
    try:
        feature_group_names = _collect_names(
            sagemaker_client, "list_feature_groups",
            "FeatureGroupSummaries", "FeatureGroupName",
        )
        print("\nüìå **Feature Store Groups:**")
        _print_names(feature_group_names, "   ‚ùå No Feature Groups found.")
    except ClientError as e:
        print("‚ùå Error listing Feature Groups:", e)

    # ✅ List S3 files
    try:
        preview_limit = 10  # Show only the first 10 keys for readability
        preview_keys = []
        total_files = 0
        # Count across all pages but keep only the preview keys in memory.
        for page in s3_client.get_paginator("list_objects_v2").paginate(Bucket=bucket):
            contents = page.get("Contents", [])
            total_files += len(contents)
            remaining = preview_limit - len(preview_keys)
            preview_keys.extend(obj["Key"] for obj in contents[:remaining])
        print(f"\nüìå **S3 Files in Bucket `{bucket}`:**")
        _print_names(preview_keys, "   ‚ùå No files found.")
        if total_files > preview_limit:
            print(f"   ... ({total_files} total files)")
    except ClientError as e:
        print("‚ùå Error listing S3 files:", e)

    # ✅ List Glue databases
    try:
        glue_db_names = _collect_names(
            glue_client, "get_databases", "DatabaseList", "Name"
        )
        print("\nüìå **Glue Databases:**")
        _print_names(glue_db_names, "   ‚ùå No Glue Databases found.")
    except ClientError as e:
        print("‚ùå Error listing Glue databases:", e)

# ✅ Run AWS resource listing
# Prints the overview as soon as this cell/script is executed.
list_aws_resources()



--- üìå AWS Resource Overview ---

üìå **Athena Databases:**
   - db_airline_delay_cause
   - default
   - dsoaws
   - hw2_db
   - sagemaker_featurestore

üìå **Tables in Athena Database: `db_airline_delay_cause`**
   - airline_delay_cause_csv_raw
   - development_data
   - production_data

üìå **Tables in Athena Database: `default`**
   ‚ùå No tables found in this database.

üìå **Tables in Athena Database: `dsoaws`**
   - amazon_reviews_parquet
   - amazon_reviews_tsv

üìå **Tables in Athena Database: `hw2_db`**
   - hm2_csv_table

üìå **Tables in Athena Database: `sagemaker_featurestore`**
   - airline_delay_features_1739141540
   - airline_delay_features_1739161187
   - airline_delay_features_1739161465
   - airline_delay_features_1739161738
   - airline_delay_features_1739162223
   - airline_delay_features_1739162671
   - airline_delay_features_1739162802
   - hw3_neighborhood_feature_group_25_07_28_14_1737790106
   - hw3_neighborhood_feature_group_25_07_28_14_1737790196
 