In [1]:
!python --version

Python 3.12.9


In [None]:
!pip install --disable-pip-version-check -q pip --upgrade > /dev/null
!pip install --disable-pip-version-check -q wrapt==1.17.2
!pip install --disable-pip-version-check -q sparkmagic==0.22.0
!pip install --disable-pip-version-check -q nvidia-ml-py3==7.352.0
!pip install --disable-pip-version-check -q pydynamodb

In [None]:
!pip install --disable-pip-version-check -q awscli==1.18.216 boto3==1.29.6 botocore==1.37.2

In [None]:
!pip install --disable-pip-version-check -q botocore==1.37.4

In [None]:
!pip install --disable-pip-version-check -q awswrangler

In [None]:
!pip install --upgrade boto3 botocore awscli

In [2]:
#@title Setup and Import libraries
import pandas as pd
import numpy as np
import os
import json
import re
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# AWS
import boto3
import sagemaker
from pyathena import connect
from pyathena.pandas.cursor import PandasCursor

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [36]:
import boto3
import time
from botocore import UNSIGNED
from botocore.client import Config

REGION = "us-west-2"

# Public dataset bucket
DATA_BUCKET = "yelp-aai540-group6"
BASE_PREFIX = "yelp-dataset/json/"

ATHENA_BUCKET = DATA_BUCKET + "-athena"
attr_prefix = "attributes/business/"

# Athena MUST have a writable results location (a bucket you own)
ATHENA_RESULTS_S3 = f"s3://{ATHENA_BUCKET}/athena-results/"

ATHENA_DB = "yelp"

In [10]:
s3_public = boto3.client("s3", region_name=REGION, config=Config(signature_version=UNSIGNED))
athena = boto3.client("athena", region_name=REGION)

In [5]:
FILES = {
    "business": "yelp_academic_dataset_business.json",
    "review": "yelp_academic_dataset_review.json",
    "user": "yelp_academic_dataset_user.json",
    "checkin": "yelp_academic_dataset_checkin.json",
    "tip": "yelp_academic_dataset_tip.json",
}

OBJECT_KEYS = {t: f"{BASE_PREFIX}{fname}" for t, fname in FILES.items()}

for t, k in OBJECT_KEYS.items():
    print(f"{t:8} -> s3://{DATA_BUCKET}.s3.{REGION}.amazonaws.com/{k}")

# destination prefixes (directories)
DEST_PREFIXES = {
    table: f"{BASE_PREFIX}{table}/"
    for table in FILES
}
print(DEST_PREFIXES)

business -> s3://yelp-aai540-group6.s3.us-west-2.amazonaws.com/yelp-dataset/json/yelp_academic_dataset_business.json
review   -> s3://yelp-aai540-group6.s3.us-west-2.amazonaws.com/yelp-dataset/json/yelp_academic_dataset_review.json
user     -> s3://yelp-aai540-group6.s3.us-west-2.amazonaws.com/yelp-dataset/json/yelp_academic_dataset_user.json
checkin  -> s3://yelp-aai540-group6.s3.us-west-2.amazonaws.com/yelp-dataset/json/yelp_academic_dataset_checkin.json
tip      -> s3://yelp-aai540-group6.s3.us-west-2.amazonaws.com/yelp-dataset/json/yelp_academic_dataset_tip.json
{'business': 'yelp-dataset/json/business/', 'review': 'yelp-dataset/json/review/', 'user': 'yelp-dataset/json/user/', 'checkin': 'yelp-dataset/json/checkin/', 'tip': 'yelp-dataset/json/tip/'}


In [6]:
def verify_public_object(bucket: str, key: str):
    try:
        s3_public.head_object(Bucket=bucket, Key=key)
        return True
    except Exception as e:
        print(f"‚ùå Cannot access s3://{bucket}/{key}\n   Error: {e}")
        return False

all_ok = True
for t, key in OBJECT_KEYS.items():
    ok = verify_public_object(DATA_BUCKET, key)
    print(("‚úÖ" if ok else "‚ùå"), t, key)
    all_ok = all_ok and ok

if not all_ok:
    raise RuntimeError("One or more files were not accessible. Check names/paths.")

‚úÖ business yelp-dataset/json/yelp_academic_dataset_business.json
‚úÖ review yelp-dataset/json/yelp_academic_dataset_review.json
‚úÖ user yelp-dataset/json/yelp_academic_dataset_user.json
‚úÖ checkin yelp-dataset/json/yelp_academic_dataset_checkin.json
‚úÖ tip yelp-dataset/json/yelp_academic_dataset_tip.json


In [11]:
def run_athena_query(query: str, database: str = None) -> pd.DataFrame:
    params = {
        "QueryString": query,
        "ResultConfiguration": {"OutputLocation": ATHENA_RESULTS_S3},
    }
    if database:
        params["QueryExecutionContext"] = {"Database": database}

    # Start query
    qx = athena.start_query_execution(**params)
    qid = qx["QueryExecutionId"]

    # Wait for completion
    while True:
        resp = athena.get_query_execution(QueryExecutionId=qid)
        state = resp["QueryExecution"]["Status"]["State"]
        if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
            break
        time.sleep(1)

    if state != "SUCCEEDED":
        reason = resp["QueryExecution"]["Status"].get("StateChangeReason", "")
        raise RuntimeError(f"Athena query {state}: {reason}\n\nQuery:\n{query}")

    # Fetch results
    paginator = athena.get_paginator("get_query_results")
    rows = []

    for page in paginator.paginate(QueryExecutionId=qid):
        for row in page["ResultSet"]["Rows"]:
            rows.append([c.get("VarCharValue") for c in row["Data"]])

    # If no rows (DDL statements)
    if not rows:
        return pd.DataFrame()

    # First row is header
    header = rows[0]
    data = rows[1:]

    return pd.DataFrame(data, columns=header)


In [12]:
run_athena_query(f"CREATE DATABASE IF NOT EXISTS {ATHENA_DB};")
print(f"‚úÖ Database ready: {ATHENA_DB}")

‚úÖ Database ready: yelp


In [13]:
business_location = f"s3://{DATA_BUCKET}/{DEST_PREFIXES['business']}"

run_athena_query(f"""
CREATE EXTERNAL TABLE IF NOT EXISTS {ATHENA_DB}.business (
  business_id string,
  name string,
  address string,
  city string,
  state string,
  postal_code string,
  latitude double,
  longitude double,
  stars double,
  review_count int,
  is_open int,
  attributes map<string,string>,
  categories string,
  hours map<string,string>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('ignore.malformed.json'='true')
LOCATION '{business_location}'
TBLPROPERTIES ('has_encrypted_data'='false');
""", database=ATHENA_DB)

print("‚úÖ Created table yelp.business")

‚úÖ Created table yelp.business


In [14]:
review_location = f"s3://{DATA_BUCKET}/{DEST_PREFIXES['review']}"

run_athena_query(f"""
CREATE EXTERNAL TABLE IF NOT EXISTS {ATHENA_DB}.review (
  review_id string,
  user_id string,
  business_id string,
  stars double,
  useful int,
  funny int,
  cool int,
  text string,
  date string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('ignore.malformed.json'='true')
LOCATION '{review_location}'
TBLPROPERTIES ('has_encrypted_data'='false');
""", database=ATHENA_DB)

print("‚úÖ Created table yelp.review")

‚úÖ Created table yelp.review


In [16]:
user_location = f"s3://{DATA_BUCKET}/{DEST_PREFIXES['user']}"

run_athena_query(f"""
CREATE EXTERNAL TABLE IF NOT EXISTS {ATHENA_DB}.user (
  user_id string,
  name string,
  review_count int,
  yelping_since string,
  friends array<string>,
  useful int,
  funny int,
  cool int,
  fans int,
  elite array<string>,
  average_stars double,
  compliment_hot int,
  compliment_more int,
  compliment_profile int,
  compliment_cute int,
  compliment_list int,
  compliment_note int,
  compliment_plain int,
  compliment_cool int,
  compliment_funny int,
  compliment_writer int,
  compliment_photos int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('ignore.malformed.json'='true')
LOCATION '{user_location}'
TBLPROPERTIES ('has_encrypted_data'='false');
""", database=ATHENA_DB)

print("‚úÖ Created table yelp.user")


‚úÖ Created table yelp.user


In [19]:
checkin_location = f"s3://{DATA_BUCKET}/{DEST_PREFIXES['checkin']}"

run_athena_query(f"""
CREATE EXTERNAL TABLE IF NOT EXISTS {ATHENA_DB}.checkin (
  business_id string,
  date string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('ignore.malformed.json'='true')
LOCATION '{checkin_location}'
TBLPROPERTIES ('has_encrypted_data'='false');
""", database=ATHENA_DB)

print("‚úÖ Created table yelp.checkin")


‚úÖ Created table yelp.checkin


In [21]:
tip_location = f"s3://{DATA_BUCKET}/{DEST_PREFIXES['tip']}"

run_athena_query(f"""
CREATE EXTERNAL TABLE IF NOT EXISTS {ATHENA_DB}.tip (
  user_id string,
  business_id string,
  text string,
  date string,
  compliment_count int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ('ignore.malformed.json'='true')
LOCATION '{tip_location}'
TBLPROPERTIES ('has_encrypted_data'='false');
""", database=ATHENA_DB)

print("‚úÖ Created table yelp.tip")


‚úÖ Created table yelp.tip


In [35]:
run_athena_query(f"SHOW TABLES IN {ATHENA_DB};", database=ATHENA_DB)

Unnamed: 0,business
0,business_attributes
1,checkin
2,review
3,tip
4,user


In [None]:
TABLES = ["business", "review", "user", "checkin", "tip", "business_attributes"]

for table in TABLES:
    print(f"Dropping table: yelp.{table}")
    run_athena_query(
        f"DROP TABLE IF EXISTS yelp.{table};",
        database="yelp"
    )

paginator = s3.get_paginator("list_objects_v2")
to_delete = []

for page in paginator.paginate(Bucket=ATHENA_BUCKETEN, Prefix=attr_prefix):
    for obj in page.get("Contents", []):
        to_delete.append({"Key": obj["Key"]})

if not to_delete:
    print("‚úÖ Nothing to delete under", f"s3://{ATHENA_BUCKETEN}/{attr_prefix}")
else:
    # delete in batches of 1000 (S3 limit)
    for i in range(0, len(to_delete), 1000):
        s3.delete_objects(
            Bucket=bucket,
            Delete={"Objects": to_delete[i:i+1000]}
        )
    print(f"‚úÖ Deleted {len(to_delete)} objects under s3://{ATHENA_BUCKETEN}/{attr_prefix}")

print("‚úÖ All tables dropped.")

In [37]:
df = run_athena_query(f"SELECT * FROM {ATHENA_DB}.business LIMIT 2;", database="yelp")
display(df)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,JK7EcjrzqsezspdhwTnUag,Famous Tate Appliance & Bedding Center,8010 Grand Blvd,Port Richey,FL,34668,28.2734863,-82.7209732,4.5,20,1,"{restaurantspricerange2=2, restaurantstakeout=...","Shopping, Appliances & Repair, Local Services,...","{sunday=11:0-17:0, saturday=9:30-18:0, tuesday..."
1,nDExX1KzYmd03Dw_TMIXUw,Target,3064 Columbia Ave,Franklin,TN,37064,35.89233,-86.880687,3.5,17,1,"{businessacceptscreditcards=True, bikeparking=...","Drugstores, Food, Furniture Stores, Department...","{sunday=8:0-18:0, saturday=7:0-23:59, tuesday=..."


In [28]:
business_attributes_location = f"s3://{ATHENA_BUCKET}/{attr_prefix}"

run_athena_query(f"""
CREATE TABLE yelp.business_attributes
WITH (
  format = 'PARQUET',
  external_location = '{business_attributes_location}'
) AS
WITH normalized AS (
  SELECT
    business_id,
    hours,

    -- Normalize u'...' and '...' wrappers on keys + values
    map_from_entries(
      transform(
        map_entries(attributes),
        e -> CAST(
          ROW(
            regexp_replace(CAST(e[1] AS varchar), '^u?''(.*)''$', '$1'),
            regexp_replace(CAST(e[2] AS varchar), '^u?''(.*)''$', '$1')
          ) AS ROW(varchar, varchar)
        )
      )
    ) AS attrs
  FROM yelp.business
  WHERE attributes IS NOT NULL
),
parsed AS (
  SELECT
    business_id,
    hours,
    
    -- Convert literal "None" (any case) to NULL for all attribute lookups via helper expression pattern below
    attrs,

    -- Parse BusinessParking
    TRY(
      CAST(
        json_parse(
          replace(
            replace(
              replace(
                replace(
                  regexp_replace(attrs['businessparking'], 'u''(.*?)''', '"$1"'),
                  '''', '"'
                ),
                'False', 'false'
              ),
              'True', 'true'
            ),
            'None', 'null'
          )
        ) AS map(varchar, boolean)
      )
    ) AS parking_map,

    -- Parse Ambience 
    TRY(
      CAST(
        json_parse(
          replace(
            replace(
              replace(
                replace(
                  -- normalize u'...' keys inside the string
                  regexp_replace(attrs['ambience'], 'u''(.*?)''', '"$1"'),
                  '''', '"'
                ),
                'False', 'false'
              ),
              'True', 'true'
            ),
            'None', 'null'
          )
        ) AS map(varchar, boolean)
      )
    ) AS ambience_map,

    -- Parse GoodForMeal
    TRY(
      CAST(
        json_parse(
          replace(
            replace(
              replace(
                replace(
                  -- normalize u'...' keys
                  regexp_replace(attrs['goodformeal'], 'u''(.*?)''', '"$1"'),
                  '''', '"'
                ),
                'False', 'false'
              ),
              'True', 'true'
            ),
            'None', 'null'
          )
        ) AS map(varchar, boolean)
      )
    ) AS goodformeal_map,

    -- Parse BestNights
    TRY(
      CAST(
        json_parse(
          replace(
            replace(
              replace(
                replace(
                  -- normalize u'...' keys
                  regexp_replace(attrs['bestnights'], 'u''(.*?)''', '"$1"'),
                  '''', '"'
                ),
                'False', 'false'
              ),
              'True', 'true'
            ),
            'None', 'null'
          )
        ) AS map(varchar, boolean)
      )
    ) AS bestnights_map,

    -- Parse HairSpecializesIn
    TRY(
      CAST(
        json_parse(
          replace(
            replace(
              replace(
                replace(
                  -- normalize u'...' keys
                  regexp_replace(attrs['hairspecializesin'], 'u''(.*?)''', '"$1"'),
                  '''', '"'
                ),
                'False', 'false'
              ),
              'True', 'true'
            ),
            'None', 'null'
          )
        ) AS map(varchar, boolean)
      )
    ) AS hairspecializesin_map,
    
    -- Parse DietaryRestrictions
    TRY(
      CAST(
        json_parse(
          replace(
            replace(
              replace(
                replace(
                  -- normalize u'...' keys
                  regexp_replace(attrs['dietaryrestrictions'], 'u''(.*?)''', '"$1"'),
                  '''', '"'
                ),
                'False', 'false'
              ),
              'True', 'true'
            ),
            'None', 'null'
          )
        ) AS map(varchar, boolean)
      )
    ) AS dietaryrestrictions_map
  FROM normalized
)
SELECT
    business_id,
    
    -- Helper pattern: NULLIF(lower(x),'none') but preserving original case when not none
    CASE WHEN attrs['acceptsinsurance'] IS NULL OR lower(attrs['acceptsinsurance']) = 'none' THEN NULL ELSE attrs['acceptsinsurance'] END AS acceptsinsurance,
    CASE WHEN attrs['agesallowed'] IS NULL OR lower(attrs['agesallowed']) = 'none' THEN NULL ELSE attrs['agesallowed'] END AS agesallowed,
    CASE WHEN attrs['alcohol'] IS NULL OR lower(attrs['alcohol']) = 'none' THEN NULL ELSE attrs['alcohol'] END AS alcohol,
    CASE WHEN attrs['bikeparking'] IS NULL OR lower(attrs['bikeparking']) = 'none' THEN NULL ELSE attrs['bikeparking'] END AS bikeparking,
    CASE WHEN attrs['businessacceptsbitcoin'] IS NULL OR lower(attrs['businessacceptsbitcoin']) = 'none' THEN NULL ELSE attrs['businessacceptsbitcoin'] END AS businessacceptsbitcoin,
    CASE WHEN attrs['businessacceptscreditcards'] IS NULL OR lower(attrs['businessacceptscreditcards']) = 'none' THEN NULL ELSE attrs['businessacceptscreditcards'] END AS businessacceptscreditcards,
    CASE WHEN attrs['byappointmentonly'] IS NULL OR lower(attrs['byappointmentonly']) = 'none' THEN NULL ELSE attrs['byappointmentonly'] END AS byappointmentonly,
    CASE WHEN attrs['byob'] IS NULL OR lower(attrs['byob']) = 'none' THEN NULL ELSE attrs['byob'] END AS byob,
    CASE WHEN attrs['byobcorkage'] IS NULL OR lower(attrs['byobcorkage']) = 'none' THEN NULL ELSE attrs['byobcorkage'] END AS byobcorkage,
    CASE WHEN attrs['caters'] IS NULL OR lower(attrs['caters']) = 'none' THEN NULL ELSE attrs['caters'] END AS caters,
    CASE WHEN attrs['coatcheck'] IS NULL OR lower(attrs['coatcheck']) = 'none' THEN NULL ELSE attrs['coatcheck'] END AS coatcheck,
    CASE WHEN attrs['corkage'] IS NULL OR lower(attrs['corkage']) = 'none' THEN NULL ELSE attrs['corkage'] END AS corkage,
    CASE WHEN attrs['dogsallowed'] IS NULL OR lower(attrs['dogsallowed']) = 'none' THEN NULL ELSE attrs['dogsallowed'] END AS dogsallowed,
    CASE WHEN attrs['drivethru'] IS NULL OR lower(attrs['drivethru']) = 'none' THEN NULL ELSE attrs['drivethru'] END AS drivethru,
    CASE WHEN attrs['goodfordancing'] IS NULL OR lower(attrs['goodfordancing']) = 'none' THEN NULL ELSE attrs['goodfordancing'] END AS goodfordancing,
    CASE WHEN attrs['goodforkids'] IS NULL OR lower(attrs['goodforkids']) = 'none' THEN NULL ELSE attrs['goodforkids'] END AS goodforkids,
    CASE WHEN attrs['happyhour'] IS NULL OR lower(attrs['happyhour']) = 'none' THEN NULL ELSE attrs['happyhour'] END AS happyhour,
    CASE WHEN attrs['hastv'] IS NULL OR lower(attrs['hastv']) = 'none' THEN NULL ELSE attrs['hastv'] END AS hastv,
    CASE WHEN attrs['music'] IS NULL OR lower(attrs['music']) = 'none' THEN NULL ELSE attrs['music'] END AS music,
    CASE WHEN attrs['noiselevel'] IS NULL OR lower(attrs['noiselevel']) = 'none' THEN NULL ELSE attrs['noiselevel'] END AS noiselevel,
    CASE WHEN attrs['open24hours'] IS NULL OR lower(attrs['open24hours']) = 'none' THEN NULL ELSE attrs['open24hours'] END AS open24hours,
    CASE WHEN attrs['outdoorseating'] IS NULL OR lower(attrs['outdoorseating']) = 'none' THEN NULL ELSE attrs['outdoorseating'] END AS outdoorseating,
    CASE WHEN attrs['restaurantsattire'] IS NULL OR lower(attrs['restaurantsattire']) = 'none' THEN NULL ELSE attrs['restaurantsattire'] END AS restaurantsattire,
    CASE WHEN attrs['restaurantscounterservice'] IS NULL OR lower(attrs['restaurantscounterservice']) = 'none' THEN NULL ELSE attrs['restaurantscounterservice'] END AS restaurantscounterservice,
    CASE WHEN attrs['restaurantsdelivery'] IS NULL OR lower(attrs['restaurantsdelivery']) = 'none' THEN NULL ELSE attrs['restaurantsdelivery'] END AS restaurantsdelivery,
    CASE WHEN attrs['restaurantsgoodforgroups'] IS NULL OR lower(attrs['restaurantsgoodforgroups']) = 'none' THEN NULL ELSE attrs['restaurantsgoodforgroups'] END AS restaurantsgoodforgroups,
    CASE WHEN attrs['restaurantspricerange2'] IS NULL OR lower(attrs['restaurantspricerange2']) = 'none' THEN NULL ELSE attrs['restaurantspricerange2'] END AS restaurantspricerange2,
    CASE WHEN attrs['restaurantsreservations'] IS NULL OR lower(attrs['restaurantsreservations']) = 'none' THEN NULL ELSE attrs['restaurantsreservations'] END AS restaurantsreservations,
    CASE WHEN attrs['restaurantstableservice'] IS NULL OR lower(attrs['restaurantstableservice']) = 'none' THEN NULL ELSE attrs['restaurantstableservice'] END AS restaurantstableservice,
    CASE WHEN attrs['restaurantstakeout'] IS NULL OR lower(attrs['restaurantstakeout']) = 'none' THEN NULL ELSE attrs['restaurantstakeout'] END AS restaurantstakeout,
    CASE WHEN attrs['smoking'] IS NULL OR lower(attrs['smoking']) = 'none' THEN NULL ELSE attrs['smoking'] END AS smoking,
    CASE WHEN attrs['wheelchairaccessible'] IS NULL OR lower(attrs['wheelchairaccessible']) = 'none' THEN NULL ELSE attrs['wheelchairaccessible'] END AS wheelchairaccessible,
    CASE WHEN attrs['wifi'] IS NULL OR lower(attrs['wifi']) = 'none' THEN NULL ELSE attrs['wifi'] END AS wifi,
    
    -- Parking
    parking_map['garage']    AS parking_garage,
    parking_map['street']    AS parking_street,
    parking_map['validated'] AS parking_validated,
    parking_map['lot']       AS parking_lot,
    parking_map['valet']     AS parking_valet,
    
    -- Ambience
    ambience_map['divey']     AS ambience_divey,
    ambience_map['hipster']  AS ambience_hipster,
    ambience_map['casual']   AS ambience_casual,
    ambience_map['touristy'] AS ambience_touristy,
    ambience_map['trendy']   AS ambience_trendy,
    ambience_map['intimate'] AS ambience_intimate,
    ambience_map['romantic'] AS ambience_romantic,
    ambience_map['classy']   AS ambience_classy,
    ambience_map['upscale']  AS ambience_upscale,

    -- GoodForMeal
    goodformeal_map['dessert']    AS good_for_dessert,
    goodformeal_map['latenight'] AS good_for_latenight,
    goodformeal_map['lunch']     AS good_for_lunch,
    goodformeal_map['dinner']    AS good_for_dinner,
    goodformeal_map['brunch']    AS good_for_brunch,
    goodformeal_map['breakfast'] AS good_for_breakfast,

    -- BestNights
    bestnights_map['monday']    AS bestnight_monday,
    bestnights_map['tuesday']   AS bestnight_tuesday,
    bestnights_map['wednesday'] AS bestnight_wednesday,
    bestnights_map['thursday']  AS bestnight_thursday,
    bestnights_map['friday']    AS bestnight_friday,
    bestnights_map['saturday']  AS bestnight_saturday,
    bestnights_map['sunday']    AS bestnight_sunday,

    -- HairSpecializesIn
    hairspecializesin_map['africanamerican'] AS hair_africanamerican,
    hairspecializesin_map['asian']           AS hair_asian,
    hairspecializesin_map['coloring']        AS hair_coloring,
    hairspecializesin_map['curly']           AS hair_curly,
    hairspecializesin_map['extensions']      AS hair_extensions,
    hairspecializesin_map['kids']            AS hair_kids,
    hairspecializesin_map['perms']           AS hair_perms,
    hairspecializesin_map['straightperms']   AS hair_straightperms,

    -- DietaryRestrictions
    dietaryrestrictions_map['dairy-free']      AS dairy_free,
    dietaryrestrictions_map['gluten-free']    AS gluten_free,
    dietaryrestrictions_map['vegan']           AS vegan,
    dietaryrestrictions_map['kosher']          AS kosher,
    dietaryrestrictions_map['halal']           AS halal,
    dietaryrestrictions_map['soy-free']        AS soy_free,
    dietaryrestrictions_map['vegetarian']      AS vegetarian,

    -- Hours
    hours['monday']    AS hours_monday,
    hours['tuesday']   AS hours_tuesday,
    hours['wednesday'] AS hours_wednesday,
    hours['thursday']  AS hours_thursday,
    hours['friday']    AS hours_friday,
    hours['saturday']  AS hours_saturday,
    hours['sunday']    AS hours_sunday,

    CARDINALITY(map_keys(hours)) AS open_days_count,
    CASE WHEN hours['saturday'] IS NOT NULL OR hours['sunday'] IS NOT NULL THEN true ELSE false END AS open_on_weekend

FROM parsed;
""", database="yelp")

print("‚úÖ Built yelp.business_attributes")
print("üìç Location:", business_attributes_location)

‚úÖ Built yelp.business_attributes
üìç Location: s3://yelp-aai540-group6-athena/attributes/business/
