In [2]:
from decimal import Decimal
from datetime import datetime, timezone
import pandas as pd
from tqdm import tqdm
import boto3

# Load the restaurant CSV and upload every row that has coordinates to the
# 'yelp-restaurants' DynamoDB table in us-east-1.
df = pd.read_csv("manhattan_restaurants.csv")
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
table = dynamodb.Table('yelp-restaurants')

# Primary-key attribute names, read from the table's own key schema. They are
# passed to the batch writer so that two buffered items with the same key are
# collapsed to the most recent one instead of making the whole batch request
# fail on a duplicate key.
key_names = [key["AttributeName"] for key in table.key_schema]

# Open ONE batch writer around the whole loop. Creating a writer per row makes
# each `with` block flush a single item on exit, i.e. one request per row,
# which defeats the purpose of batching.
with table.batch_writer(overwrite_by_pkeys=key_names) as batch:
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Uploading to DynamoDB"):
        lat, lon = row["latitude"], row["longitude"]
        if pd.isna(lat) or pd.isna(lon):
            continue  # skip rows with missing coordinates

        item = {
            "business_id": str(row["id"]),
            "name": str(row["name"]),
            "cuisine": str(row["cuisine"]),
            # Numbers go through str() -> Decimal because the boto3 resource
            # layer does not accept Python floats for DynamoDB number values.
            "rating": Decimal(str(row["rating"])),
            "review_count": int(row["review_count"]),
            "price": str(row["price"]),
            "address": str(row["address"]),
            "coordinates": {
                "latitude": Decimal(str(lat)),
                "longitude": Decimal(str(lon))
            },
            # Missing phone/url values are stored as empty strings.
            "phone": str(row["phone"]) if not pd.isna(row["phone"]) else "",
            "url": str(row["url"]) if not pd.isna(row["url"]) else "",
            "zip_code": str(row["zip_code"]),
            # Per-item UTC timestamp in ISO 8601 format, taken at build time.
            "insertedAtTimestamp": datetime.now(timezone.utc).isoformat()
        }

        # Buffered by the writer; any remainder is flushed when the `with`
        # block exits.
        batch.put_item(Item=item)

print("✅ All CSV data uploaded to DynamoDB!")


Uploading to DynamoDB: 100%|██████████| 1000/1000 [00:29<00:00, 33.77it/s]

✅ All CSV data uploaded to DynamoDB!



