In [4]:
!pip install requests-aws4auth
import boto3
import requests
from requests_aws4auth import AWS4Auth
import json
import random

# --- AWS Setup ---
# DynamoDB table the restaurant records are read from.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table("yelp-restaurants")

# OpenSearch/Elasticsearch domain endpoint the documents are posted to.
ES_HOST = "https://search-restaurants-rgucpr3whjjvho6cfxx442h7l4.aos.us-east-1.on.aws"
session = boto3.session.Session()
credentials = session.get_credentials()
if credentials is None:
    # get_credentials() returns None when no credential source is configured;
    # fail here with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "No AWS credentials found; configure credentials before running this cell."
    )
# Take one consistent snapshot of key/secret/token so a refresh between
# attribute reads cannot hand AWS4Auth a mismatched set.
frozen_credentials = credentials.get_frozen_credentials()
# Fall back to us-east-1 (the region in ES_HOST) when the session has none set.
region = session.region_name or 'us-east-1'
# SigV4 signer for requests to the search domain ('es' is the signing service name).
awsauth = AWS4Auth(
    frozen_credentials.access_key,
    frozen_credentials.secret_key,
    region,
    'es',
    session_token=frozen_credentials.token
)

# --- Fetch all restaurants from DynamoDB ---
# A single scan call returns only one page of results; when more data remains
# the response carries 'LastEvaluatedKey', which must be passed back as
# ExclusiveStartKey to continue. Loop until the key is absent so that `items`
# really contains every row of the table.
response = table.scan()
items = response['Items']
while 'LastEvaluatedKey' in response:
    response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
    items.extend(response['Items'])

# --- Group restaurants by cuisine ---
# Maps each normalised (lower-cased, stripped) cuisine name to the list of
# restaurant records carrying it. Records with a missing or blank cuisine
# are left out.
cuisine_groups = {}
for record in items:
    label = record.get('cuisine', '').lower().strip()
    if label:
        bucket = cuisine_groups.get(label)
        if bucket is None:
            bucket = cuisine_groups[label] = []
        bucket.append(record)

# --- Pick up to 5 random cuisines ---
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of cuisines present.
selected_cuisines = random.sample(list(cuisine_groups), min(5, len(cuisine_groups)))

# --- Pick up to 4 restaurants per cuisine (total 20) ---
# Each chosen cuisine's list is shuffled in place and its first four entries
# (fewer if the list is shorter) are appended to the selection.
selected_restaurants = []
for chosen in selected_cuisines:
    pool = cuisine_groups[chosen]
    random.shuffle(pool)
    selected_restaurants += pool[:4]

print(f"Selected cuisines: {selected_cuisines}")
print(f"Total restaurants to index: {len(selected_restaurants)}")

# --- Index into Elasticsearch ---
# Posts one small document (id + lower-cased cuisine) per selected restaurant
# to the `restaurants` index, with the business_id as the document id in the
# URL. Each response is checked so rejected requests are reported as failures
# rather than as "Indexed", and the summary reflects what actually happened.
count = 0  # documents the cluster accepted (2xx/3xx response)
failed_ids = []
indexed_cuisines = set()
for position, restaurant in enumerate(selected_restaurants, start=1):
    business_id = restaurant['business_id']
    doc = {
        "RestaurantID": business_id,
        "cuisine": restaurant.get('cuisine', '').lower()
    }

    res = requests.post(
        f"{ES_HOST}/restaurants/_doc/{business_id}",
        auth=awsauth,
        headers={"Content-Type": "application/json"},
        data=json.dumps(doc),
        timeout=30,  # without a timeout a stalled connection would hang the cell
    )

    if res.ok:
        count += 1
        indexed_cuisines.add(doc['cuisine'])
        print(f"[{position}] Indexed {business_id} ({doc['cuisine']}) -> {res.status_code}")
    else:
        failed_ids.append(business_id)
        print(f"[{position}] FAILED {business_id} ({doc['cuisine']}) -> {res.status_code}: {res.text}")

print(f"\n✅ Done indexing {count} restaurants across {len(indexed_cuisines)} cuisines.")
if failed_ids:
    print(f"⚠️ {len(failed_ids)} restaurants were not indexed: {failed_ids}")


Selected cuisines: ['italian', 'japanese', 'mexican', 'chinese', 'indian']
Total restaurants to index: 20
[1] Indexed IxwuIDcpRa7EpQs7V481Ww (italian) -> 201
[2] Indexed 5Ukroq-mXS86EtRKj5deAQ (italian) -> 201
[3] Indexed TWH4MjLtN1fKlF-7n6YXHg (italian) -> 201
[4] Indexed kr5fRwdHtVwPPwSJxm9Fkg (italian) -> 201
[5] Indexed 6lUsFj0KCao678o9L7QwUw (japanese) -> 201
[6] Indexed k6B6NPw758qPPmHVWQdExw (japanese) -> 201
[7] Indexed ULDVJYTT-47RqbLtlR_FUA (japanese) -> 201
[8] Indexed GG72AwlaBM90YdLvRJliFg (japanese) -> 201
[9] Indexed BEYmqrP1_TC0nhnTGKkkJg (mexican) -> 201
[10] Indexed owNOHkJoqb7Bba4aCwTX-Q (mexican) -> 201
[11] Indexed PqqnNrUtU7XTKQvUM4cyqQ (mexican) -> 200
[12] Indexed dtAGaV2JLVvljg-0LZTa_w (mexican) -> 201
[13] Indexed C3tP0fNNAoxOAC3O406CXQ (chinese) -> 201
[14] Indexed 8plXW6ZQI1awPu1scms3jQ (chinese) -> 201
[15] Indexed FcXsiSJbk2ZnHyFJMkMK3g (chinese) -> 201
[16] Indexed TU_BU9HLflYI2xlyqVQdRA (chinese) -> 201
[17] Indexed 0qEzruxRcocgGqUOY6Me0w (indian) -> 201