In [1]:
import requests
from datetime import datetime
# import boto3
from decimal import Decimal


class RestaurantData:
    """Fetch restaurant listings from a Yelp-style search API and normalise them.

    Every restaurant id that has been formatted is remembered in
    ``restaurant_ids`` so the same business is not emitted twice when it
    shows up under several cuisines.
    """

    # Cuisine categories queried when the caller does not supply its own list.
    DEFAULT_CUISINES = ('indpak', 'italian', 'mexican', 'chinese', 'japanese', 'french', 'greek')

    # Seconds to wait for a single HTTP response before giving up, so a
    # stalled connection cannot hang the notebook forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        # Ids of every restaurant already formatted by this instance.
        self.restaurant_ids = set()

    def _is_value_empty(self, value):
        """Return True when ``value`` is falsy (None, '', 0, empty container).

        Note that numeric zero counts as empty, so a zero ``rating`` or
        ``review_count`` is omitted from the formatted record.
        """
        return not value or len(str(value)) == 0

    def _format_data(self, entry, cuisine):
        """Convert one business dict from the API into a flat record.

        Args:
            entry: A business dict; ``id``, ``name`` and ``url`` are required,
                every other field is optional and skipped when empty.
            cuisine: The category the business was found under. ``'indpak'``
                is stored as ``'indian'``.

        Returns:
            dict: The flattened record. Numeric fields are ``Decimal``.

        Raises:
            KeyError: If ``id``, ``name`` or ``url`` is missing from ``entry``.
        """
        formatted = {}
        self.restaurant_ids.add(entry['id'])

        formatted['id'] = entry['id']
        formatted['insertedAtTimestamp'] = str(datetime.now())
        formatted['cuisine_type'] = 'indian' if cuisine == 'indpak' else cuisine
        formatted['name'] = entry['name']
        formatted['url'] = entry['url']

        if not self._is_value_empty(entry.get("rating")):
            # Go through str() so 4.4 becomes Decimal('4.4') rather than the
            # full binary expansion Decimal('4.4000000000000003552...').
            formatted["rating"] = Decimal(str(entry["rating"]))

        # The coordinates dict can be present while holding null values;
        # only store the pair when both halves are usable.
        coordinates = entry.get("coordinates") or {}
        latitude = coordinates.get("latitude")
        longitude = coordinates.get("longitude")
        if latitude is not None and longitude is not None:
            formatted["latitude"] = Decimal(str(latitude))
            formatted["longitude"] = Decimal(str(longitude))

        if not self._is_value_empty(entry.get("phone")):
            formatted["contact"] = entry["phone"]
        if not self._is_value_empty(entry.get("review_count")):
            formatted["review_count"] = entry["review_count"]
        if not self._is_value_empty(entry.get("price")):
            formatted["price"] = entry["price"]

        location = entry.get('location')
        if location:
            address_lines = location.get('display_address') or []
            if address_lines:
                # Separate the address lines; joining with '' produced
                # strings such as '150 E 50th StNew York, NY 10022'.
                formatted['address'] = ", ".join(address_lines)
            if not self._is_value_empty(location.get('zip_code')):
                formatted['zip_code'] = location['zip_code']

        return formatted

    def fetch_yelp_data(self, api, api_key, location='manhattan', cuisines=None):
        """Page through the search endpoint for each cuisine and collect records.

        Args:
            api: URL of the business search endpoint.
            api_key: Bearer token sent in the ``Authorization`` header.
            location: Location string passed to the API.
            cuisines: Iterable of category aliases to query; defaults to
                ``DEFAULT_CUISINES``.

        Returns:
            list[dict]: Formatted records, de-duplicated by restaurant id
            across all cuisines (and across earlier calls on this instance).
        """
        if cuisines is None:
            cuisines = self.DEFAULT_CUISINES

        headers = {"Authorization": "Bearer " + api_key}
        all_data = []

        for cuisine in cuisines:
            # At most 1000 results are requested per cuisine, 50 per page.
            # NOTE(review): presumably mirrors the API's pagination cap - confirm.
            total_entries = 1000
            offset = 0
            while total_entries > 0:
                params = {
                    'location': location,
                    'categories': cuisine,
                    'limit': 50,
                    'offset': offset
                }
                response = requests.get(
                    api,
                    headers=headers,
                    params=params,
                    timeout=self.REQUEST_TIMEOUT_SECONDS,
                ).json()

                businesses = response.get("businesses")
                if not businesses:
                    # Empty page or an error payload: stop paging this cuisine.
                    break

                for entry in businesses:
                    if entry['id'] in self.restaurant_ids:
                        continue
                    all_data.append(self._format_data(entry, cuisine))
                total_entries -= len(businesses)
                offset += len(businesses)

        return all_data

    # def store_data(self, restaurant_data):
    #     dynamodb = boto3.resource('dynamodb',
    #                               aws_access_key_id='',
    #                               aws_secret_access_key='',
    #                               region_name='us-east-1')
    #     table = dynamodb.Table('yelp_restaurants')
    #     for data in restaurant_data:
    #         table.put_item(Item=data)




In [4]:
import os

api_url = 'https://api.yelp.com/v3/businesses/search'
# Hardcoded credentials were removed before pushing to GitHub. The Yelp API key
# is read from the YELP_API_KEY environment variable instead, so this cell no
# longer fails with NameError on a fresh kernel (a missing variable raises a
# KeyError that names it).
api_access_key = os.environ["YELP_API_KEY"]

processor = RestaurantData()
data = processor.fetch_yelp_data(api_url, api_access_key)
# processor.store_data(data)

In [14]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
# The context manager closes the file handle, which a bare open() call leaks.
with open("yeld_restaraunt_info.p", "rb") as pickle_file:
    data = pickle.load(pickle_file)

In [15]:
import json
from decimal import Decimal

class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``Decimal`` values as floats."""

    def default(self, obj):
        """Return a float for ``Decimal`` objects; defer everything else.

        Anything that is not a ``Decimal`` is handed to the base encoder,
        which raises ``TypeError`` for types it cannot serialise.
        """
        if not isinstance(obj, Decimal):
            return super().default(obj)
        return float(obj)

def sanitize_item(item):
    """Round-trip ``item`` through JSON so its floats come back as ``Decimal``.

    ``Decimal`` values are first written out as floats by ``DecimalEncoder``;
    the resulting JSON text is then parsed again with every float literal
    converted to ``Decimal``.
    """
    serialized = json.dumps(item, cls=DecimalEncoder)
    return json.loads(serialized, parse_float=Decimal)

In [18]:
from botocore.exceptions import ClientError
import boto3

def push_to_dynamodb(table_name, item, aws_access_key_id, aws_secret_access_key):
    """Write a single item to a DynamoDB table in ``us-east-1``.

    Args:
        table_name: Name of the target DynamoDB table.
        item: Record to store; it is passed through ``sanitize_item`` first.
        aws_access_key_id: AWS access key id used to build the resource.
        aws_secret_access_key: AWS secret key used to build the resource.

    Returns:
        The ``put_item`` response on success, or ``None`` when the call
        raises ``ClientError`` (the error message is printed).
    """
    resource = boto3.resource(
        'dynamodb',
        region_name="us-east-1",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    target_table = resource.Table(table_name)

    # Sanitising happens outside the try block, so its errors propagate.
    clean_item = sanitize_item(item)

    try:
        result = target_table.put_item(Item=clean_item)
    except ClientError as err:
        print(f"Error adding item to {table_name}: {err.response['Error']['Message']}")
        return None
    else:
        print(f"Successfully added item to {table_name}")
        return result

In [20]:
tablename = 'yelp-restaurants'

# Trial upload: range(1) limits the loop to the first record only.
# NOTE(review): aws_access_key / aws_secret_access_key are not defined in any
# visible cell - they must already exist in the kernel; confirm before running.
for i in range(1):
    print(i)
    push_to_dynamodb(
        table_name=tablename,
        item=data[i],
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_access_key,
    )

0
Successfully added item to yelp-restaurants


In [16]:
# Display one formatted record to inspect which fields it carries.
data[3]

{'id': 'A-ert0jDRBfku9ogyW_mEg',
 'insertedAtTimestamp': '2024-10-15 15:21:27.454296',
 'cuisine_type': 'indian',
 'name': 'Spice Symphony - Midtown East',
 'url': 'https://www.yelp.com/biz/spice-symphony-midtown-east-new-york?adjust_creative=0_dLB7aYv_yCRg13g3cs0g&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=0_dLB7aYv_yCRg13g3cs0g',
 'rating': Decimal('4.4000000000000003552713678800500929355621337890625'),
 'latitude': Decimal('40.7558937072754'),
 'longitude': Decimal('-73.9715347290039'),
 'contact': '+12123004869',
 'review_count': 708,
 'price': '$$',
 'address': '150 E 50th StNew York, NY 10022',
 'zip_code': '10022'}

In [2]:
import pickle

# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
# The context manager closes the file handle, which a bare open() call leaks.
with open("yeld_restaraunt_info.p", "rb") as pickle_file:
    data = pickle.load(pickle_file)

In [9]:
import json

# Build the newline-delimited payload for the Elasticsearch bulk API:
# each restaurant contributes an action line followed by its document line.
formatted_data = []
for item in data:
    restaurant_id = item.get("id")  # Assuming each item has an 'id' field
    formatted_data.extend([
        # Action metadata ("restaurants" is the index name; replace as needed)
        {"index": {"_index": "restaurants", "_id": restaurant_id}},
        # Only the id and cuisine type are indexed
        {"id": restaurant_id, "cuisine_type": item.get("cuisine_type")},
    ])

# Write one JSON object per line to the bulk file
with open('formatted_data.json', 'w') as bulk_file:
    bulk_file.writelines(json.dumps(line) + '\n' for line in formatted_data)

In [None]:
# Reference notes only: this string literal is never run as a shell command.
# It records the curl invocations that POST formatted_data.json to the _bulk
# endpoint; "uname:password" is a placeholder for the real credentials.
"""
curl -X POST -u "uname:password" "https://search-restaurants-2xdgvyexvq2piwckzjh5k6vh64.us-east-2.es.amazonaws.com/_bulk" --data-binary "@formatted_data.json" -H "Content-Type: application/json"
curl -X POST -u "uname:password" "https://search-restaurants-vdxopwnqknskbpvq5envfpxje4.us-east-1.es.amazonaws.com/_bulk" --data-binary "@formatted_data.json" -H "Content-Type: application/json"

"https://search-restaurants-vdxopwnqknskbpvq5envfpxje4.us-east-1.es.amazonaws.com/"
"""