In [24]:
import pandas as pd
import boto3
import os
import json
from decimal import Decimal
from datetime import datetime


In [4]:
# Load the restaurant listings; the first CSV column is used as the row index.
df = pd.read_csv("yelp-restaurants.csv", index_col=0)

In [27]:
# Show the loaded frame as the cell's output to eyeball its columns and rows.
df

Unnamed: 0,id,name,address,zip_code,latitude,longitude,rating,review_count,cuisine
0,hdiuRS9sVZSMReZm4oV5SA,Da Andrea,"35 W 13th St, New York, NY 10011",10011,40.736218,-73.995970,4.5,1822,italian
1,VmdyRRMtUXOWV7F7w0ImwQ,Piccola Cucina Uptown,"106 E 60th St, New York, NY 10022",10022,40.763174,-73.969046,4.5,284,italian
2,16ZnHpuaaBt92XWeJHCC5A,Olio e Più,"3 Greenwich Ave, New York, NY 10014",10014,40.733798,-73.999774,4.5,4035,italian
3,GjEE8vvjqb7eeHA3BGNxAw,Barolo East,"214 E 49th St, New York, NY 10017",10017,40.754810,-73.970590,4.5,247,italian
4,OWNmOq-TAsHjGcYdCVclSw,Osteria La Baia,"129 W 52nd St, New York, NY 10019",10019,40.761896,-73.980675,4.5,68,italian
...,...,...,...,...,...,...,...,...,...
4710,RqnPXUsRyFShLA5zhSQKFg,Pearl Street Supper Club,"147 Front St, Brooklyn, NY 11201",11201,40.702530,-73.987110,4.5,7,indian
4711,xmYzN332UbHdFubseLES_g,Ditmars Bar & Grill,"102-05 Ditmars Blvd, Queens, NY 11369",11369,40.768955,-73.867559,4.0,1,indian
4712,unjpqbSvkaldY4iQkVAn6Q,Maman,"67 University Pl, New York, NY 10003",10003,40.732863,-73.993403,3.5,46,indian
4713,HCR1hJIAeY8Z32AnZdwsOA,The Blue Rooftop,"114 E 32nd St, New York, NY 10016",10016,40.745260,-73.981630,3.5,31,indian


In [28]:
# Preview the list-of-dicts form on a handful of rows only: converting the
# whole frame here would print every record into the cell output.
df.head().to_dict('records')

[{'id': 'hdiuRS9sVZSMReZm4oV5SA',
  'name': 'Da Andrea',
  'address': '35 W 13th St, New York, NY 10011',
  'zip_code': 10011,
  'latitude': 40.736218,
  'longitude': -73.99597,
  'rating': 4.5,
  'review_count': 1822,
  'cuisine': 'italian'},
 {'id': 'VmdyRRMtUXOWV7F7w0ImwQ',
  'name': 'Piccola Cucina Uptown',
  'address': '106 E 60th St, New York, NY 10022',
  'zip_code': 10022,
  'latitude': 40.763174,
  'longitude': -73.969046,
  'rating': 4.5,
  'review_count': 284,
  'cuisine': 'italian'},
 {'id': '16ZnHpuaaBt92XWeJHCC5A',
  'name': 'Olio e Più',
  'address': '3 Greenwich Ave, New York, NY 10014',
  'zip_code': 10014,
  'latitude': 40.733798036104304,
  'longitude': -73.99977392649927,
  'rating': 4.5,
  'review_count': 4035,
  'cuisine': 'italian'},
 {'id': 'GjEE8vvjqb7eeHA3BGNxAw',
  'name': 'Barolo East',
  'address': '214 E 49th St, New York, NY 10017',
  'zip_code': 10017,
  'latitude': 40.75481,
  'longitude': -73.97059,
  'rating': 4.5,
  'review_count': 247,
  'cuisine': 

In [29]:
# One dict per restaurant row.
data = df.to_dict("records")

# Create a DynamoDB resource using credentials taken from the environment
dynamodb = boto3.resource(
    "dynamodb",
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    region_name="us-east-1",
)

# Get a reference to your table
table = dynamodb.Table("yelp-restaurants")

# Write through a batch writer instead of one PutItem request per row: it
# buffers the puts, sends them in batches and resends unprocessed items.
# overwrite_by_pkeys de-duplicates buffered items sharing the same "id", so a
# repeated id keeps its last record (as sequential puts would) instead of
# making the batch request fail on duplicate keys.
with table.batch_writer(overwrite_by_pkeys=["id"]) as batch:
    for restaurant in data:
        # Stamp each record with the local wall-clock time of insertion.
        restaurant["insertedAtTimestamp"] = str(datetime.now())
        # The JSON round-trip turns float values into Decimal, the numeric
        # type the DynamoDB resource API accepts.
        batch.put_item(
            Item=json.loads(json.dumps(restaurant), parse_float=Decimal)
        )

In [30]:
# Spot-check the table: fetch a single item by its "id" key and display the raw response.
table.get_item(Key={"id": "z8TIY5xLeQptbXxZp3LpsA"})

{'Item': {'rating': Decimal('4.5'),
  'zip_code': Decimal('10019'),
  'insertedAtTimestamp': '2023-02-24 17:42:24.466391',
  'longitude': Decimal('-73.975396'),
  'address': '15 W 56th St, New York, NY 10019',
  'id': 'z8TIY5xLeQptbXxZp3LpsA',
  'latitude': Decimal('40.762862'),
  'name': 'Felice 56',
  'review_count': Decimal('88'),
  'cuisine': 'italian'},
 'ResponseMetadata': {'RequestId': 'L0ORLF3ILPUIFNLKC1JPH36HRRVV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Fri, 24 Feb 2023 22:49:15 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '337',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'L0ORLF3ILPUIFNLKC1JPH36HRRVV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '1184832954'},
  'RetryAttempts': 0}}

In [26]:
# delete all items
#
# NOTE(review): this cell sits after the upload cell but carries an earlier
# execution count, so running the notebook top-to-bottom would empty the
# table right after loading it -- confirm the intended order.

scan_kwargs = {}
with table.batch_writer() as batch:
    while True:
        # A single scan call returns one page of results only, so keep
        # requesting pages until the table has been fully walked.
        scan = table.scan(**scan_kwargs)
        for each in scan['Items']:
            batch.delete_item(
                Key={
                    'id': each['id']
                }
            )
        # LastEvaluatedKey is present only when more pages remain; it is the
        # position from which the next scan must resume.
        last_key = scan.get('LastEvaluatedKey')
        if last_key is None:
            break
        scan_kwargs['ExclusiveStartKey'] = last_key

In [40]:
# Flatten the dataframe into one dict per restaurant
records = df.to_dict('records')

# Build the bulk payload as NDJSON. Every restaurant contributes two lines:
# an action line naming the target index and document id, followed by the
# document itself (only the id and cuisine fields are included).
ndjson_data = [
  json.dumps({"index": {"_index": "restaurants", "_id": row["id"]}})
  + "\n"
  + json.dumps({"id": row["id"], "cuisine": row["cuisine"]})
  + "\n"
  for row in records
]

# Finish the payload with one extra newline
ndjson_data.append("\n")

# Concatenate the pieces into the final request body
request_body = "".join(ndjson_data)

# Show the payload in the cell output
print(request_body)

# Persist the payload to disk as plain text
with open("data.json", "w") as ndjson_file:
  ndjson_file.write(request_body)

{"index": {"_index": "restaurants", "_id": "hdiuRS9sVZSMReZm4oV5SA"}}
{"id": "hdiuRS9sVZSMReZm4oV5SA", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "VmdyRRMtUXOWV7F7w0ImwQ"}}
{"id": "VmdyRRMtUXOWV7F7w0ImwQ", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "16ZnHpuaaBt92XWeJHCC5A"}}
{"id": "16ZnHpuaaBt92XWeJHCC5A", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "GjEE8vvjqb7eeHA3BGNxAw"}}
{"id": "GjEE8vvjqb7eeHA3BGNxAw", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "OWNmOq-TAsHjGcYdCVclSw"}}
{"id": "OWNmOq-TAsHjGcYdCVclSw", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "hkooeEkd3oPsLD6JJrYpjA"}}
{"id": "hkooeEkd3oPsLD6JJrYpjA", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "Tp3MF3DNXuW41A10PQ2nFw"}}
{"id": "Tp3MF3DNXuW41A10PQ2nFw", "cuisine": "italian"}
{"index": {"_index": "restaurants", "_id": "IAbOg5Kd5AynOcbI8vGTwQ"}}
{"id": "IAbOg5Kd5AynOcbI8vGTwQ", "cuisine": "italian"}
