### Prepare data then insert into DynamoDB

In [1]:
from __future__ import print_function # Python 2/3 compatibility
import boto3
import csv

### Combine all cuisine types into one csv file, ensure Business_ID unique

In [2]:
types = ['chinese', 'italian', 'indian','mexican','american','sushi']

In [3]:
# combine all the file into FILE_0, ensure restaurant business_ids are unique
business_id_set = set()
line_count = 0
for t in types:
    print(t)
    file_name = 'yelp_'+t+'.csv'
    new_file = 'yelp.csv'
    with open(file_name) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        with open(new_file,'a') as out:
            csv_out = csv.writer(out)
            for row in csv_reader:
                # each row: ['Business_ID', 'Name', 'Address', 'Coordinates', 'Num_of_Reviews', 'Rating', 'Zip_Code', 'Cuisine']
                if line_count == 0:
                    csv_out.writerow(['Business_ID', 'Name', 'Address', 'Coordinates', 'Num_of_Reviews', 'Rating', 'Zip_Code', 'Cuisine'])
                    line_count += 1
                else:
                    if row[0] and row[0] not in business_id_set:
                        row.append(t)
                        csv_out.writerow(row)
                        business_id_set.add(row[0])
                        line_count += 1
                    else:
                        pass

chinese
italian
indian
mexican
american
sushi


### Connect to DynamoDB

In [4]:
# Get the service resource.
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
# A low-level client representing Amazon DynamoDB
client = boto3.client('dynamodb')

In [5]:
table = dynamodb.create_table(
    TableName='yelp-restaurant',
    KeySchema=[
        {
            'AttributeName': 'Business_ID', #Partition key
            'KeyType': 'HASH'
        },
        {
            'AttributeName': 'insertedAtTimestamp', #sort key
            'KeyType': 'RANGE'
        }
    ],
    AttributeDefinitions=[
        {
            'AttributeName': 'Business_ID',
            'AttributeType': 'S'
        },
        {
            'AttributeName': 'insertedAtTimestamp',
            'AttributeType': 'S'
        },
    ],
    ProvisionedThroughput={
        'ReadCapacityUnits': 5,
        'WriteCapacityUnits': 5
    }
)

In [6]:
# Wait until the table exists.
table.meta.client.get_waiter('table_exists').wait(TableName='yelp-restaurant')

In [7]:
# Print out some data about the table.
print(table.item_count)

0


### insert data from csv

In [8]:
import time

In [9]:
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

2019-04-05 15:47:35


In [10]:
with table.batch_writer() as batch:
    with open('yelp.csv') as csvfile:
        reader = csv.reader(csvfile)
        #['Business_ID', 'Name', 'Address', 'Coordinates', 'Num_of_Reviews', 'Rating', 'Zip_Code', 'Cuisine']
        for row in reader:
            try:
                batch.put_item(
                Item={
                    'insertedAtTimestamp': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                    'cuisine': row[7],
                    'Business_ID': row[0],
                    'Name': row[1],
                    'Address': row[2],
                    'Coordinates': row[3],
                    'Num_of_Reviews':row[4],
                    'Rating':row[5],
                    'Zip_Code':row[6]
                })
            except:
                print(row)

['DLRbxg74oWNPQJdbSkTDxQ', 'Suprema Provisions', '305 Bleecker St', "{'latitude': 40.7326925, 'longitude': -74.0036155}", '183', '3.5', '10014', 'italian']
['ZrhOYpCBzHYxM6jPQLNerQ', 'Indian Xpres', '19 Corson Ave', "{'latitude': 40.6383408, 'longitude': -74.0793587}", '35', '3.5', '10301', 'indian']
['NqnCmOlPIJ_QBQzvZ28djA', 'Sapthagiri Taste Of India', '804 Newark Ave', "{'latitude': 40.73571680043, 'longitude': -74.0650333434929}", '400', '3.5', '07306', 'indian']
['5iKAeJ8RVrNGRUNy-355jQ', 'Deccan Spice', '771 Newark Ave', "{'latitude': 40.73522, 'longitude': -74.06385}", '343', '3.5', '07306', 'indian']
['-PCQyh7uZPBamQYfpI8RBw', 'Dosa Hut', '777 Newark Ave', "{'latitude': 40.7352599, 'longitude': -74.06407}", '104', '3.0', '07306', 'indian']
['HwDu45zV90Wmprn6sPttCQ', 'Mexican Festival', '2672 Broadway', "{'latitude': 40.7982804740353, 'longitude': -73.9689128100872}", '234', '3.0', '10025', 'mexican']
['qmoA7CY8hhU4RIUGAfRYhg', 'Taqueria Huarache', '16 E Mt Eden Ave', "{'latitu

### Some querying and testing 

In [34]:
table = dynamodb.Table('yelp-restaurant')
# Print out some data about the table.
# This will cause a request to be made to DynamoDB and its attribute
# values will be set based on the response.
print(table.creation_date_time)
print(table.item_count)

2019-04-05 15:47:11.948000-04:00
0


In [37]:
### query
from boto3.dynamodb.conditions import Key, Attr

response = table.scan(
    FilterExpression=Attr('cuisine').lt('chinese')
)
items = response['Items']
print(len(items))

780


In [42]:
client = boto3.client('dynamodb')

In [43]:
table = client.describe_table(
    TableName='yelp-restaurant'
)
print(table)

{u'Table': {u'TableArn': u'arn:aws:dynamodb:us-east-1:403700260976:table/yelp-restaurant', u'AttributeDefinitions': [{u'AttributeName': u'Business_ID', u'AttributeType': u'S'}, {u'AttributeName': u'insertedAtTimestamp', u'AttributeType': u'S'}], u'ProvisionedThroughput': {u'NumberOfDecreasesToday': 0, u'WriteCapacityUnits': 5, u'ReadCapacityUnits': 5}, u'TableSizeBytes': 0, u'TableName': u'yelp-restaurant', u'TableStatus': u'ACTIVE', u'TableId': u'97b47bbb-ed1b-417a-900b-e08ffdb7a78c', u'KeySchema': [{u'KeyType': u'HASH', u'AttributeName': u'Business_ID'}, {u'KeyType': u'RANGE', u'AttributeName': u'insertedAtTimestamp'}], u'ItemCount': 0, u'CreationDateTime': datetime.datetime(2019, 4, 5, 15, 47, 11, 948000, tzinfo=tzlocal())}, 'ResponseMetadata': {'RetryAttempts': 0, 'HTTPStatusCode': 200, 'RequestId': 'UVBVR7T6B1R9NAQ2ONC18AO8N7VV4KQNSO5AEMVJF66Q9ASUAAJG', 'HTTPHeaders': {'x-amzn-requestid': 'UVBVR7T6B1R9NAQ2ONC18AO8N7VV4KQNSO5AEMVJF66Q9ASUAAJG', 'content-length': '613', 'server': 'S