In [7]:
import pandas as pd
from spdynamodb import DynamoTable
from time import sleep
import json
from decimal import Decimal
import logging
# Configure the root logger so INFO-level records (including those emitted by
# imported libraries) are shown in the notebook output.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Attach the default stderr handler explicitly. The cell previously relied on a
# leftover `logging.debug("test")` call, whose only visible effect was the implicit
# basicConfig() it triggers; basicConfig() does nothing if handlers already exist.
logging.basicConfig()

In [None]:
# Load the raw dataset from the zipped CSV file next to the notebook.
archive_path = 'directory.zip'
df = pd.read_csv(archive_path, compression='zip')

In [None]:
# Preview the first rows of the raw frame to check that the columns loaded as expected.
df.head()

In [2]:
# Connect to the StarbucksLocations table, creating it when it cannot be selected.
TABLE_NAME = 'StarbucksLocations'

dt = DynamoTable()
try:
    dt.select_table(TABLE_NAME)
except Exception as exc:
    # A bare `except:` would also trap KeyboardInterrupt/SystemExit, so only
    # ordinary exceptions are handled here.
    # NOTE(review): the exception spdynamodb raises for a missing table is not
    # visible from this notebook; narrow this clause once confirmed so that
    # credential or network failures are not mistaken for "table does not exist".
    print(f"Could not select table {TABLE_NAME!r} ({exc!r}); creating it.")
    dt.create_table(
        table_name=TABLE_NAME,
        partition_key='StoreNumber',
        partition_key_type='S',
        provisioned=False
    )
else:
    # Show the table summary only after a successful select; keeping this out of
    # the `try` body means a display error can no longer trigger table creation.
    print(dt)

- Table name: StarbucksLocations            
- Table arn: arn:aws:dynamodb:us-east-1:688733876861:table/StarbucksLocations            
- Table creation: 2023-04-19 15:40:23.877000-03:00            
- [{'AttributeName': 'StoreNumber', 'KeyType': 'HASH'}]            
- [{'AttributeName': 'Country', 'AttributeType': 'S'}, {'AttributeName': 'StateCityPostcode', 'AttributeType': 'S'}, {'AttributeName': 'StoreNumber', 'AttributeType': 'S'}]            
- Point-in-time recovery status: DISABLED


In [3]:
# Read the table's current deletion-protection flag; as the cell's last
# expression, its value is displayed below the cell.
dt.delete_protection

True

In [None]:
# Turn on deletion protection for the table.
# NOTE(review): assigning this attribute presumably makes spdynamodb issue the
# corresponding table update — confirm in the library.
dt.delete_protection = True

In [8]:
# Request point-in-time recovery (PITR) for the table by assigning "ENABLED".
# The recorded output shows the library logging that PITR was already enabled.
dt.status_pitr = "ENABLED"

INFO:spdynamodb:Point-in-time recovery is already ENABLED.


In [9]:
# Display the table summary again to confirm the point-in-time recovery status.
dt

- Table name: StarbucksLocations            
- Table arn: arn:aws:dynamodb:us-east-1:688733876861:table/StarbucksLocations            
- Table creation: 2023-04-19 15:40:23.877000-03:00            
- [{'AttributeName': 'StoreNumber', 'KeyType': 'HASH'}]            
- [{'AttributeName': 'Country', 'AttributeType': 'S'}, {'AttributeName': 'StateCityPostcode', 'AttributeType': 'S'}, {'AttributeName': 'StoreNumber', 'AttributeType': 'S'}]            
- Point-in-time recovery status: ENABLED

In [None]:
# Work on a reproducible subset of the raw frame: a fixed seed makes Restart & Run All
# pick the same rows every time, and capping n at len(df) avoids the ValueError that
# DataFrame.sample raises when asked for more rows than exist (replace=False).
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42
df_test = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)

def convert_key(row):
    """Build the composite ``STATE#CITY#POSTCODE`` key for one row.

    The ``State/Province``, ``City`` and ``Postcode`` values of ``row`` are each
    passed through ``str()`` and upper-cased, then joined with ``#``.

    Args:
        row: Any mapping-like object indexable by the three column names
            (for example a DataFrame row).

    Returns:
        The joined key as a string.
    """
    key_fields = ('State/Province', 'City', 'Postcode')
    return '#'.join(str(row[field]).upper() for field in key_fields)

def _location_of(row):
    """Return the row's coordinates as a ``{'lat', 'lon'}`` mapping."""
    return {'lat': row['Latitude'], 'lon': row['Longitude']}


def _address_of(row):
    """Return the row's postal address fields as a nested mapping."""
    return {
        'address': row['Street Address'],
        'city': row['City'],
        'state': row['State/Province'],
        'zip': row['Postcode'],
    }


# Derive the composite key and the two nested attributes row by row.
df_test['StateCityPostcode'] = df_test.apply(convert_key, axis=1)
df_test['Location'] = df_test.apply(_location_of, axis=1)
df_test['Address'] = df_test.apply(_address_of, axis=1)

# Drop the source columns that are now folded into the derived ones.
folded_columns = ['Latitude', 'Longitude', 'Street Address', 'City', 'State/Province', 'Postcode']
df_test = df_test.drop(columns=folded_columns)

In [None]:
# Preview the transformed sample to check the derived columns before uploading.
df_test.head()

In [None]:
# Normalise the column headers to names without spaces or slashes.
# Keys that are not present in the frame are skipped by rename's default
# errors='ignore'; 'State/Province' and 'Street Address' fall in that category
# once the earlier drop of those columns has run.
column_renames = {
    'State/Province': 'State',
    'Country Code': 'CountryCode',
    'Store Number': 'StoreNumber',
    'Store Name': 'StoreName',
    'Ownership Type': 'OwnershipType',
    'Street Address': 'StreetAddress',
    'Phone Number': 'PhoneNumber',
}
df_test = df_test.rename(columns=column_renames)

In [None]:
# Upload the prepared sample frame to the selected table.
# NOTE(review): batch_pandas presumably writes one item per row — confirm in spdynamodb.
dt.batch_pandas(df_test)

In [None]:
# Create a global secondary index named StoreLocationIndex so stores can be looked up
# by Country and StateCityPostcode rather than only by the StoreNumber partition key.
# NOTE(review): att_name/sort_index look like the index's partition and sort keys
# (both typed "S", i.e. string) — confirm against the spdynamodb signature.
dt.create_global_secondary_index(
    att_name="Country",
    att_type="S",
    sort_index="StateCityPostcode",
    sort_type="S",
    i_name="StoreLocationIndex"
)

In [None]:
# Block until the global secondary index has left the CREATING state.
GSI_POLL_SECONDS = 30

status = dt.check_status_gsi()
if status == 'CREATING':
    print("Global secondary index is being created, this may take a few minutes...")
    while status == 'CREATING':
        # Wait first, then re-check: the loop exits as soon as a check sees the index
        # is done, instead of sleeping one more interval after completion.
        sleep(GSI_POLL_SECONDS)
        status = dt.check_status_gsi()

# NOTE(review): 'ACTIVE' is DynamoDB's IndexStatus value for a ready index; this assumes
# check_status_gsi() returns that value unchanged — confirm in spdynamodb.
if status == 'ACTIVE':
    print("Global secondary index created.")
else:
    # Report anything else verbatim rather than claiming success.
    print(f"Global secondary index finished waiting with status {status!r}.")

In [11]:
# Look up a single store by its partition-key value (StoreNumber) and show the
# result as a DataFrame.
dt.query_items('11164-99769', to_pandas=True)

Unnamed: 0,Address,Timezone,Country,StateCityPostcode,PhoneNumber,Location,StoreName,Brand,StoreNumber,OwnershipType
0,"{'zip': '926182951', 'address': '525 Spectrum ...",GMT-08:00 America/Los_Angeles,US,CA#IRVINE#926182951,949-585-0051,"{'lat': 33.65, 'lon': -117.75}",Irvine Spectrum Mall,Starbucks,11164-99769,Company Owned


In [14]:
# PartiQL lookup: US stores whose StateCityPostcode begins with 'NY#NEW'.
# `<table>` is presumably replaced with the selected table name by spdynamodb — confirm.
# NOTE(review): Country and StateCityPostcode are the keys of StoreLocationIndex, not of
# the base table (whose key is StoreNumber). Per the DynamoDB PartiQL docs, a SELECT that
# does not name the index in FROM and lacks the base partition key runs as a full scan —
# check how the library expands <table>.
prefix_query = "SELECT * FROM <table> WHERE Country  = ? AND begins_with(StateCityPostcode, ?)"
dt.query_partiql(prefix_query, parameters=['US', 'NY#NEW'])

Unnamed: 0,Address,Timezone,Country,StateCityPostcode,PhoneNumber,Location,StoreName,Brand,StoreNumber,OwnershipType
0,"{'zip': '100256924', 'address': '2521 Broadway...",GMT-05:00 America/New_York,US,NY#NEW YORK#100256924,(212) 316-0374,"{'lat': 40.79, 'lon': -73.97}",95th & Broadway,Starbucks,7699-45959,Company Owned
1,"{'zip': '10017', 'address': '280 Park Avenue',...",GMT-05:00 America/New_York,US,NY#NEW YORK#10017,212 682-2586,"{'lat': 40.76, 'lon': -73.97}",49th & Park,Starbucks,19030-193844,Company Owned
2,"{'zip': '100133558', 'address': '405 Broadway'...",GMT-05:00 America/New_York,US,NY#NEW YORK#100133558,646-613-0148,"{'lat': 40.72, 'lon': -74}",Broadway & Canal,Starbucks,11934-106845,Company Owned
3,"{'zip': '10013', 'address': '32 Avenue of the ...",GMT-05:00 America/New_York,US,NY#NEW YORK#10013,2129660984,"{'lat': 40.72, 'lon': -74}",6th Ave at Walker Street,Starbucks,20259-184464,Company Owned
4,"{'zip': '100042207', 'address': '2 Broadway', ...",GMT-05:00 America/New_York,US,NY#NEW YORK#100042207,212-344-4290,"{'lat': 40.7, 'lon': -74.01}",2 Broadway,Starbucks,7920-88803,Company Owned
5,"{'zip': '100023133', 'address': '80 Delancey S...",GMT-05:00 America/New_York,US,NY#NEW YORK#100023133,917 5341397,"{'lat': 40.72, 'lon': -73.99}",Allen & Delancey,Starbucks,10044-99132,Company Owned
6,"{'zip': '100051003', 'address': '55 Liberty St...",GMT-05:00 America/New_York,US,NY#NEW YORK#100051003,212-227-0372,"{'lat': 40.71, 'lon': -74.01}",Liberty & Nassau,Starbucks,7691-52959,Company Owned
7,"{'zip': '10028', 'address': '1142 Madison Ave'...",GMT-05:00 America/New_York,US,NY#NEW YORK#10028,2122881506,"{'lat': 40.78, 'lon': -73.96}",85th & Madison,Starbucks,29940-255942,Company Owned
8,"{'zip': '10018', 'address': '525 7th Avenue', ...",GMT-05:00 America/New_York,US,NY#NEW YORK#10018,212-869-5273,"{'lat': 40.75, 'lon': -73.99}",39th & 7th,Starbucks,7511-12733,Company Owned


In [19]:
# PartiQL lookup: US stores whose StateCityPostcode contains '#100', i.e. whose
# postcode segment starts with 100.
# NOTE(review): contains() is not a key condition, and the query does not name
# StoreLocationIndex in FROM, so this may scan the base table — confirm how
# spdynamodb expands <table>.
postcode_query = "SELECT * FROM <table> WHERE Country  = ? AND contains(StateCityPostcode, ?)"
dt.query_partiql(postcode_query, parameters=['US', '#100'])

Unnamed: 0,Address,Timezone,Country,StateCityPostcode,PhoneNumber,Location,StoreName,Brand,StoreNumber,OwnershipType
0,"{'zip': '100256924', 'address': '2521 Broadway...",GMT-05:00 America/New_York,US,NY#NEW YORK#100256924,(212) 316-0374,"{'lat': 40.79, 'lon': -73.97}",95th & Broadway,Starbucks,7699-45959,Company Owned
1,"{'zip': '10017', 'address': '280 Park Avenue',...",GMT-05:00 America/New_York,US,NY#NEW YORK#10017,212 682-2586,"{'lat': 40.76, 'lon': -73.97}",49th & Park,Starbucks,19030-193844,Company Owned
2,"{'zip': '100133558', 'address': '405 Broadway'...",GMT-05:00 America/New_York,US,NY#NEW YORK#100133558,646-613-0148,"{'lat': 40.72, 'lon': -74}",Broadway & Canal,Starbucks,11934-106845,Company Owned
3,"{'zip': '10013', 'address': '32 Avenue of the ...",GMT-05:00 America/New_York,US,NY#NEW YORK#10013,2129660984,"{'lat': 40.72, 'lon': -74}",6th Ave at Walker Street,Starbucks,20259-184464,Company Owned
4,"{'zip': '100042207', 'address': '2 Broadway', ...",GMT-05:00 America/New_York,US,NY#NEW YORK#100042207,212-344-4290,"{'lat': 40.7, 'lon': -74.01}",2 Broadway,Starbucks,7920-88803,Company Owned
5,"{'zip': '100023133', 'address': '80 Delancey S...",GMT-05:00 America/New_York,US,NY#NEW YORK#100023133,917 5341397,"{'lat': 40.72, 'lon': -73.99}",Allen & Delancey,Starbucks,10044-99132,Company Owned
6,"{'zip': '100051003', 'address': '55 Liberty St...",GMT-05:00 America/New_York,US,NY#NEW YORK#100051003,212-227-0372,"{'lat': 40.71, 'lon': -74.01}",Liberty & Nassau,Starbucks,7691-52959,Company Owned
7,"{'zip': '10028', 'address': '1142 Madison Ave'...",GMT-05:00 America/New_York,US,NY#NEW YORK#10028,2122881506,"{'lat': 40.78, 'lon': -73.96}",85th & Madison,Starbucks,29940-255942,Company Owned
8,"{'zip': '10018', 'address': '525 7th Avenue', ...",GMT-05:00 America/New_York,US,NY#NEW YORK#10018,212-869-5273,"{'lat': 40.75, 'lon': -73.99}",39th & 7th,Starbucks,7511-12733,Company Owned


In [20]:
country = 'MX'
result = dt.query_partiql("SELECT * FROM <table> WHERE Country  = ?", parameters=[country])
print(f"In {country} there are {len(result)} Starbucks locations.")

In MX there are 21 Starbucks locations.


In [46]:
# PartiQL lookup: US stores whose StateCityPostcode contains 'MIAMI'.
# NOTE(review): a substring match also hits any other key containing that text,
# and contains() cannot use the index sort key, so this may scan — confirm.
city_query = "SELECT * FROM <table> WHERE Country  = ? AND contains(StateCityPostcode, ?)"
dt.query_partiql(city_query, parameters=['US', 'MIAMI'])

Unnamed: 0,Address,Timezone,Country,StateCityPostcode,PhoneNumber,Location,StoreName,Brand,StoreNumber,OwnershipType
0,"{'zip': '331322028', 'address': '1050 Caribbea...",GMT-05:00 America/New_York,US,FL#MIAMI#331322028,,"{'lat': 25.78, 'lon': -80.18}",RCI Sunshine Cruise #2,Starbucks,23053-170652,Licensed
1,"{'zip': '33155', 'address': '6702 Bird Road, #...",GMT-05:00 America/New_York,US,FL#MIAMI#33155,3056614698,"{'lat': 25.73, 'lon': -80.3}",Bird Ludlam,Starbucks,22267-121004,Company Owned
2,"{'zip': '33137', 'address': '2937 Biscayne Blv...",GMT-05:00 America/New_York,US,FL#MIAMI#33137,305-573-0727,"{'lat': 25.81, 'lon': -80.19}",Biscayne Blvd & 30th,Starbucks,13824-104866,Company Owned
