In [1]:
import pandas as pd
from spdynamodb import DynamoTable
from time import sleep
import json
from decimal import Decimal

In [2]:
df = pd.read_csv('directory.zip', compression='zip')

In [4]:
df.head()

Unnamed: 0,Brand,Store Number,Store Name,Ownership Type,Street Address,City,State/Province,Country,Postcode,Phone Number,Timezone,Longitude,Latitude
0,Starbucks,47370-257954,"Meritxell, 96",Licensed,"Av. Meritxell, 96",Andorra la Vella,7,AD,AD500,376818720.0,GMT+1:00 Europe/Andorra,1.53,42.51
1,Starbucks,22331-212325,Ajman Drive Thru,Licensed,"1 Street 69, Al Jarf",Ajman,AJ,AE,,,GMT+04:00 Asia/Dubai,55.47,25.42
2,Starbucks,47089-256771,Dana Mall,Licensed,Sheikh Khalifa Bin Zayed St.,Ajman,AJ,AE,,,GMT+04:00 Asia/Dubai,55.47,25.39
3,Starbucks,22126-218024,Twofour 54,Licensed,Al Salam Street,Abu Dhabi,AZ,AE,,,GMT+04:00 Asia/Dubai,54.38,24.48
4,Starbucks,17127-178586,Al Ain Tower,Licensed,"Khaldiya Area, Abu Dhabi Island",Abu Dhabi,AZ,AE,,,GMT+04:00 Asia/Dubai,54.54,24.51


In [7]:
dt = DynamoTable()
try:
    dt.select_table('StarbucksLocations')
    print(dt)
except:
    dt.create_table(
        table_name='StarbucksLocations',
        partition_key='StoreNumber',
        partition_key_type='S',
        provisioned=False
    )

In [42]:
df_test = df.sample(1000)

In [43]:
def convert_key(row):
    return str(row['State/Province']).upper() + '#' \
            + str(row['City']).upper() + '#' \
            + str(row['Postcode']).upper()

df_test['StateCityPostcode'] = df_test.apply(lambda x: convert_key(x), axis=1)

In [44]:
df_test.head()

Unnamed: 0,Brand,Store Number,Store Name,Ownership Type,Street Address,City,State/Province,Country,Postcode,Phone Number,Timezone,Longitude,Latitude,StateCityPostcode
2861,Starbucks,35052-96715,金桥店,Joint Venture,"浦东新区, 碧云国际社区, 陈行路2388号1幢1层101室",上海市,31,CN,201206,021-50303537,GMT+08:00 Asia/Beijing,121.58,31.24,31#上海市#201206
9712,Starbucks,50001-272225,Senai Airport,Licensed,"Lot S11, Aero Mall, Senai International Airport",Johor Bahru,1,MY,81250,,GMT+08:00 Asia/Kuala_Lumpur,103.67,1.64,1#JOHOR BAHRU#81250
1820,Starbucks,29640-253024,Tours Des Voyageurs,Licensed,151 Chemin du Cure Deslauriers,Mont-Tremblant,QC,CA,J8E 1C9,,GMT-05:00 America/Montreal,-74.0,46.21,QC#MONT-TREMBLANT#J8E 1C9
6770,Starbucks,23607-231684,Sapporo Kanjo-dori Higashi,Joint Venture,5-1 Honcho 2jo 1chome Higashi-ku,Sapporo,1,JP,065-0042,,GMT+09:00 Asia/Tokyo,141.38,43.08,1#SAPPORO#065-0042
6416,Starbucks,17112-179325,Rest Area KM 62,Licensed,Jl. Tol Jakarta Cikampek KM 62,Karawang,JK,ID,41373,,GMT+07:00 Asia/Jakarta,107.4,-6.39,JK#KARAWANG#41373


In [47]:
df_test.rename(columns={'State/Province': 'State', 'Country Code': 'CountryCode', 'Store Number': 'StoreNumber',
                        'Store Name': 'StoreName', 'Ownership Type': 'OwnershipType', 
                        'Street Address': 'StreetAddress', 'Phone Number': 'PhoneNumber'}, inplace=True)

In [48]:
dt.batch_pandas(df_test)

In [49]:
dt.create_global_secondary_index(
    att_name="Country",
    att_type="S",
    sort_index="StateCityPostcode",
    sort_type="S",
    i_name="StoreLocationIndex"
)

In [50]:
status = dt.check_status_gsi()
if status == 'CREATING':
    print("Global secondary index is being created, this may take a few minutes...")
    while status == 'CREATING':
        status = dt.check_status_gsi()
        sleep(30)
print("Global secondary index created.")

Global secondary index is being created...
Global secondary index created.


In [52]:
dt.query_items('18297-164868', to_pandas=True)

Unnamed: 0,City,Timezone,Country,Longitude,Latitude,Postcode,Brand,State,StoreNumber,OwnershipType,StreetAddress,StateCityPostcode,PhoneNumber,StoreName
0,Laredo,GMT-06:00 America/Chicago,US,-99.48,27.61,78045,Starbucks,TX,18297-164868,Company Owned,10719 McPherson Road,TX#LAREDO#78045,956729-7828,Bob Bullock & McPherson


In [91]:
dt.query_partiql("SELECT * FROM <table> WHERE Country  = ? AND begins_with(StateCityPostcode, ?)", parameters=['US', 'NY#BROO'])

Unnamed: 0,City,Timezone,Country,Longitude,Latitude,Postcode,Brand,State,StoreNumber,OwnershipType,StreetAddress,StateCityPostcode,PhoneNumber,StoreName
0,Brooklyn,GMT-05:00 America/New_York,US,-73.98,40.68,112171450,Starbucks,NY,7704-13119,Company Owned,"139 Flatbush Ave, N/A",NY#BROOKLYN#112171450,718-789-3418,Flatbush Ave Atlantic Terminal
1,Brooklyn,GMT-05:00 America/New_York,US,-73.96,40.61,112292003,Starbucks,NY,17224-171944,Company Owned,1417 Kings Highway,NY#BROOKLYN#112292003,718-627-1016,1417 Kings Highway
2,Brooklyn,GMT-05:00 America/New_York,US,-73.96,40.72,11211,Starbucks,NY,22596-222524,Company Owned,154 N Seventh St,NY#BROOKLYN#11211,7183840152,154 N Seventh St
3,Brooklyn,GMT-05:00 America/New_York,US,-74.03,40.62,112096812,Starbucks,NY,7646-27128,Company Owned,9202 Third Avenue,NY#BROOKLYN#112096812,718-492-5331,3rd Ave & 92nd St
4,Brooklyn,GMT-05:00 America/New_York,US,-73.98,40.67,112152222,Starbucks,NY,7441-1994,Company Owned,166 7th Avenue,NY#BROOKLYN#112152222,718-369-3098,164 7th Avenue


In [111]:
dt.query_partiql("SELECT * FROM <table> WHERE Country  = ? AND contains(StateCityPostcode, ?)", parameters=['US', '#6222'])

Unnamed: 0,City,Timezone,Country,Longitude,Latitude,Postcode,Brand,State,StoreNumber,OwnershipType,StreetAddress,StateCityPostcode,PhoneNumber,StoreName
0,Belleville,GMT-06:00 America/Chicago,US,-90.01,38.51,622263104,Starbucks,IL,76860-116533,Licensed,5601 Belleville Crossing St,IL#BELLEVILLE#622263104,618-310-1901,Target Belleville T-2330


In [95]:
country = 'MX'
result = dt.query_partiql("SELECT * FROM <table> WHERE Country  = ?", parameters=[country])
print(f"In {country} there are {len(result)} Starbucks locations.")

In MX there are 22 Starbucks locations.
