In [1]:
import pandas as pd
from spdynamodb import DynamoTable
from time import sleep
import json
import time
from decimal import Decimal

In [2]:
# Build the table handle. Uncomment the next line instead to pass a named profile.
#dt = DynamoTable(profile_name='my-profile')
dt = DynamoTable()
try:
    # Reuse the table when it can be selected and show its summary.
    dt.select_table('FooBarTable')
    print(dt)
except Exception as err:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt /
    # SystemExit are no longer swallowed and mistaken for "table is missing".
    # NOTE(review): the exact exception select_table raises for a missing table
    # is not visible from this notebook; narrow this clause once confirmed.
    print("Could not select 'FooBarTable' ({0!r}); creating it instead.".format(err))
    dt.create_table(
        table_name='FooBarTable',
        partition_key='PK',
        partition_key_type='S',
        sort_key='SK',
        sort_key_type='S',
    )

- Table name: FooBarTable            
- Table arn: arn:aws:dynamodb:us-east-1:089715336747:table/FooBarTable            
- Table creation: 2023-05-09 13:52:59.830000-03:00            
- [{'AttributeName': 'PK', 'KeyType': 'HASH'}, {'AttributeName': 'SK', 'KeyType': 'RANGE'}]            
- [{'AttributeName': 'GSI1-PK', 'AttributeType': 'S'}, {'AttributeName': 'GSI1-SK', 'AttributeType': 'S'}, {'AttributeName': 'GSI2-SK', 'AttributeType': 'S'}, {'AttributeName': 'GSI3-SK', 'AttributeType': 'N'}, {'AttributeName': 'PK', 'AttributeType': 'S'}, {'AttributeName': 'SK', 'AttributeType': 'S'}, {'AttributeName': 'purchaseDate', 'AttributeType': 'S'}]            
- Point-in-time recovery status: DISABLED  |  Delete protection: True


### Create 3 global secondary indexes

In [None]:
def create_gsi_and_wait(table, label, poll_seconds=30, **gsi_kwargs):
    """Create one global secondary index and block while its status is 'CREATING'.

    Parameters
    ----------
    table : object exposing ``create_global_secondary_index`` and ``check_status_gsi``
        (the notebook passes the ``DynamoTable`` handle ``dt``).
    label : str
        Ordinal used in the progress message, e.g. ``"1st"``.
    poll_seconds : int or float
        Pause between two status checks.
    **gsi_kwargs
        Forwarded unchanged to ``table.create_global_secondary_index``.

    Returns
    -------
    The last value returned by ``table.check_status_gsi()``.
    """
    table.create_global_secondary_index(**gsi_kwargs)

    # Start the timer unconditionally: the original only set `start` inside the
    # `if status == 'CREATING'` branch, so the elapsed-time line raised NameError
    # (or reused a stale timer) whenever the first status was something else.
    start = time.time()
    status = table.check_status_gsi()
    if status == 'CREATING':
        print("{0} global secondary index is being created, this may take a few minutes...".format(label))

    while status == 'CREATING':
        # Sleep first, then re-check, so no extra pause is spent after the
        # status has already changed.
        sleep(poll_seconds)
        status = table.check_status_gsi()

    minute = (time.time() - start) / 60
    print("Global secondary index created. Time elapsed: {0:.2f} minute".format(minute))
    return status


# The three indexes share the partition attribute 'GSI1-PK' and differ only in
# their sort attribute (and its type) and index name.
GSI_DEFINITIONS = [
    ("1st", dict(att_name="GSI1-PK", att_type="S", sort_index="GSI1-SK", sort_type="S", i_name="GSI1")),
    ("2nd", dict(att_name="GSI1-PK", att_type="S", sort_index="GSI2-SK", sort_type="S", i_name="GSI2")),
    ("3rd", dict(att_name="GSI1-PK", att_type="S", sort_index="GSI3-SK", sort_type="N", i_name="GSI3")),
]

# Create the indexes one after another, waiting for each before starting the next.
for label, gsi_kwargs in GSI_DEFINITIONS:
    create_gsi_and_wait(dt, label, **gsi_kwargs)

### Users Table

In [None]:
# Load the users and give every row the same 'USER#<email>' value as both
# partition key (PK) and sort key (SK).
df_users = pd.read_csv('DesigningDynamoDBTable-1/users.csv')
user_key = 'USER#' + df_users['email'].astype(str)
df_users = df_users.assign(PK=user_key, SK=user_key)
df_users.head()

### Orders Table

In [6]:
# Load the orders and derive the key columns:
#   PK / SK           -> 'ORDER#<orderId>'
#   GSI1-PK / GSI1-SK -> 'USER#<userId>' / 'OSTATUS#<status>'
df_orders = pd.read_csv('DesigningDynamoDBTable-1/orders.csv')
order_key = 'ORDER#' + df_orders['orderId'].astype(str)
df_orders = df_orders.assign(**{
    'PK': order_key,
    'SK': order_key,
    'GSI1-PK': 'USER#' + df_orders['userId'].astype(str),
    'GSI1-SK': 'OSTATUS#' + df_orders['status'].astype(str),
})
df_orders.head()

Unnamed: 0,orderId,userId,purchaseDate,status,totalAmount,totalItems,PK,SK,GSI1-PK,GSI1-SK
0,1001,john@example.com,2022-01-01,completed,100.0,2,ORDER#1001,ORDER#1001,USER#john@example.com,OSTATUS#completed
1,1002,jane@example.com,2022-01-02,completed,50.0,1,ORDER#1002,ORDER#1002,USER#jane@example.com,OSTATUS#completed
2,1003,smith@example.com,2022-01-03,shipped,75.0,3,ORDER#1003,ORDER#1003,USER#smith@example.com,OSTATUS#shipped
3,1004,doe@example.com,2022-01-04,pending,20.0,1,ORDER#1004,ORDER#1004,USER#doe@example.com,OSTATUS#pending
4,1005,mike@example.com,2022-01-05,completed,30.0,2,ORDER#1005,ORDER#1005,USER#mike@example.com,OSTATUS#completed


In [None]:
# Bounded sanity check of the derived key columns instead of rendering the whole
# frame (the first rows are already displayed by the cell that builds df_orders).
df_orders[['PK', 'SK', 'GSI1-PK', 'GSI1-SK']].tail()

### Order details Table

In [None]:
# Load the order line items and derive their keys: PK is the parent order
# ('ORDER#<orderId>') and SK the product ('PRODUCT#<productId>').
df_order_details = pd.read_csv('DesigningDynamoDBTable-1/order_items.csv')

# Order ids are offset by 1000 before building the key.
# NOTE(review): presumably this aligns them with the 1001+ ids in orders.csv — confirm.
shifted_order_id = df_order_details['orderId'] + 1000

df_order_details = df_order_details.assign(
    orderId=shifted_order_id,
    PK='ORDER#' + shifted_order_id.astype(str),
    SK='PRODUCT#' + df_order_details['productId'].astype(str),
)
df_order_details.head()

### Products Table

In [None]:
# Hard-coded product ids, one per row of products.csv (pandas raises ValueError
# if the lengths differ).
# NOTE(review): the ORDER#1009 query output references PRODUCT#901, which is not
# in this list — confirm the ids against order_items.csv.
values = [100,101,102,200,201,202,301,400,401,402,500,501,502,601,701,702,801,802,900,902]

df_products = pd.read_csv('DesigningDynamoDBTable-1/products.csv').assign(productId=values)
product_key = 'PRODUCT#' + df_products['productId'].astype(str)

# Key columns: PK/SK identify the product; every product shares the GSI1-PK
# value 'PRODUCT', with name, category and price as the three GSI sort attributes.
df_products = df_products.assign(**{
    'PK': product_key,
    'SK': product_key,
    'GSI1-PK': 'PRODUCT',
    'GSI1-SK': 'PNAME#' + df_products['name'].astype(str).str.lower(),
    'GSI2-SK': df_products['category'].astype(str).str.lower(),
    'GSI3-SK': df_products['price'],
})
df_products.head()

### Add data to DynamoDB

In [None]:
# Send the users frame (PK = SK = 'USER#<email>') to the table through spdynamodb's batch_pandas.
# NOTE(review): presumably one item is written per row — confirm against the spdynamodb docs.
dt.batch_pandas(dataframe=df_users)

In [75]:
# Send the orders frame (PK = SK = 'ORDER#<orderId>', plus GSI1 keys) to the table.
# NOTE(review): presumably one item is written per row — confirm against the spdynamodb docs.
dt.batch_pandas(dataframe=df_orders)

In [None]:
# Send the order line items (PK = 'ORDER#<orderId>', SK = 'PRODUCT#<productId>') to the table.
# NOTE(review): presumably one item is written per row — confirm against the spdynamodb docs.
dt.batch_pandas(dataframe=df_order_details)

In [None]:
# Send the products frame (PK = SK = 'PRODUCT#<productId>', plus GSI1/GSI2/GSI3 keys) to the table.
# NOTE(review): presumably one item is written per row — confirm against the spdynamodb docs.
dt.batch_pandas(dataframe=df_products)

### Querying the table

In [3]:
# Fetch everything stored under 'ORDER#1009' as a DataFrame; the displayed result
# holds the order row together with its 'PRODUCT#...' line items.
dt.query_items(
    query="ORDER#1009",
    to_pandas=True,
)

Unnamed: 0,purchaseDate,totalAmount,orderId,userId,status,GSI1-SK,SK,GSI1-PK,PK,totalItems,quantity,productId
0,2022-01-09,60,1009,dave@example.com,completed,OSTATUS#completed,ORDER#1009,USER#dave@example.com,ORDER#1009,3.0,,
1,,27,1009,user9@mail.com,,,PRODUCT#901,,ORDER#1009,,2.0,901.0
2,,21,1009,user9@mail.com,,,PRODUCT#902,,ORDER#1009,,1.0,902.0


In [1]:
# Development helper: pick up local edits to spdynamodb without restarting the kernel.
from importlib import reload
import spdynamodb

# Reload the submodule BEFORE the package. reload() does not rebind names that
# other modules obtained via `from ... import ...`, so reloading the package
# first would leave any such names pointing at the stale `_queries` objects.
reload(spdynamodb._queries)
reload(spdynamodb)
from spdynamodb import DynamoTable

# Rebuild the handle so it is an instance of the freshly imported class.
dt = DynamoTable()
dt.select_table('FooBarTable')

In [40]:
# Render a nanosecond-precision timestamp as ISO-8601, once in full and once
# truncated to whole seconds; only the truncated form is shown as the cell output.
ts = pd.Timestamp('2020-03-14T15:32:52.192548651')
iso_full, iso_seconds = ts.isoformat(), ts.isoformat(timespec='seconds')
iso_seconds

'2020-03-14T15:32:52'

In [41]:
# Seed four items that share the partition key 'ORDER#1011' and differ in sort
# key and purchase date (ISO-8601 strings).
sample_rows = [
    ('ORDER#1011', '2020-03-14T15:32:52'),
    ('ORDER#1012', '2020-03-15T15:32:52'),
    ('ORDER#1013', '2020-03-17T15:32:52'),
    ('ORDER#1014', '2020-03-19T15:32:52'),
]

for sort_key, purchase_date in sample_rows:
    response = dt.table.put_item(
        Item={
            'PK': 'ORDER#1011',
            'SK': sort_key,
            'purchaseDate': purchase_date
        }
    )

# Show the response of the last write as the cell output.
response

{'ResponseMetadata': {'RequestId': '6TU5IQ4VAHLIFCKRGNG0AM49UVVV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Thu, 11 May 2023 21:29:32 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': '6TU5IQ4VAHLIFCKRGNG0AM49UVVV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2745614147'},
  'RetryAttempts': 0}}

In [36]:
import re

# Check that a '<digits>_<digits>' string is matched by a "digit, underscore,
# digit" pattern; the match covers the characters around the separator.
query = '8881254_5564555555'
range_separator = re.compile(r'[0-9]_[0-9]')
range_separator.search(query)

<re.Match object; span=(6, 9), match='4_5'>

In [23]:
# Query index 'GSI4' for partition 'ORDER#1011' with a '<start>_<end>' sort-key value.
# NOTE(review): the '_' separator looks like spdynamodb's range (between) syntax — confirm.
# NOTE(review): 'GSI4' is not created by any cell visible in this notebook; it
# presumably uses 'purchaseDate' as its sort key — add its creation alongside GSI1-GSI3.
query = '2015-01-01T00:00:00_2023-03-17T23:59:59'
dt.query(
    pk_value='ORDER#1011',
    sk_value=query,
    index_name='GSI4',
    consumed_capacity='TOTAL',
    to_pandas=True,
)

Consumed Capacity: 0.5


Unnamed: 0,purchaseDate,SK,PK
0,2020-03-14T15:32:52,ORDER#1011,ORDER#1011
1,2020-03-15T15:32:52,ORDER#1012,ORDER#1011
2,2020-03-17T15:32:52,ORDER#1013,ORDER#1011
3,2020-03-19T15:32:52,ORDER#1014,ORDER#1011


In [17]:
# Query index 'GSI3' (numeric sort attribute 'GSI3-SK', filled from the price)
# for products whose sort key matches "==59.99".
# NOTE(review): the '==' prefix looks like spdynamodb's equality syntax — confirm.
dt.query(
    pk_value='PRODUCT',
    sk_value="==59.99",
    index_name='GSI3',
    consumed_capacity='TOTAL',
    to_pandas=True,
)

Consumed Capacity: 0.5


Unnamed: 0,GSI1-SK,GSI1-PK,name,GSI3-SK,GSI2-SK,category,images,SK,amountSold,description,price,PK,writersId,productId
0,PNAME#product 8,PRODUCT,Product 8,59.99,category b,Category B,"image24.jpg,image25.jpg,image26.jpg",PRODUCT#400,60,Description for Product 8,59.99,PRODUCT#400,Writer 8,400
1,PNAME#product 18,PRODUCT,Product 18,59.99,category b,Category B,"image62.jpg,image63.jpg,image64.jpg,image65.jpg",PRODUCT#802,60,Description for Product 18,59.99,PRODUCT#802,Writer 8,802


In [4]:
# Query the base table (no index_name) for partition 'PRODUCT#301' with the
# sort-key value "PRODUCT*".
# NOTE(review): the trailing '*' looks like spdynamodb's prefix (begins_with) syntax — confirm.
dt.query(
    pk_value='PRODUCT#301',
    sk_value="PRODUCT*",
    consumed_capacity='TOTAL',
    to_pandas=True,
)

Consumed Capacity: 0.5


Unnamed: 0,GSI1-SK,GSI1-PK,name,GSI3-SK,GSI2-SK,category,images,SK,amountSold,description,price,PK,writersId,productId
0,PNAME#product 7,PRODUCT,Product 7,49.99,category c,Category C,"image20.jpg,image21.jpg,image22.jpg,image23.jpg",PRODUCT#301,90,Description for Product 7,49.99,PRODUCT#301,Writer 7,301
