In [1]:
import pandas as pd
from spdynamodb import DynamoTable
from time import sleep
import json
import time
from decimal import Decimal

In [2]:
# Bind `dt` to FooBarTable, creating the table on first run.
dt = DynamoTable()
try:
    dt.select_table('FooBarTable')
    # Printing stays inside the try so that a failure while describing the
    # table is handled the same way as a failure to select it.
    print(dt)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed and turned into an
    # accidental create_table call.
    # NOTE(review): presumably the failure means the table does not exist yet;
    # narrow this to the specific spdynamodb/botocore error once confirmed.
    dt.create_table(
        table_name='FooBarTable',
        partition_key='PK',
        partition_key_type='S',
        sort_key='SK',
        sort_key_type='S',
    )

- Table name: FooBarTable            
- Table arn: arn:aws:dynamodb:us-east-1:089715336747:table/FooBarTable            
- Table creation: 2023-05-09 13:52:59.830000-03:00            
- [{'AttributeName': 'PK', 'KeyType': 'HASH'}, {'AttributeName': 'SK', 'KeyType': 'RANGE'}]            
- [{'AttributeName': 'GSI1-PK', 'AttributeType': 'S'}, {'AttributeName': 'GSI1-SK', 'AttributeType': 'S'}, {'AttributeName': 'GSI2-SK', 'AttributeType': 'S'}, {'AttributeName': 'GSI3-SK', 'AttributeType': 'N'}, {'AttributeName': 'PK', 'AttributeType': 'S'}, {'AttributeName': 'SK', 'AttributeType': 'S'}]            
- Point-in-time recovery status: DISABLED  |  Delete protection: True


### Create 3 global secondary indexes

In [None]:
def create_gsi_and_wait(table, ordinal, poll_seconds=30, **gsi_kwargs):
    """Request one global secondary index and block until it leaves 'CREATING'.

    Parameters
    ----------
    table : object
        The table wrapper to act on; it must provide
        ``create_global_secondary_index(**gsi_kwargs)`` and ``check_status_gsi()``.
    ordinal : str
        Label used in the progress message (e.g. ``"1st"``).
    poll_seconds : int or float, default 30
        Pause between two consecutive status checks.
    **gsi_kwargs
        Forwarded unchanged to ``table.create_global_secondary_index``.

    Returns
    -------
    float
        Minutes elapsed between issuing the request and the last status check.
    """
    table.create_global_secondary_index(**gsi_kwargs)

    # Start the clock unconditionally: the original only set `start` inside the
    # `if`, so a status other than 'CREATING' raised NameError (or silently
    # reused the previous index's start time).
    start = time.time()
    status = table.check_status_gsi()
    if status == 'CREATING':
        print(f"{ordinal} global secondary index is being created, this may take a few minutes...")
        while status == 'CREATING':
            # Sleep first, then re-check, so the loop exits as soon as a check
            # reports a different status instead of waiting one extra interval.
            sleep(poll_seconds)
            status = table.check_status_gsi()

    # NOTE(review): any status other than 'CREATING' is reported as success,
    # exactly as before — confirm which values check_status_gsi() can return.
    minute = (time.time() - start) / 60
    print("Global secondary index created. Time elapsed: {0:.2f} minute".format(minute))
    return minute


# All three indexes share the "GSI1-PK" partition attribute and differ only in
# their sort attribute. They are created one after another, each call waiting
# for the previous index to finish.

# 1st Global Secondary Index
create_gsi_and_wait(
    dt,
    "1st",
    att_name="GSI1-PK",
    att_type="S",
    sort_index="GSI1-SK",
    sort_type="S",
    i_name="GSI1",
)

# 2nd Global Secondary Index
create_gsi_and_wait(
    dt,
    "2nd",
    att_name="GSI1-PK",
    att_type="S",
    sort_index="GSI2-SK",
    sort_type="S",
    i_name="GSI2",
)

# 3rd Global Secondary Index (numeric sort key)
create_gsi_and_wait(
    dt,
    "3rd",
    att_name="GSI1-PK",
    att_type="S",
    sort_index="GSI3-SK",
    sort_type="N",
    i_name="GSI3",
);

### Users Table

In [None]:
# Load the users CSV; every user becomes one item whose partition and sort
# keys are both "USER#<email>".
df_users = pd.read_csv('DesigningDynamoDBTable-1/users.csv')

user_key = 'USER#' + df_users['email'].astype(str)
df_users = df_users.assign(PK=user_key, SK=user_key)
df_users.head()

### Orders Table

In [74]:
# Load the orders CSV and derive the key columns:
#   PK / SK  -> "ORDER#<orderId>"   (the order item itself)
#   GSI1-PK  -> "USER#<userId>"     (groups orders by user on the GSI1 index)
#   GSI1-SK  -> "OSTATUS#<status>"  (sorts/filters a user's orders by status)
df_orders = pd.read_csv('DesigningDynamoDBTable-1/orders.csv')

order_key = 'ORDER#' + df_orders['orderId'].astype(str)
df_orders = df_orders.assign(**{
    'PK': order_key,
    'SK': order_key,
    'GSI1-PK': 'USER#' + df_orders['userId'].astype(str),
    'GSI1-SK': 'OSTATUS#' + df_orders['status'].astype(str),
})
df_orders.head()

Unnamed: 0,orderId,userId,purchaseDate,status,totalAmount,totalItems,PK,SK,GSI1-PK,GSI1-SK
0,1001,john@example.com,2022-01-01,completed,100.0,2,ORDER#1001,ORDER#1001,USER#john@example.com,OSTATUS#completed
1,1002,jane@example.com,2022-01-02,completed,50.0,1,ORDER#1002,ORDER#1002,USER#jane@example.com,OSTATUS#completed
2,1003,smith@example.com,2022-01-03,shipped,75.0,3,ORDER#1003,ORDER#1003,USER#smith@example.com,OSTATUS#shipped
3,1004,doe@example.com,2022-01-04,pending,20.0,1,ORDER#1004,ORDER#1004,USER#doe@example.com,OSTATUS#pending
4,1005,mike@example.com,2022-01-05,completed,30.0,2,ORDER#1005,ORDER#1005,USER#mike@example.com,OSTATUS#completed


### Order details Table

In [None]:
# Load the order line items. Each row is stored under its order's partition
# ("ORDER#<orderId>") with the product as the sort key ("PRODUCT#<productId>").
df_order_details = (
    pd.read_csv('DesigningDynamoDBTable-1/order_items.csv')
    # Shift ids by 1000 — presumably so they line up with the 1001+ ids used
    # in orders.csv (TODO confirm against the raw files).
    .assign(orderId=lambda items: items['orderId'] + 1000)
    .assign(
        PK=lambda items: 'ORDER#' + items['orderId'].astype(str),
        SK=lambda items: 'PRODUCT#' + items['productId'].astype(str),
    )
)
df_order_details.head()

### Products Table

In [None]:
# Load the products CSV, overwrite productId with the hand-picked ids below
# (one per row, in file order), then derive the table and index key columns.
df_products = pd.read_csv('DesigningDynamoDBTable-1/products.csv')
values = [
    100, 101, 102,
    200, 201, 202,
    301,
    400, 401, 402,
    500, 501, 502,
    601,
    701, 702,
    801, 802,
    900, 902,
]
df_products = df_products.assign(productId=values)

product_key = 'PRODUCT#' + df_products['productId'].astype(str)
df_products = df_products.assign(**{
    'PK': product_key,
    'SK': product_key,
    # Constant partition value: all products share one partition on the indexes.
    'GSI1-PK': 'PRODUCT',
    # One sort attribute per index: lower-cased name, lower-cased category, price.
    'GSI1-SK': 'PNAME#' + df_products['name'].astype(str).str.lower(),
    'GSI2-SK': df_products['category'].astype(str).str.lower(),
    'GSI3-SK': df_products['price'],
})
df_products.head()

### Add data to DynamoDB

In [None]:
# Load the user rows (df_users, keyed by PK/SK = "USER#<email>") into the table bound to `dt`.
dt.batch_pandas(dataframe=df_users)

In [75]:
# Load the order rows (df_orders, including their GSI1-PK / GSI1-SK columns) into the table bound to `dt`.
dt.batch_pandas(dataframe=df_orders)

In [None]:
# Load the order line items (df_order_details, PK = "ORDER#<orderId>", SK = "PRODUCT#<productId>") into the table bound to `dt`.
dt.batch_pandas(dataframe=df_order_details)

In [None]:
# Load the product rows (df_products, including the GSI1/GSI2/GSI3 key columns) into the table bound to `dt`.
dt.batch_pandas(dataframe=df_products)

### Querying the table

In [65]:
# Query the "ORDER#1009" partition and return the result as a DataFrame; the
# rendered result contains the order row plus its PRODUCT# line-item rows.
dt.query_items(query="ORDER#1009", to_pandas=True)

Unnamed: 0,purchaseDate,GSI1PK,totalAmount,orderId,userId,status,SK,GSI1SK,PK,totalItems,quantity,productId
0,2022-01-09,USER#dave@example.com,60,1009,dave@example.com,completed,ORDER#1009,OSTATUS#completed,ORDER#1009,3.0,,
1,,,27,1009,user9@mail.com,,PRODUCT#901,,ORDER#1009,,2.0,901.0
2,,,21,1009,user9@mail.com,,PRODUCT#902,,ORDER#1009,,1.0,902.0


In [67]:
# Fetch a single product item by its full primary key (product rows use the
# same "PRODUCT#<productId>" value for both PK and SK).
dt.get_item(pk_value='PRODUCT#502', sk_value='PRODUCT#502')

{'GSI1-SK': 'PNAME#product 13',
 'GSI1-PK': 'PRODUCT',
 'name': 'Product 13',
 'GSI3-SK': 9.99,
 'GSI2-SK': 'category c',
 'category': 'Category C',
 'images': 'image41.jpg,image42.jpg,image43.jpg,image44.jpg',
 'SK': 'PRODUCT#502',
 'amountSold': 200,
 'description': 'Description for Product 13',
 'price': 9.99,
 'PK': 'PRODUCT#502',
 'writersId': 'Writer 3',
 'productId': 502}

In [None]:
# Condition operator notes:
#   __        begins_with
#   304_320   between (low_high)
#   <         less than
#   <=        less than or equal
#   >         greater than
#   >=        greater than or equal
#   ==        equal

In [94]:
# `Key` is imported here because this cell runs before the cell that queries
# GSI3; without the import a fresh-kernel "Run All" stops with NameError.
from boto3.dynamodb.conditions import Key

# Show the condition builders available on Key (eq, lt, begins_with, between, ...).
help(Key)

Help on class Key in module boto3.dynamodb.conditions:

class Key(AttributeBase)
 |  Key(name)
 |  
 |  Method resolution order:
 |      Key
 |      AttributeBase
 |      builtins.object
 |  
 |  Data and other attributes defined here:
 |  
 |  __slotnames__ = []
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from AttributeBase:
 |  
 |  __and__(self, value)
 |  
 |  __eq__(self, other)
 |      Return self==value.
 |  
 |  __init__(self, name)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __invert__(self)
 |  
 |  __ne__(self, other)
 |      Return self!=value.
 |  
 |  __or__(self, value)
 |      Return self|value.
 |  
 |  begins_with(self, value)
 |      Creates a condition where the attribute begins with the value.
 |      
 |      :param value: The value that the attribute begins with.
 |  
 |  between(self, low_value, high_value)
 |      Creates a condition where the attribute is greater than

In [97]:
from boto3.dynamodb.conditions import Key, Attr

# Key condition for the GSI3 index: every product shares the "PRODUCT"
# partition value, and GSI3-SK holds the numeric price, so this selects
# products priced below 5.
partition_cond = Key('GSI1-PK').eq('PRODUCT')
price_cond = Key('GSI3-SK').lt(5)
qry = partition_cond & price_cond

# Run the query against the underlying table object, reading from GSI3.
dt.table.query(IndexName="GSI3", KeyConditionExpression=qry)

{'Items': [{'GSI1-SK': 'PNAME#product 19',
   'GSI1-PK': 'PRODUCT',
   'name': 'Product 19',
   'GSI3-SK': Decimal('4.99'),
   'GSI2-SK': 'category c',
   'category': 'Category C',
   'images': 'image66.jpg,image67.jpg,image68.jpg,image69.jpg,image70.jpg',
   'SK': 'PRODUCT#900',
   'amountSold': Decimal('150'),
   'description': 'Description for Product 19',
   'price': Decimal('4.99'),
   'PK': 'PRODUCT#900',
   'writersId': 'Writer 9',
   'productId': Decimal('900')},
  {'GSI1-SK': 'PNAME#product 9',
   'GSI1-PK': 'PRODUCT',
   'name': 'Product 9',
   'GSI3-SK': Decimal('4.99'),
   'GSI2-SK': 'category a',
   'category': 'Category A',
   'images': 'image27.jpg,image28.jpg,image29.jpg,image30.jpg',
   'SK': 'PRODUCT#401',
   'amountSold': Decimal('150'),
   'description': 'Description for Product 9',
   'price': Decimal('4.99'),
   'PK': 'PRODUCT#401',
   'writersId': 'Writer 9',
   'productId': Decimal('401')}],
 'Count': 2,
 'ScannedCount': 2,
 'ResponseMetadata': {'RequestId': 'UA

In [None]:
dt.table.query(
    IndexName="GSI1",
    KeyConditionExpression=Key('GSI1-PK').eq('