# DynamoDB example

The Amazon DynamoDB Developer Guide uses sample tables to illustrate various aspects of DynamoDB.

| **Table Name** | **Key type**  | **Key 1**          | **Key 2**              |
|----------------|---------------|--------------------|------------------------|
| ProductCatalog | Simple key    | Id (String)        |                        |
| Forum          | Simple key    | Name (String)      |                        |
| Thread         | Composite key | ForumName (String) | Subject (String)       |
| Reply          | Composite key | Id (String)        | ReplyDateTime (String) |

<br>
The Reply table has a global secondary index named PostedBy-Message-Index. This index will facilitate queries on two non-key attributes of the Reply table.

<br>
<br>

| **Index Name**         | **Primary key**   |
|------------------------|-------------------|
| PostedBy-Message-Index | PostedBy (String) |
|                        | Message (String)  |



In [1]:
import boto3

In [2]:
# Get the DynamoDB service resource (no region or credentials are passed
# explicitly here).
dynamodb = boto3.resource('dynamodb')

## Create the ProductCatalog table.

In [3]:
# Create ProductCatalog with a simple primary key: the string attribute 'Id'.
table = dynamodb.create_table(
    TableName='ProductCatalog',
    KeySchema=[{'AttributeName': 'Id', 'KeyType': 'HASH'}],
    AttributeDefinitions=[{'AttributeName': 'Id', 'AttributeType': 'S'}],
    ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5},
)
# Block until the table exists before writing to it.
table.wait_until_exists()

In [None]:
# Book: 'Molloy' by Samuel Beckett.
item = {
    'Id': 'ma30493ldkf03',
    'Title': 'Molloy',
    'ISBN': '978-0714503844',
    'Author': 'Samuel Beckett',
    'Price': 12,
    'Dimensions': '8.5 x 11.0 x 0.5',
    'PageCount': 500,
    'InPublication': True,
    'ProductCategory': 'Book',
}
table.put_item(Item=item)

In [None]:
# Book: 'Divina Comedia' by Dante Alighieri.
item = {
    'Id': 'doerminda',
    'Title': 'Divina Comedia',
    'ISBN': '458-0714503844',
    'Author': 'Dante Alighieri',
    'Price': 33,
    'Dimensions': '12 x 16.0 x 1.2',
    'PageCount': 1650,
    'InPublication': True,
    'ProductCategory': 'Book',
}
table.put_item(Item=item)

In [None]:
# Book: 'Malone Muere' by Samuel Beckett.
item = {
    'Id': 'malonedk394kdj',
    'Title': 'Malone Muere',
    'ISBN': '343-545698874',
    'Author': 'Samuel Beckett',
    'Price': 8,
    'Dimensions': '6.5 x 11.0 x 0.4',
    'PageCount': 245,
    'InPublication': True,
    'ProductCategory': 'Book',
}
table.put_item(Item=item)

In [None]:
# Bicycle product; 'Color' is a Python set of strings.
item = {
    'Id': 'dkdfkl4958kdj89',
    'Title': '18-Bike-204',
    'Description': '205 Description',
    'BicycleType': 'Hybrid',
    'Brand': 'Brand-Company C',
    'Price': 500,
    'Color': {'Red', 'Black'},
    'ProductCategory': 'Bicycle',
}
table.put_item(Item=item)

## Create the Forum table.

In [None]:
# Create Forum with a simple primary key: the string attribute 'Name'.
table = dynamodb.create_table(
    TableName='Forum',
    KeySchema=[{'AttributeName': 'Name', 'KeyType': 'HASH'}],
    AttributeDefinitions=[{'AttributeName': 'Name', 'AttributeType': 'S'}],
    ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5},
)

# Block until the table exists before writing to it.
table.wait_until_exists()

In [None]:
# Forum entry for 'Amazon DynamoDB' with its thread, message and view counters.
table.put_item(
    Item={
        'Name': 'Amazon DynamoDB',
        'Category': 'Amazon Web Services',
        'Threads': 2,
        'Messages': 4,
        'Views': 1000,
    }
)

In [None]:
# Forum entry for 'Amazon S3' (no counters yet).
table.put_item(
    Item={
        'Name': 'Amazon S3',
        'Category': 'Amazon Web Services',
    }
)

## Create the Thread table.

In [None]:
# Create Thread with a composite primary key:
# partition key 'ForumName' and sort key 'Subject', both strings.
table = dynamodb.create_table(
    TableName='Thread',
    KeySchema=[
        {'AttributeName': 'ForumName', 'KeyType': 'HASH'},
        {'AttributeName': 'Subject', 'KeyType': 'RANGE'},
    ],
    AttributeDefinitions=[
        {'AttributeName': 'ForumName', 'AttributeType': 'S'},
        {'AttributeName': 'Subject', 'AttributeType': 'S'},
    ],
    ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5},
)

# Block until the table exists before writing to it.
table.wait_until_exists()

In [None]:
# Thread 1 of the 'Amazon DynamoDB' forum; 'Tags' is a set of strings.
item = {
    'ForumName': 'Amazon DynamoDB',
    'Subject': 'DynamoDB Thread 1',
    'Message': 'DynamoDB thread 1 message',
    'LastPostedBy': 'User A',
    'LastPostedDateTime': '2015-09-22T19:58:22.514Z',
    'Views': 0,
    'Replies': 0,
    'Answered': 0,
    'Tags': {'index', 'primarykey', 'table'},
}
table.put_item(Item=item)

In [None]:
# Thread 2 of the 'Amazon DynamoDB' forum; 'Tags' is a set of strings.
item = {
    'ForumName': 'Amazon DynamoDB',
    'Subject': 'DynamoDB Thread 2',
    'Message': 'DynamoDB thread 2 message',
    'LastPostedBy': 'User A',
    'LastPostedDateTime': '2015-09-15T19:58:22.514Z',
    'Views': 3,
    'Replies': 0,
    'Answered': 0,
    'Tags': {'items', 'attributes', 'throughput'},
}
table.put_item(Item=item)

In [None]:
# Thread 1 of the 'Amazon S3' forum; 'Tags' is a set of strings.
item = {
    'ForumName': 'Amazon S3',
    'Subject': 'S3 Thread 1',
    'Message': 'S3 thread 1 message',
    'LastPostedBy': 'User A',
    'LastPostedDateTime': '2015-09-29T19:58:22.514Z',
    'Views': 0,
    'Replies': 0,
    'Answered': 0,
    'Tags': {'largeobjects', 'multipart upload'},
}
table.put_item(Item=item)

## Create the Reply table.

In [None]:
# Create Reply with a composite primary key (partition key 'Id', sort key
# 'ReplyDateTime') plus the PostedBy-Message-Index global secondary index
# described in the introduction of this notebook.
table = dynamodb.create_table(
    TableName='Reply',
    KeySchema=[
        {'AttributeName': 'Id', 'KeyType': 'HASH'},
        {'AttributeName': 'ReplyDateTime', 'KeyType': 'RANGE'},
    ],
    # Every attribute used as a table key or index key must be declared here.
    AttributeDefinitions=[
        {'AttributeName': 'Id', 'AttributeType': 'S'},
        {'AttributeName': 'ReplyDateTime', 'AttributeType': 'S'},
        {'AttributeName': 'PostedBy', 'AttributeType': 'S'},
        {'AttributeName': 'Message', 'AttributeType': 'S'},
    ],
    GlobalSecondaryIndexes=[
        {
            'IndexName': 'PostedBy-Message-Index',
            'KeySchema': [
                {'AttributeName': 'PostedBy', 'KeyType': 'HASH'},
                {'AttributeName': 'Message', 'KeyType': 'RANGE'},
            ],
            # Project all attributes so index queries return whole replies.
            'Projection': {'ProjectionType': 'ALL'},
            # A provisioned-mode table needs throughput for each index too.
            'ProvisionedThroughput': {
                'ReadCapacityUnits': 5,
                'WriteCapacityUnits': 5,
            },
        },
    ],
    ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5},
)

# Block until the table exists before writing to it.
table.wait_until_exists()

In [None]:
# Reply 1 to 'DynamoDB Thread 1'; 'Id' is '<forum name>#<thread subject>'.
item = {
    'Id': 'Amazon DynamoDB#DynamoDB Thread 1',
    'ReplyDateTime': '2015-09-15T19:58:22.947Z',
    'Message': 'DynamoDB Thread 1 Reply 1 text',
    'PostedBy': 'User A',
}
table.put_item(Item=item)

In [None]:
# Reply 2 to 'DynamoDB Thread 1'.
item = {
    'Id': 'Amazon DynamoDB#DynamoDB Thread 1',
    'ReplyDateTime': '2015-09-22T19:58:22.947Z',
    'Message': 'DynamoDB Thread 1 Reply 2 text',
    'PostedBy': 'User B',
}
table.put_item(Item=item)

In [None]:
# Reply 1 to 'DynamoDB Thread 2'.
item = {
    'Id': 'Amazon DynamoDB#DynamoDB Thread 2',
    'ReplyDateTime': '2015-09-29T19:58:22.947Z',
    'Message': 'DynamoDB Thread 2 Reply 1 text',
    'PostedBy': 'User A',
}
table.put_item(Item=item)

In [None]:
# Reply 2 to 'DynamoDB Thread 2'.
item = {
    'Id': 'Amazon DynamoDB#DynamoDB Thread 2',
    'ReplyDateTime': '2015-10-05T19:58:22.947Z',
    'Message': 'DynamoDB Thread 2 Reply 2 text',
    'PostedBy': 'User B',
}
table.put_item(Item=item)

In [None]:
# Print the name, item count and key schema of every table the resource
# can list. The loop uses its own variable so the notebook-level `table`
# handle is not rebound to whichever table happens to be listed last.
for t in dynamodb.tables.all():
    print(f"Table: {t.name}")
    # NOTE: DynamoDB refreshes the item count only periodically, so this
    # value can lag behind recent writes.
    print(f"Items: {t.item_count}")
    print(t.key_schema)
    print("")

## Batch writer

In [12]:
import pandas as pd
import requests # to get image from the web
import shutil # to save it locally
from hashlib import sha256
import os

In [13]:
# Load the semicolon-separated, Latin-1 encoded book catalogue, skipping
# rows that cannot be parsed.
df = pd.read_csv(
    "books.csv",
    sep=";",
    encoding="latin-1",
    on_bad_lines="skip",
    low_memory=False,
)

In [14]:
# Preview the first rows of the loaded catalogue.
df.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.01.THUMBZZZ.jpg,http://images.amazon.com/images/P/0195153448.01.MZZZZZZZ.jpg,http://images.amazon.com/images/P/0195153448.01.LZZZZZZZ.jpg
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.01.THUMBZZZ.jpg,http://images.amazon.com/images/P/0002005018.01.MZZZZZZZ.jpg,http://images.amazon.com/images/P/0002005018.01.LZZZZZZZ.jpg
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.01.THUMBZZZ.jpg,http://images.amazon.com/images/P/0060973129.01.MZZZZZZZ.jpg,http://images.amazon.com/images/P/0060973129.01.LZZZZZZZ.jpg
3,374157065,Flu: The Story of the Great Influenza Pandemic of 1918 and the Search for the Virus That Caused It,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.01.THUMBZZZ.jpg,http://images.amazon.com/images/P/0374157065.01.MZZZZZZZ.jpg,http://images.amazon.com/images/P/0374157065.01.LZZZZZZZ.jpg
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.01.THUMBZZZ.jpg,http://images.amazon.com/images/P/0393045218.01.MZZZZZZZ.jpg,http://images.amazon.com/images/P/0393045218.01.LZZZZZZZ.jpg


In [15]:
# Work on a random sample of 500 rows; no random_state is passed, so the
# selection differs between runs.
df_s = df.sample(500)

In [16]:
# Point `table` at the ProductCatalog table for the writes that follow.
table = dynamodb.Table(name='ProductCatalog')

In [18]:
# Create the local folder for downloaded cover images. exist_ok=True keeps
# the cell re-runnable when the folder is already there.
os.makedirs("front_book", exist_ok=True)

In [19]:
def check_digit_10(isbn):
    """Return the ISBN-10 check character for a nine-digit prefix.

    ``isbn`` must have length 9 (enforced with an assertion) and each
    element must be convertible with ``int``. Every digit is weighted by
    its 1-based position; the weighted total modulo 11 is the check value,
    where 10 is rendered as ``'X'``.
    """
    assert len(isbn) == 9
    total = sum(
        weight * int(digit) for weight, digit in enumerate(isbn, start=1)
    )
    remainder = total % 11
    return 'X' if remainder == 10 else str(remainder)

def check_digit_13(isbn):
    """Return the ISBN-13 check digit (as a string) for a 12-digit prefix.

    ``isbn`` must have length 12 (enforced with an assertion). Digits at
    even 0-based positions get weight 1 and those at odd positions weight 3;
    the check digit is whatever brings the weighted total up to the next
    multiple of 10.
    """
    assert len(isbn) == 12
    total = sum(
        (3 if index % 2 else 1) * int(digit)
        for index, digit in enumerate(isbn)
    )
    # The outer modulo maps a difference of 10 to 0.
    return str((10 - total % 10) % 10)

def isbn_convert(isbn):
    """Convert a 10-character ISBN to the form ``978-<9 digits><check>``.

    Values whose length is not 10 are returned unchanged. For a
    10-character value the old check character is dropped, ``978`` is
    prepended and a new check digit is computed with ``check_digit_13``.
    """
    if len(isbn) != 10:
        return isbn
    prefixed = '978' + isbn[:-1]
    return f"{prefixed[:3]}-{prefixed[3:12]}{check_digit_13(prefixed)}"

In [20]:
# Let's use Amazon S3
s3 = boto3.resource('s3')
# Name of the bucket used for uploads.
bucket_name = 'studio-lab-300-220'
# SHA-256 hash object (from hashlib).
m = sha256()
# Numeric offset used when generating item Ids.
increment = 30000

In [21]:
def _download_cover(url, dest, timeout=10):
    """Stream the image at *url* into the local file *dest*.

    Returns True when the server answered 200 and the body was written,
    False on any other status or when requests reports a connection
    problem or timeout, so one bad URL does not abort the whole batch.
    """
    try:
        # The context manager closes the connection even on early return.
        with requests.get(url, stream=True, timeout=timeout) as resp:
            if resp.status_code != 200:
                return False
            # Set decode_content to True, otherwise the downloaded image
            # file's size will be zero.
            resp.raw.decode_content = True
            with open(dest, 'wb') as out:
                shutil.copyfileobj(resp.raw, out)
    except requests.RequestException:
        return False
    return True


# Write every sampled book to DynamoDB. For each book the cover image is
# downloaded and, when large enough, uploaded to S3 and referenced from the
# item through 'ImageCode'.
folder = "front_book/"
with table.batch_writer() as batch:
    # itertuples(index=False) yields one plain tuple per row, so fields are
    # read by column position without building a Series per field.
    for i, row in enumerate(df_s.itertuples(index=False)):
        image_url = row[7]
        # The file name is the hash of this URL alone plus this URL's own
        # extension, so it does not depend on the rows processed before it.
        digest = sha256(image_url.encode()).hexdigest()
        filename = folder + digest + image_url[-4:]
        item = {
            'Id': "A01-" + str(i + increment).zfill(8),
            'Title': row[1],
            'ISBN': isbn_convert(row[0]),
            'Authors': row[2],
            'YearOfPublication': row[3],
            'Publisher': row[4],
        }
        # Only images larger than 15000 bytes are uploaded - presumably to
        # skip placeholder images; confirm the threshold.
        if (_download_cover(image_url, filename)
                and os.path.getsize(filename) > 15000):
            with open(filename, 'rb') as fb:
                s3.Bucket(bucket_name).put_object(Key=filename, Body=fb)
            item['ImageCode'] = filename
        batch.put_item(Item=item)

KeyboardInterrupt: 

In [8]:
# Load the second book dataset, skipping rows that cannot be parsed.
df_gr = pd.read_csv("books_gr.csv", on_bad_lines='skip')

In [9]:
# Preview the first rows of the second dataset.
df_gr.head()

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher
0,1,Harry Potter and the Half-Blood Prince (Harry Potter #6),J.K. Rowling/Mary GrandPré,4.57,0439785960,9780439785969,eng,652,2095690,27591,9/16/2006,Scholastic Inc.
1,2,Harry Potter and the Order of the Phoenix (Harry Potter #5),J.K. Rowling/Mary GrandPré,4.49,0439358078,9780439358071,eng,870,2153167,29221,9/1/2004,Scholastic Inc.
2,4,Harry Potter and the Chamber of Secrets (Harry Potter #2),J.K. Rowling,4.42,0439554896,9780439554893,eng,352,6333,244,11/1/2003,Scholastic
3,5,Harry Potter and the Prisoner of Azkaban (Harry Potter #3),J.K. Rowling/Mary GrandPré,4.56,043965548X,9780439655484,eng,435,2339585,36325,5/1/2004,Scholastic Inc.
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potter #1-5),J.K. Rowling/Mary GrandPré,4.78,0439682584,9780439682589,eng,2690,41428,164,9/13/2004,Scholastic


In [6]:
# Replace df_gr with a random sample of 5000 of its rows (no random_state
# is passed, so the selection differs between runs).
df_gr = df_gr.sample(5000)

In [10]:
# Write every row of df_gr to DynamoDB. Ids are 'A01-' followed by the
# zero-padded row position plus this offset.
increment = 20000
with table.batch_writer() as batch:
    # itertuples(index=False) yields one plain tuple per row, so fields are
    # read by column position without building a Series per field.
    for i, row in enumerate(df_gr.itertuples(index=False)):
        isbn13 = str(row[5])
        item = {
            'Id': "A01-" + str(i + increment).zfill(8),
            'Title': row[1],
            # Insert a dash after the first three characters of the ISBN-13.
            'ISBN': isbn13[:3] + "-" + isbn13[3:],
            'Authors': row[2],
            # The year is taken as the last four characters of the date.
            'YearOfPublication': row[10][-4:],
            'Publisher': row[11],
            'LanguageCode': row[6],
            'PageCount': int(row[7]),
            'RatingsCount': int(row[8]),
            'TextReviewsCount': int(row[9]),
            # Stored as a string rather than a float.
            'AverageRating': str(row[3]),
        }
        batch.put_item(Item=item)