In [1]:
import os
import configparser
import time
import boto3

In [2]:
# Read the configuration
config = configparser.ConfigParser()
# read() returns the list of files it successfully parsed, which is what this
# cell displays. A missing file is skipped without raising, so an empty list
# here means 'dl.cfg' was not found and the config['AWS'] lookups will fail.
config.read('dl.cfg')

['dl.cfg']

In [3]:
# Export the AWS credentials from the [AWS] section of the config file as
# environment variables.
os.environ.update({
    'AWS_ACCESS_KEY_ID': config['AWS']['AWS_ACCESS_KEY_ID'],
    'AWS_SECRET_ACCESS_KEY': config['AWS']['AWS_SECRET_ACCESS_KEY'],
})

In [4]:
# S3 resource used to access the buckets; the region comes from the config
# file and the credentials from the environment variables.
s3 = boto3.resource(
    's3',
    region_name=config['AWS']['REGION'],
    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
)

In [5]:
# A simple function to explore a given S3 bucket and key prefix
def explore_data_on_S3(bucket, key, extension = None):
    """Count the objects under an S3 prefix and report their combined size.

    Lists every object in ``bucket`` whose key starts with ``key`` through the
    module-level ``s3`` resource, then prints the number of counted objects,
    their total size in MB and how long the listing took.

    Args:
        bucket: Name of the S3 bucket to scan.
        key: Key prefix; only objects whose key starts with it are listed.
        extension: Optional key suffix (e.g. "json"). When given, only objects
            whose key ends with it are counted; when None, every listed
            object is counted.

    Returns:
        None. The summary is printed to stdout.
    """
    num_files = 0
    total_bytes = 0
    # perf_counter() is monotonic, so the reported duration cannot go negative
    # or jump if the system clock is adjusted while the listing is running.
    time_start = time.perf_counter()
    # Iterate over every object under the prefix, keeping those that match
    for obj in s3.Bucket(bucket).objects.filter(Prefix = key):
        if extension is None or obj.key.endswith(extension):
            num_files += 1
            total_bytes += obj.size
    # Report the details about the data
    total_size = total_bytes / (1024*1024) # bytes to MB
    time_pass = time.perf_counter() - time_start
    print("Explored {}/{} on S3".format(bucket,key))
    print("Total number of files: {}".format(num_files))
    print("Total size of files: {:.2f} MB".format(total_size))
    print("Search duration: {:.2f} seconds".format(time_pass))

In [6]:
# Explore the user logs data: count and size the JSON files under log_data
explore_data_on_S3(bucket="udacity-dend", key="log_data", extension="json")

Explored udacity-dend/log_data on S3
Total number of files: 30
Total size of files: 3.58 MB
Search duration: 0.35 seconds


In [7]:
# Explore the songs data: count and size the JSON files under song_data
explore_data_on_S3(bucket="udacity-dend", key="song_data", extension="json")

Explored udacity-dend/song_data on S3
Total number of files: 14896
Total size of files: 3.54 MB
Search duration: 10.46 seconds
