## Python script using Boto3 to interact with Amazon S3.


`Boto3` allows you to programatically interact with AWS services
`S3 - Simple Storage Service` - Amazon's clour storage solution.

In [2]:
import boto3
import pprint as pp        # pprint - pretty print displays comples data in a nicely formatted way

# Let's use Amazon S3
s3_resource = boto3.resource('s3')
s3_client = boto3.client('s3')

`s3_resource` - creates resource-level object for S3. It uses a higher-level abstraction giving access to classes like `Bucket`, `Object`

`s3_client` - creates a client-level object for S3. It's lower-level, and gives access to raw AWS API methods (`list_buckets`,`get_object`, etc)

### S3 RESOURCE BUCKETS

In [3]:
# S3 RESOURCE - OBJECT ORIENTED allows you to interact with BUCKETS and OBJECT classes, not just RAW data
for bucket in s3_resource.buckets.all():
    print(bucket.name)

assessment-dispatcher
assessment-dispatcher-test
assessmentbackup
aws-logs-135928476890-eu-central-1
bulk-policy-migration-135928476890
cloudfuntest
cyber10x-group1-bucket
data-402-final-project
data-eng-401-final-project
data-eng-project-master
data-eng-resources
data-preassignment-alien-project-group1
devops-bootcamp-shahrukh
devops-content
elasticbeanstalk-eu-west-1-135928476890
emiledevopstest556373
eng130-videos
florina-hmrc-remote-backend
graduate-market-analysis
imc.spartaglobal.academy
iotdatajoy2023
jenkins-build-test
kaki-udemy
kaki-work-space
profiles.spartaglobal.academy
shahrukhterraformremotebackend
sparta-backups
sparta-cyber-bucket
sparta-data
sparta-example-state
sparta-marketing
sparta-media
sparta-profiles
sparta-terraform-state
tech230-ramon1
udemy-lab-streaming-data-rahul


### S3 CLIENT BUCKETS

In [4]:
# S3 CLIENT - EXPLICIT - Provides DIRECT access to AWS services via RAW API Calls
bucket_list = s3_client.list_buckets()
pp.pprint(bucket_list)

for bucket in bucket_list['Buckets']:
    print(bucket['Name'])

{'Buckets': [{'CreationDate': datetime.datetime(2024, 11, 9, 3, 20, 37, tzinfo=tzutc()),
              'Name': 'assessment-dispatcher'},
             {'CreationDate': datetime.datetime(2024, 11, 12, 6, 55, 36, tzinfo=tzutc()),
              'Name': 'assessment-dispatcher-test'},
             {'CreationDate': datetime.datetime(2024, 11, 9, 3, 20, 42, tzinfo=tzutc()),
              'Name': 'assessmentbackup'},
             {'CreationDate': datetime.datetime(2024, 11, 12, 8, 20, 9, tzinfo=tzutc()),
              'Name': 'aws-logs-135928476890-eu-central-1'},
             {'CreationDate': datetime.datetime(2024, 9, 16, 9, 35, 28, tzinfo=tzutc()),
              'Name': 'bulk-policy-migration-135928476890'},
             {'CreationDate': datetime.datetime(2024, 12, 10, 16, 25, 1, tzinfo=tzutc()),
              'Name': 'cloudfuntest'},
             {'CreationDate': datetime.datetime(2024, 10, 30, 23, 26, 42, tzinfo=tzutc()),
              'Name': 'cyber10x-group1-bucket'},
             {'Crea

### LIST of the OBJECTS in the BUCKET data-eng-resources


`list_objects_v2` more recommended that `list_objects`

In [None]:
# Look inside the content of the bracket
bucket_name = 'data-eng-resources'
bucket_contents = s3_client.list_objects_v2(
    Bucket = bucket_name
    # more optionalargs
    # more args
)

pp.pprint(bucket_contents)

{'Contents': [{'ETag': '"bda64538cd4a8269836e4e09a787e93a"',
               'Key': 'Data401/Brian_Luna.csv',
               'LastModified': datetime.datetime(2024, 4, 4, 13, 56, 6, tzinfo=tzutc()),
               'Size': 800,
               'StorageClass': 'STANDARD'},
              {'ETag': '"bda64538cd4a8269836e4e09a787e93a"',
               'Key': 'Data401/Rahul.csv',
               'LastModified': datetime.datetime(2024, 4, 5, 9, 46, 58, tzinfo=tzutc()),
               'Size': 800,
               'StorageClass': 'STANDARD'},
              {'ETag': '"1df661b5d9c10b9264434fd252e08117"',
               'Key': 'Data401/jack.csv',
               'LastModified': datetime.datetime(2024, 3, 28, 17, 5, 18, tzinfo=tzutc()),
               'Size': 857,
               'StorageClass': 'STANDARD'},
              {'ETag': '"959f10cc0c3aa461a8afaed9035b6f45"',
               'Key': 'Data401/lihong.csv',
               'LastModified': datetime.datetime(2024, 3, 29, 16, 9, 6, tzinfo=tzutc()),
      

In [6]:
#  List of all objects in the bucket whose key start with Data250/.
bucket_name = 'data-eng-resources'
data250_contents = s3_client.list_objects_v2(
    Bucket = bucket_name,
    Prefix = 'Data250/',   
    MaxKeys = 100,          # Optional
    Delimiter = '/'         # Optional
)

pp.pprint(bucket_contents)

{'Contents': [{'ETag': '"bda64538cd4a8269836e4e09a787e93a"',
               'Key': 'Data401/Brian_Luna.csv',
               'LastModified': datetime.datetime(2024, 4, 4, 13, 56, 6, tzinfo=tzutc()),
               'Size': 800,
               'StorageClass': 'STANDARD'},
              {'ETag': '"bda64538cd4a8269836e4e09a787e93a"',
               'Key': 'Data401/Rahul.csv',
               'LastModified': datetime.datetime(2024, 4, 5, 9, 46, 58, tzinfo=tzutc()),
               'Size': 800,
               'StorageClass': 'STANDARD'},
              {'ETag': '"1df661b5d9c10b9264434fd252e08117"',
               'Key': 'Data401/jack.csv',
               'LastModified': datetime.datetime(2024, 3, 28, 17, 5, 18, tzinfo=tzutc()),
               'Size': 857,
               'StorageClass': 'STANDARD'},
              {'ETag': '"959f10cc0c3aa461a8afaed9035b6f45"',
               'Key': 'Data401/lihong.csv',
               'LastModified': datetime.datetime(2024, 3, 29, 16, 9, 6, tzinfo=tzutc()),
      

### Print bucket names from CLIENT INTERFACE

In [None]:
for object in bucket_contents['Contents']:
    print(object['Key'])

Data401/Brian_Luna.csv
Data401/Rahul.csv
Data401/jack.csv
Data401/lihong.csv
Data401/muhammad.csv
Data401/rahul_csv.csv
Data401/robert.csv
Data401/shivani.csv
Data401/shreya_jain.csv
Data401/usama.csv
Data402/
Data402/dataframes/
Data402/dataframes/KR-details.csv
Data402/dataframes/dafydd_dataframe.csv
Data402/dataframes/dataframe_yoonhee.csv
Data402/dataframes/happiness_yoonhee.csv
Data402/dataframes/jamesob-dataframe-upload.csv
Data402/dataframes/kyrun_dataframe.csv
Data402/dataframes/luis_dataframe.csv
Data402/dataframes/raj_dataframe.csv
Data402/dataframes/sabrina_dataframe.csv
Data402/dataframes/sams_test_csvfile
Data402/fish-market-dafydd.csv
Data402/fish-market-raj.csv
Data402/fish-market-sabrina.csv
Data402/fish_market_luis.csv
Data402/fish_market_yoonhee.csv
Data402/test/
Data402/test/KR-details-dict.json
Data402/test/KR-details.json
Data402/test/dafyddl-details-dict.json
Data402/test/dafyddl-details.json
Data402/test/jacqueline-details-dict.json
Data402/test/jacqueline-detail

In [8]:
#  List of all objects in the bucket whose key start with Python.
bucket_contents = s3_client.list_objects_v2(
    Bucket = bucket_name,
    Prefix = 'python'
)
pp.pprint(bucket_contents)

{'Contents': [{'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
               'Key': 'python/',
               'LastModified': datetime.datetime(2021, 3, 30, 7, 54, 22, tzinfo=tzutc()),
               'Size': 0,
               'StorageClass': 'STANDARD'},
              {'ETag': '"30476b4ef881aec0c26eccb422414e6e"',
               'Key': 'python/chatbot-intent.json',
               'LastModified': datetime.datetime(2021, 3, 30, 7, 54, 22, tzinfo=tzutc()),
               'Size': 69866,
               'StorageClass': 'STANDARD'},
              {'ETag': '"aa02eef2eae5d4e09bcd05e96bb58c51"',
               'Key': 'python/fish-market-mon.csv',
               'LastModified': datetime.datetime(2021, 3, 30, 7, 54, 22, tzinfo=tzutc()),
               'Size': 12530,
               'StorageClass': 'STANDARD'},
              {'ETag': '"389918f10b963f3d835f265ba67e7597"',
               'Key': 'python/fish-market-tues.csv',
               'LastModified': datetime.datetime(2021, 3, 30, 7, 54, 22, tzinf

### Use the boto3 RESOURCE interface to create a reference to a specific OBJECT (file) in a BUCKET in Amazon S3.

In [14]:
# Create a Bucket Object that has a method used to return its contents.
s3_resource = boto3.resource('s3')
bucket_name = 'data-eng-resources'

bucket = s3_resource.Bucket(bucket_name)
# print(bucket)

objects = bucket.objects
# print(objects)

contents = objects.all()
# print(contents)

for object in contents:
    print(object.key)
    


Data401/Brian_Luna.csv
Data401/Rahul.csv
Data401/jack.csv
Data401/lihong.csv
Data401/muhammad.csv
Data401/rahul_csv.csv
Data401/robert.csv
Data401/shivani.csv
Data401/shreya_jain.csv
Data401/usama.csv
Data402/
Data402/dataframes/
Data402/dataframes/KR-details.csv
Data402/dataframes/dafydd_dataframe.csv
Data402/dataframes/dataframe_yoonhee.csv
Data402/dataframes/happiness_yoonhee.csv
Data402/dataframes/jamesob-dataframe-upload.csv
Data402/dataframes/kyrun_dataframe.csv
Data402/dataframes/luis_dataframe.csv
Data402/dataframes/raj_dataframe.csv
Data402/dataframes/sabrina_dataframe.csv
Data402/dataframes/sams_test_csvfile
Data402/fish-market-dafydd.csv
Data402/fish-market-raj.csv
Data402/fish-market-sabrina.csv
Data402/fish_market_luis.csv
Data402/fish_market_yoonhee.csv
Data402/test/
Data402/test/KR-details-dict.json
Data402/test/KR-details.json
Data402/test/dafyddl-details-dict.json
Data402/test/dafyddl-details.json
Data402/test/jacqueline-details-dict.json
Data402/test/jacqueline-detail