# S3 instances and Boto3

In [2]:
!pip install Boto3

Collecting Boto3
  Downloading boto3-1.16.18-py2.py3-none-any.whl (129 kB)
[K     |████████████████████████████████| 129 kB 1.3 MB/s eta 0:00:01
Collecting s3transfer<0.4.0,>=0.3.0
  Downloading s3transfer-0.3.3-py2.py3-none-any.whl (69 kB)
[K     |████████████████████████████████| 69 kB 1.3 MB/s eta 0:00:01
[?25hCollecting botocore<1.20.0,>=1.19.18
  Downloading botocore-1.19.18-py2.py3-none-any.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 50 kB/s  eta 0:00:01     |███████████████▎                | 3.2 MB 1.4 MB/s eta 0:00:03     |█████████████████               | 3.6 MB 1.4 MB/s eta 0:00:03
Installing collected packages: botocore, s3transfer, Boto3
Successfully installed Boto3-1.16.18 botocore-1.19.18 s3transfer-0.3.3


In [1]:
import boto3

In [2]:
# first create a session with your access keys 

YOUR_ACCESS_KEY = ""
YOUR_SECRET_KEY = ""

session = boto3.Session(aws_access_key_id= YOUR_ACCESS_KEY, 
                        aws_secret_access_key= YOUR_SECRET_KEY)

In [3]:
# now set up a resource or a client in order to interact with your s3 instance. 
# resource --> high_level_api 
# client --> low_level api


s3 = session.resource("s3")
client = session.client("s3")


### Creating a new bucket

In [4]:
# using resource
bucket_1 = s3.create_bucket(Bucket = "n-amr-demo-bucket-1") 

# using client
bucket_2 = client.create_bucket(Bucket = "n-amr-demo-bucket-2")

bucket_3 = client.create_bucket(Bucket = "n-amr-demo-bucket-3")

### Displaying all buckets

In [5]:
# using resource
for bucket in s3.buckets.all():
    print(bucket.name)

n-amr-demo-bucket-1
n-amr-demo-bucket-2
n-amr-demo-bucket-3


In [6]:
# using client --> returns a dicitonary 
for bucket in client.list_buckets()["Buckets"]: 
    print(bucket["Name"])

n-amr-demo-bucket-1
n-amr-demo-bucket-2
n-amr-demo-bucket-3


### Deleting a bucket

In [7]:
# using client
client.delete_bucket(Bucket='n-amr-demo-bucket-3')


{'ResponseMetadata': {'RequestId': 'C2A42A0E3BBE7030',
  'HostId': 'tRvbqjmF66cb5iWeGAamvapvk7eexN4PGFyT7JYrhA7SCtOzOtrIEr6afLXtgxRBINfzLB7DdK0=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': 'tRvbqjmF66cb5iWeGAamvapvk7eexN4PGFyT7JYrhA7SCtOzOtrIEr6afLXtgxRBINfzLB7DdK0=',
   'x-amz-request-id': 'C2A42A0E3BBE7030',
   'date': 'Sun, 15 Nov 2020 13:07:04 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

In [8]:
# we could sucessfully delete the bucket because there was no object in it, later we will see how to put an object into a bucjet
[bucket["Name"] for bucket in client.list_buckets()["Buckets"]]
    

['n-amr-demo-bucket-1', 'n-amr-demo-bucket-2']

### Uploading files to bucket 

In [9]:
import pandas as pd
df_1= pd.DataFrame({'col1': [1,2,3,4], 'col2': ['a1','a2','a3','a4']})
df_2 = pd.DataFrame({'col1': [5,6,7,8], 'col2': ['a2','a3','a4','a5']})


In [10]:
# using s3 --> let's put the first dataframe into our first bucket 
csv = df_1.to_csv()
put_object = bucket_1.put_object(ACL='private', Key="new-folder/test.csv", Body=csv)

In [11]:
# using client --> let's put the second dataframe into our first bucket, this time with public access
csv_2 = df_2.to_csv()
put_object = bucket_1.put_object(ACL='public-read', Key="test2.csv", Body=csv_2)

 ### Listing and accessing files in a bucket

In [12]:
# LISTING  method 1

#first select the bucket you want to look into using s3 

bucket = s3.Bucket("n-amr-demo-bucket-1")

for obj in bucket.objects.all():
    print(obj.key)
    


new-folder/test.csv
test2.csv


In [13]:
# we can also use list comprehensions
all_files = [obj.key for obj in bucket.objects.all()]
all_files

['new-folder/test.csv', 'test2.csv']

In [14]:
# LISTING method 2 --> using client
for obj in client.list_objects(Bucket = "n-amr-demo-bucket-1")["Contents"]:
    print(obj["Key"])



new-folder/test.csv
test2.csv


In [15]:
# What if we want to filter by prefix or suffix? We can use the filter method
some_files = [obj.key for obj in bucket.objects.filter(Prefix="new-folder")]
some_files

['new-folder/test.csv']

In [16]:
# ACCESSING method 1 
obj = bucket.Object('new-folder/test.csv')
obj

s3.Object(bucket_name='n-amr-demo-bucket-1', key='new-folder/test.csv')

In [17]:
# ACCESSING method 2
obj = s3.Object('n-amr-demo-bucket-1', 'test2.csv')
obj

s3.Object(bucket_name='n-amr-demo-bucket-1', key='test2.csv')

### Downloading or reading a file from a bucket


In [None]:
# downloading
s3.Bucket("n-amr-demo-bucket-1").download_file('test2.csv', #'file_destination')

In [18]:
# however, sometimes we only want to read the file w/o downloading it. In that case we can use the io library
import io
obj = s3.Object('n-amr-demo-bucket-1', 'test2.csv')

df = pd.read_csv(io.BytesIO(obj.get()['Body'].read()))
df.head()

Unnamed: 0.1,Unnamed: 0,col1,col2
0,0,5,a2
1,1,6,a3
2,2,7,a4
3,3,8,a5


### Deleting a file from a bucket

In [19]:
# this method allows to delete several objects through a dictionary. Here we delete only one

bucket = s3.Bucket("n-amr-demo-bucket-1")
response = bucket.delete_objects(
    Delete={
        'Objects': [
            {
                'Key': "test2.csv"
            }
        ]
    }
)

In [20]:
for obj in bucket.objects.all():
    print(obj.key)

new-folder/test.csv


In [21]:
# this client method allows the deletion of objects one at a time
response = client.delete_object(
    Bucket='n-amr-demo-bucket-1',
    Key='new-folder/test.csv',
)

In [22]:
# the bucket is now empty
for obj in bucket.objects.all():
    print(obj.key)