In [3]:
import boto3
from smart_open import smart_open

# Listing Existing Buckets

In [4]:
# Create a low-level S3 client, then retrieve the list of existing buckets.
# `response` is read by the next cell to print the bucket names.
s3 = boto3.client(service_name='s3')
response = s3.list_buckets()

In [5]:
# Output the bucket names, one per line, each prefixed with two spaces.
for bucket in response['Buckets']:
    bucket_name = bucket["Name"]
    print(f'  {bucket_name}')

  bmsorganic
  copppp
  isingmodeldata
  onurkaraphys.com
  organic-optical-recognition
  orgo-repos-git
  sommd-demo1.com
  sommdwelcome.com
  tbingol.com
  tests3csv
  theochem.com
  www.onurkaraphys.com
  www.sommd-demo1.com
  www.sommdwelcome.com
  www.tbingol.com
  www.theochem.com


# Simplest Connection Option - To A Single Bucket and Item

In [46]:
# NOTE: `s3` was bound to a boto3 *client* in an earlier cell; from here on
# it refers to a boto3 *resource*, which is what the cells below rely on
# (s3.Bucket / s3.Object).
s3 = boto3.resource('s3')

# Handle to a single bucket, addressed by name.
bucket = s3.Bucket(name='tests3csv')
print(bucket)

s3.Bucket(name='tests3csv')


In [47]:
# Address one item by bucket name and key, fetch it, and read the body
# returned under the 'Body' entry of the get() response.
obj = s3.Object(bucket_name='tests3csv', key='smoking.csv')
get_response = obj.get()
body = get_response['Body'].read()
print(body)

b'Id,Age,FEV,Height,Sex,Smoker\n301,9,1.708,57,Female,Non\n451,8,1.724,67.5,Female,Non\n501,7,1.72,54.5,Female,Non\n642,9,1.558,53,Male,Non\n901,9,1.895,57,Male,Non\n1701,8,2.336,61,Female,Non\n1752,6,1.919,58,Female,Non\n1753,6,1.415,56,Female,Non\n1901,8,1.987,58.5,Female,Non\n1951,9,1.942,60,Female,Non\n1952,6,1.602,53,Female,Non\n2001,8,1.735,54,Male,Non\n2101,8,2.193,58.5,Female,Non\n2401,8,2.118,60.5,Male,Non\n3102,8,2.258,58,Male,Non\n3142,7,1.932,53,Male,Non\n3541,5,1.472,50,Male,Non\n3551,6,1.878,53,Female,Non\n4201,9,2.352,59,Male,Non\n4301,9,2.604,61.5,Male,Non\n4351,5,1.4,49,Female,Non\n5151,5,1.256,52.5,Female,Non\n5152,4,0.839,48,Female,Non\n5201,7,2.578,62.5,Male,Non\n5601,9,2.988,65,Female,Non\n5642,3,1.404,51.5,Male,Non\n5702,9,2.348,60,Male,Non\n6042,5,1.755,52,Male,Non\n6101,8,2.98,60,Female,Non\n6801,9,2.1,60,Female,Non\n6851,5,1.282,49,Female,Non\n7201,9,3,65.5,Male,Non\n7251,8,2.673,60,Female,Non\n7252,7,2.093,57.5,Female,Non\n7253,5,1.612,52,Female,Non\n8501,8,2.

In [48]:
# Display the bucket's `objects` attribute; its repr (shown in the cell
# output) is a collection manager over s3.ObjectSummary items.
bucket.objects

s3.Bucket.objectsCollectionManager(s3.Bucket(name='tests3csv'), s3.ObjectSummary)

# Iterate through all the objects/items in a given Bucket
`bucket.objects.all()` does the pagination for you. Each `obj` is an ObjectSummary, so it doesn't contain the body; you'll need to call `get()` on it to retrieve the whole body.

In [49]:
# Walk every item in the bucket; for each one, fetch its body and show the
# key together with the first 60 bytes of content.
for obj in bucket.objects.all():
    key, body = obj.key, obj.get()['Body'].read()
    preview = body[:60]
    print("key: ", key, "\nbody: ", preview)

key:  Stampede.csv 
body:  b'ID,DATE,TIME,TIME_OF_DAY,NETWORK,HEADLINE,SENTIMENT_LABEL,SE'
key:  landing_page_class_exercise.csv 
body:  b'path,page_type\r\n/nieuws/2020/12/30/het-irrationele-wint-aan-'
key:  skin_cancer.csv 
body:  b'keyword,count\n"1800 skin cancer research",376\n"2c 2c cancer '
key:  sleeping-alone-data.csv 
body:  b'StartDate,EndDate,Which of the following best describes your'
key:  smoking.csv 
body:  b'Id,Age,FEV,Height,Sex,Smoker\n301,9,1.708,57,Female,Non\n451,8'
key:  some_binary_data.txt 
body:  b'Here we have some data'
key:  stl_test_results.csv 
body:  b' scenario,idx,season,trend,rw\n baseline,           0 , -0.13'
key:  tasks.csv 
body:  b'name,docs,cats,word,dpc1,dpc2,dpc3,dpc4,dpc5,dpc6,dpc7,dpc8,'
key:  test.csv 
body:  b'__label__1\t"b""Dan Katzir has produced a wonderful film that'
key:  test1.csv 
body:  b'"3","Fears for T N pension after talks","Unions representing'
key:  test1.txt 
body:  b'Here we have some more data'


# Streaming Lines from an S3 object using <b>smart_open</b>

In [50]:
# Iterate over the S3 object line by line (opened in binary mode, so each
# line is bytes and must be decoded).
# Each line still carries its own line terminator; strip it before printing,
# otherwise print() appends a second newline and the output is double-spaced.
for line in smart_open('s3://tests3csv/smoking.csv', 'rb'):
    print(line.decode('utf8').rstrip('\r\n'))

Id,Age,FEV,Height,Sex,Smoker

301,9,1.708,57,Female,Non

451,8,1.724,67.5,Female,Non

501,7,1.72,54.5,Female,Non

642,9,1.558,53,Male,Non

901,9,1.895,57,Male,Non

1701,8,2.336,61,Female,Non

1752,6,1.919,58,Female,Non

1753,6,1.415,56,Female,Non

1901,8,1.987,58.5,Female,Non

1951,9,1.942,60,Female,Non

1952,6,1.602,53,Female,Non

2001,8,1.735,54,Male,Non

2101,8,2.193,58.5,Female,Non

2401,8,2.118,60.5,Male,Non

3102,8,2.258,58,Male,Non

3142,7,1.932,53,Male,Non

3541,5,1.472,50,Male,Non

3551,6,1.878,53,Female,Non

4201,9,2.352,59,Male,Non

4301,9,2.604,61.5,Male,Non

4351,5,1.4,49,Female,Non

5151,5,1.256,52.5,Female,Non

5152,4,0.839,48,Female,Non

5201,7,2.578,62.5,Male,Non

5601,9,2.988,65,Female,Non

5642,3,1.404,51.5,Male,Non

5702,9,2.348,60,Male,Non

6042,5,1.755,52,Male,Non

6101,8,2.98,60,Female,Non

6801,9,2.1,60,Female,Non

6851,5,1.282,49,Female,Non

7201,9,3,65.5,Male,Non

7251,8,2.673,60,Female,Non

7252,7,2.093,57.5,Female,Non

7253,5,1.612,52,Female,Non

8501,8,2.17

# Opening with encodings

In [16]:
# Open the S3 object as a context manager so it is closed when the block
# exits, print every line, then rewind and grab a fixed-size chunk.
with smart_open('s3://tests3csv/smoking.csv', 'rb') as s3_source:
    for line in s3_source:
        # Lines keep their terminator; strip it so print() does not
        # produce a blank line after every row.
        print(line.decode('utf8').rstrip('\r\n'))
    s3_source.seek(0)  # seek to the beginning
    # Read at most 1000 bytes; `b1000` is displayed in the next cell.
    b1000 = s3_source.read(1000)

Id,Age,FEV,Height,Sex,Smoker

301,9,1.708,57,Female,Non

451,8,1.724,67.5,Female,Non

501,7,1.72,54.5,Female,Non

642,9,1.558,53,Male,Non

901,9,1.895,57,Male,Non

1701,8,2.336,61,Female,Non

1752,6,1.919,58,Female,Non

1753,6,1.415,56,Female,Non

1901,8,1.987,58.5,Female,Non

1951,9,1.942,60,Female,Non

1952,6,1.602,53,Female,Non

2001,8,1.735,54,Male,Non

2101,8,2.193,58.5,Female,Non

2401,8,2.118,60.5,Male,Non

3102,8,2.258,58,Male,Non

3142,7,1.932,53,Male,Non

3541,5,1.472,50,Male,Non

3551,6,1.878,53,Female,Non

4201,9,2.352,59,Male,Non

4301,9,2.604,61.5,Male,Non

4351,5,1.4,49,Female,Non

5151,5,1.256,52.5,Female,Non

5152,4,0.839,48,Female,Non

5201,7,2.578,62.5,Male,Non

5601,9,2.988,65,Female,Non

5642,3,1.404,51.5,Male,Non

5702,9,2.348,60,Male,Non

6042,5,1.755,52,Male,Non

6101,8,2.98,60,Female,Non

6801,9,2.1,60,Female,Non

6851,5,1.282,49,Female,Non

7201,9,3,65.5,Male,Non

7251,8,2.673,60,Female,Non

7252,7,2.093,57.5,Female,Non

7253,5,1.612,52,Female,Non

8501,8,2.17

In [20]:
# Display the bytes captured by the seek(0) / read(1000) call in the
# previous cell.
b1000

b'Id,Age,FEV,Height,Sex,Smoker\n301,9,1.708,57,Female,Non\n451,8,1.724,67.5,Female,Non\n501,7,1.72,54.5,Female,Non\n642,9,1.558,53,Male,Non\n901,9,1.895,57,Male,Non\n1701,8,2.336,61,Female,Non\n1752,6,1.919,58,Female,Non\n1753,6,1.415,56,Female,Non\n1901,8,1.987,58.5,Female,Non\n1951,9,1.942,60,Female,Non\n1952,6,1.602,53,Female,Non\n2001,8,1.735,54,Male,Non\n2101,8,2.193,58.5,Female,Non\n2401,8,2.118,60.5,Male,Non\n3102,8,2.258,58,Male,Non\n3142,7,1.932,53,Male,Non\n3541,5,1.472,50,Male,Non\n3551,6,1.878,53,Female,Non\n4201,9,2.352,59,Male,Non\n4301,9,2.604,61.5,Male,Non\n4351,5,1.4,49,Female,Non\n5151,5,1.256,52.5,Female,Non\n5152,4,0.839,48,Female,Non\n5201,7,2.578,62.5,Male,Non\n5601,9,2.988,65,Female,Non\n5642,3,1.404,51.5,Male,Non\n5702,9,2.348,60,Male,Non\n6042,5,1.755,52,Male,Non\n6101,8,2.98,60,Female,Non\n6801,9,2.1,60,Female,Non\n6851,5,1.282,49,Female,Non\n7201,9,3,65.5,Male,Non\n7251,8,2.673,60,Female,Non\n7252,7,2.093,57.5,Female,Non\n7253,5,1.612,52,Female,Non\n8501,8,2.

# Downloading a File

In [37]:
# Copy the 'smoking.csv' item from the bucket to a local file named
# 'smoking_new.csv' (path is relative to the current working directory).
bucketname = 'tests3csv'
s3 = boto3.resource('s3')
source_bucket = s3.Bucket(bucketname)
source_bucket.download_file(Key='smoking.csv', Filename='smoking_new.csv')

# Writing Data To a Bucket

In [39]:
# Two small in-memory byte payloads; the upload cells below send one each.
some_binary_data, more_binary_data = (
    b'Here we have some data',
    b'Here we have some more data',
)

# Method 1: Object.put() - some binary data in the interpreter or IDE

In [43]:
# Upload the in-memory bytes as 'some_binary_data.txt' in the bucket.
s3 = boto3.resource('s3')
# Named `s3_object` rather than `object` so the Python builtin `object`
# is not shadowed for the rest of the kernel session.
s3_object = s3.Object('tests3csv', 'some_binary_data.txt')
# Last expression in the cell: the put() response dict is displayed.
s3_object.put(Body=some_binary_data)

{'ResponseMetadata': {'RequestId': 'TSDPK0Y2QN5NECDZ',
  'HostId': '1GTTuhVsee+a0N2Yc5i2meKWSHbB+hd03Uj2G7RXKl5MQEvHlipWhbZltJL50a60DWchSv9UKrc=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '1GTTuhVsee+a0N2Yc5i2meKWSHbB+hd03Uj2G7RXKl5MQEvHlipWhbZltJL50a60DWchSv9UKrc=',
   'x-amz-request-id': 'TSDPK0Y2QN5NECDZ',
   'date': 'Tue, 08 Jun 2021 06:54:16 GMT',
   'etag': '"686fe502441525967f82cad39746b385"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"686fe502441525967f82cad39746b385"'}

# Method 2: Client.put_object() -- Some existing file or object

In [44]:
# Same upload through the low-level client API: destination bucket and key
# are passed explicitly alongside the payload. The response dict is the
# cell's displayed value.
client = boto3.client('s3')
client.put_object(
    Bucket='tests3csv',
    Key='test1.txt',
    Body=more_binary_data,
)

{'ResponseMetadata': {'RequestId': 'TSDY03333NQBKTEH',
  'HostId': 'BClGNX+0+fMY8Vz0YHTF/8v//tw4MlBcyKTsiHLRg4Wq7xq5ORVVhnF9d/g9+Bz6GrJeUL1QzBA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'BClGNX+0+fMY8Vz0YHTF/8v//tw4MlBcyKTsiHLRg4Wq7xq5ORVVhnF9d/g9+Bz6GrJeUL1QzBA=',
   'x-amz-request-id': 'TSDY03333NQBKTEH',
   'date': 'Tue, 08 Jun 2021 06:54:16 GMT',
   'etag': '"7ee61533af4316840a03e5b59f4a5bea"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"7ee61533af4316840a03e5b59f4a5bea"'}