In [49]:
import boto3
import time
import zipfile
from io import BytesIO

## Preparing Data in local S3 Bucket

In [50]:
# Location of the zipped dataset, expressed as the Bucket/Key mapping that is
# later handed to jam_bucket.copy() as the copy source.
DATASET_SOURCE = {
    'Bucket': 'aws-jam-challenge-resources',
    'Key': 'covid-19-with-sage-maker/dataset/radiography_train_data_10.zip'
}

# The destination bucket name is suffixed with the current Unix timestamp.
# NOTE: re-running this cell produces a *new* bucket name, so the cells below
# must be re-run as well to stay in sync with it.
BUCKET = f"covid-19-image-dataset-{int(time.time())}"
REGION = "us-west-1"
# Temporary key under which the archive is staged before it is unpacked.
ZIP_KEY = 'temp/radiography_train_data_10.zip'

# Build the client and the resource against the same region so that both
# handles address the bucket consistently, instead of letting the resource
# fall back to whatever default region the environment happens to configure.
s3_client = boto3.client('s3', region_name=REGION)
s3_resource = boto3.resource('s3', region_name=REGION)
jam_bucket = s3_resource.Bucket(BUCKET)

### Creating local S3 Bucket in the JAM Account

In [51]:
print("Creating Bucket: ", BUCKET)
# S3 rejects 'us-east-1' as an explicit LocationConstraint; for that region the
# CreateBucketConfiguration must be omitted. Every other region requires it.
create_bucket_kwargs = {'Bucket': BUCKET}
if REGION != 'us-east-1':
    create_bucket_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': REGION}
# Left as the cell's final expression so the service response is displayed.
s3_client.create_bucket(**create_bucket_kwargs)

Creating Bucket:  covid-19-image-dataset-1635776660


{'ResponseMetadata': {'RequestId': 'YBSZW5TJ3YVMKRMS',
  'HostId': 'fbB5/PDs0V+kyZTDIRC7WMx3yZviJ+HePah9vreeCevjdn6gRb3l61+U6vxlEKq86NcCKJtJLWk=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'fbB5/PDs0V+kyZTDIRC7WMx3yZviJ+HePah9vreeCevjdn6gRb3l61+U6vxlEKq86NcCKJtJLWk=',
   'x-amz-request-id': 'YBSZW5TJ3YVMKRMS',
   'date': 'Mon, 01 Nov 2021 14:24:22 GMT',
   'location': 'http://covid-19-image-dataset-1635776660.s3.amazonaws.com/',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': 'http://covid-19-image-dataset-1635776660.s3.amazonaws.com/'}

### Copying zipped Dataset into the local S3 Bucket

In [52]:
# Stage the zipped dataset inside the freshly created bucket under ZIP_KEY.
print("Copying Data from: ", DATASET_SOURCE)
jam_bucket.copy(CopySource=DATASET_SOURCE, Key=ZIP_KEY)

Copying Data from:  {'Bucket': 'aws-jam-challenge-resources', 'Key': 'covid-19-with-sage-maker/dataset/radiography_train_data_10.zip'}


### Unzipping Dataset into the local S3 Bucket

In [53]:
print("Unzipping ", ZIP_KEY)
zip_obj = s3_resource.Object(bucket_name=BUCKET, key=ZIP_KEY)
# The whole archive is read into memory before being unpacked.
buffer = BytesIO(zip_obj.get()["Body"].read())
# Context managers make sure the archive and every member stream are closed
# once their upload has finished.
with zipfile.ZipFile(buffer) as z:
    for filename in z.namelist():
        # Entries under __MACOSX/ are removed from the bucket right after the
        # loop anyway, so do not spend an upload on them.
        if filename.startswith("__MACOSX/"):
            continue
        # Each archive member is uploaded under its in-archive path as the key.
        with z.open(filename) as member:
            s3_resource.meta.client.upload_fileobj(
                member,
                Bucket=BUCKET,
                Key=filename
            )
print("Completed Unzipping Training Data")
# Kept as a safety net for __MACOSX/ objects that may already exist in the
# bucket; with the skip above, this loop itself uploads none.
jam_bucket.objects.filter(Prefix="__MACOSX/").delete()
# Remove the staged zip; left as the final expression so its response is shown.
jam_bucket.objects.filter(Prefix="temp/").delete()

Unzipping  temp/radiography_train_data_10.zip
Completed Unzipping Training Data


[{'ResponseMetadata': {'RequestId': '9QCXKPCKK8MT8KRY',
   'HostId': '6+TEebMgyHZxOUXwOCkNd1ynMk1ZyP2k18I9Y3LuOxZH0dejsv/KS11yA/sZ8DG2r1doixJIg88=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '6+TEebMgyHZxOUXwOCkNd1ynMk1ZyP2k18I9Y3LuOxZH0dejsv/KS11yA/sZ8DG2r1doixJIg88=',
    'x-amz-request-id': '9QCXKPCKK8MT8KRY',
    'date': 'Mon, 01 Nov 2021 14:26:02 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'temp/radiography_train_data_10.zip'}]}]