# AWS
The following is an example of how to work with AWS services such as S3 from Python.

In [9]:
import os
import boto3
import pandas as pd
from urllib.parse import urlparse
from io import BytesIO
from getpass import getpass


def get_var(var):
    """Return the value of an environment variable, asking for it if unset.

    The name is matched in upper case regardless of how ``var`` is spelled.
    When the variable is missing, the value is requested through ``getpass``
    (the prompt shows the lower-cased name) and stored in ``os.environ`` so
    later calls do not prompt again.

    Args:
        var: Name of the environment variable, in any letter case.

    Returns:
        The string stored in ``os.environ`` under the upper-cased name.
    """
    prompt_name = var.lower()
    env_key = prompt_name.upper()

    if env_key not in os.environ:
        # Input is not echoed; the answer is kept for the rest of the process.
        os.environ[env_key] = getpass(f"Enter {prompt_name}: ")

    return os.environ[env_key]


def _split_s3_uri(s3_uri: str):
    """Split an S3 URI of the form ``s3://bucket/key`` into ``(bucket, key)``.

    ``#`` and ``?`` are legal characters in S3 object keys, but a plain
    ``urlparse`` call treats them as fragment/query delimiters and would
    silently cut the key short. Fragment splitting is therefore disabled and
    any query part is glued back onto the path.

    Leading and trailing slashes are stripped from the key. A bare trailing
    ``?`` with nothing after it is still dropped by ``urlparse``.

    Args:
        s3_uri: URI whose network location is the bucket name.

    Returns:
        A ``(bucket, key)`` tuple of strings.
    """
    parsed = urlparse(s3_uri, allow_fragments=False)
    key = parsed.path
    if parsed.query:
        # Re-attach what urlparse split off at the first "?".
        key = f"{key}?{parsed.query}"
    return parsed.netloc, key.strip("/")


def to_s3(data: pd.DataFrame, s3_uri: str, **kwargs):
    """Serialize a DataFrame to Parquet in memory and upload it to S3.

    Args:
        data: Frame to write.
        s3_uri: Destination, e.g. ``s3://bucket/path/to/file.parquet``.
        **kwargs: Forwarded unchanged to ``DataFrame.to_parquet``.

    Returns:
        None.
    """
    client = boto3.client("s3")
    bucket, key = _split_s3_uri(s3_uri)

    buffer = BytesIO()
    data.to_parquet(path=buffer, **kwargs)
    # Rewind so the upload starts at the first byte that was written.
    buffer.seek(0)
    client.upload_fileobj(Fileobj=buffer, Bucket=bucket, Key=key)


def from_s3(s3_uri: str) -> pd.DataFrame:
    """Download a Parquet object from S3 and load it into a DataFrame.

    Args:
        s3_uri: Source, e.g. ``s3://bucket/path/to/file.parquet``.

    Returns:
        The DataFrame produced by ``pd.read_parquet`` from the object's bytes.
    """
    client = boto3.client("s3")
    bucket, key = _split_s3_uri(s3_uri)

    response = client.get_object(Bucket=bucket, Key=key)
    # The whole object body is read into memory before parsing.
    payload = BytesIO(response["Body"].read())
    return pd.read_parquet(payload)

In [10]:
# Copy the NAMIOT_-prefixed credentials into the standard AWS variable names.
# Alternatives: use the ASHLI_ prefix instead, or call get_var() on the
# unprefixed AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY names directly.
for aws_var in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
    os.environ[aws_var] = get_var(f"NAMIOT_{aws_var}")
print("Setup")

Setup


In [11]:
# Bucket used by the examples below (alternative: "ahm-ashli").
bucket_name = "ahm-weldability"
print("Using bucket with name: {}".format(bucket_name))

Using bucket with name: ahm-weldability


## Example of writing a DataFrame to S3 in Parquet format
The following packages must be listed in `requirements.txt`:

```
pandas
boto3
pyarrow
```

In [4]:
# Build a 20-row frame whose every row is 0..4 and upload it as Parquet.
s3_key = "/example/df/dummy.parquet"
test_data = pd.DataFrame([list(range(5)) for _ in range(20)])
# Column labels are converted from integers to strings before writing.
test_data.columns = [str(label) for label in test_data.columns]
to_s3(test_data, f"s3://{bucket_name}{s3_key}")

In [5]:
# Show how the S3 URI decomposes: the full parse result first, then the
# object key, which is the path with its surrounding slashes removed.
parsed_s3 = urlparse(f"s3://{bucket_name}{s3_key}")
key_s3 = parsed_s3.path.strip("/")
print(parsed_s3)
print(key_s3)


ParseResult(scheme='s3', netloc='ahm-weldability', path='/example/df/dummy.parquet', params='', query='', fragment='')
example/df/dummy.parquet


## Example of reading data from S3 into a DataFrame
The following packages must be listed in `requirements.txt`:

```
pandas
boto3
pyarrow
```

In [6]:
# Rebuild the URI from the bucket and key, print it, then load the Parquet
# object into a DataFrame and display it as the cell output.
s3_path = "s3://{}{}".format(bucket_name, s3_key)
print(s3_path)
data = from_s3(s3_path)
data

s3://ahm-weldability/example/df/dummy.parquet


Unnamed: 0,0,1,2,3,4
0,0,1,2,3,4
1,0,1,2,3,4
2,0,1,2,3,4
3,0,1,2,3,4
4,0,1,2,3,4
5,0,1,2,3,4
6,0,1,2,3,4
7,0,1,2,3,4
8,0,1,2,3,4
9,0,1,2,3,4


In [38]:
# Create one low-level S3 client that the cells below reuse. No explicit
# credentials are passed, so boto3 resolves them from its default chain, which
# includes the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables.
s3client = boto3.client('s3')

## We might adopt a best practice that no application service account is allowed to create or delete buckets. This would give better control over the high-level bucket settings.

In [None]:
# Create the bucket named by `bucket_name`; the response is the cell output.
# NOTE(review): no CreateBucketConfiguration is passed — presumably the client
# region is us-east-1; other regions need a LocationConstraint. Confirm.
s3client.create_bucket(Bucket=bucket_name)

In [None]:
# Print the name and creation timestamp of every bucket in the response.
list_buckets_resp = s3client.list_buckets()
for bucket in list_buckets_resp["Buckets"]:
    print(f"Bucket --> {bucket['Name']} - there since {bucket['CreationDate']}")

## Example of creating an object using boto3 client

In [None]:
# NOTE(review): this reassigns `bucket_name`, replacing the value chosen in
# the earlier bucket-selection cell — confirm that 'ahm-ashli' is intended.
bucket_name = "ahm-ashli"
object_key = "test/python_sample_key.txt"

print(f"Uploading some data to {bucket_name} with key: {object_key}")

# Upload a small literal payload; the response dict is the cell output.
s3client.put_object(Body=b"Hello World!", Bucket=bucket_name, Key=object_key)

## Example of generating a temporary URL to download the object without credentials

In [None]:
# Sign a GET request for the object uploaded above. No ExpiresIn is passed,
# so boto3's default expiry applies.
url = s3client.generate_presigned_url(
    ClientMethod="get_object",
    Params={"Bucket": bucket_name, "Key": object_key},
)
print("\nTry this URL in your browser to download the object:", url, sep="\n")