
# Accessing Files in AWS S3

by Emil Vassev

April 27, 2023
<br><br>
Copyright (C) 2023 - All rights reserved, do not copy or distribute without permission of the author.
***

## Read Data from a CSV File in AWS S3

In [1]:
import boto3
import pandas as pd
import os
from io import StringIO


# Name of the S3 bucket used by the read/write helpers in this notebook.
AWS_S3_BUCKET = bucket_name = 'cs6512'

# AWS credentials are taken from the environment; each value is None when
# the corresponding variable is not set.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")

In [2]:
def read_csv_file_from_s3(csv_file_name):
    """Download a CSV object from the configured S3 bucket as a DataFrame.

    Args:
        csv_file_name: Key of the CSV object inside ``AWS_S3_BUCKET``.

    Returns:
        The pandas DataFrame parsed from the object's UTF-8 decoded contents.

    Any exception raised by boto3 (e.g. for a missing key) or by the UTF-8
    decode / CSV parse propagates to the caller unchanged.
    """
    # Build the client from the module-level credentials, the same way
    # write_csv_file_to_s3 does, so both helpers authenticate identically.
    client = boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        aws_session_token=AWS_SESSION_TOKEN,
    )

    csv_obj = client.get_object(Bucket=AWS_S3_BUCKET, Key=csv_file_name)
    body = csv_obj['Body']
    try:
        csv_string = body.read().decode('utf-8')
    finally:
        # Always close the response stream so the underlying connection is
        # released even when reading or decoding fails.
        body.close()

    return pd.read_csv(StringIO(csv_string))

In [3]:
# Load 'instrument_price.csv' from the S3 bucket into a DataFrame.
df = read_csv_file_from_s3('instrument_price.csv')

In [4]:
# Display the first 10 rows of the loaded DataFrame.
df.head(10)

Unnamed: 0,instrument_ticker,currency_code,bid,offer,pricing_source_code,time
0,LINK,USD,18.08329,18.08329,KRAKEN,2/10/2022 6:31
1,BTC,USD,43821.375,43821.375,COINBASE,2/10/2022 6:31
2,LTC,USD,138.06,138.06,KRAKEN,2/10/2022 6:31
3,DAI,USD,1.0189,1.0189,BINANCE,2/10/2022 6:31
4,XRP,USD,0.8715,0.8715,BINANCE,2/10/2022 6:01
5,SOL,USD,111.415,111.415,COINBASE,2/10/2022 6:31
6,BTC,USD,43916.9,43916.9,KRAKEN,2/10/2022 6:01
7,XRP,USD,0.87188,0.87188,KRAKEN,2/10/2022 6:01
8,BTC,USD,43819.8,43819.8,KRAKEN,2/10/2022 6:31
9,USD,CAD,1.268,1.268,BANKOFCANADA,2/10/2022 5:31


## Write a CSV File to AWS S3

In [5]:
def write_csv_file_to_s3(csv_file_name, df):
    """Serialize a DataFrame to CSV and upload it to the configured S3 bucket.

    Args:
        csv_file_name: Object key under which the CSV is stored in
            ``AWS_S3_BUCKET``.
        df: pandas DataFrame to upload; its index is not written.

    Prints whether the HTTP status code reported in the ``put_object``
    response was 200. Returns nothing.
    """
    credentials = {
        "aws_access_key_id": AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": AWS_SECRET_ACCESS_KEY,
        "aws_session_token": AWS_SESSION_TOKEN,
    }
    s3 = boto3.client("s3", **credentials)

    # With no target buffer, to_csv returns the CSV text directly.
    csv_text = df.to_csv(index=False)
    response = s3.put_object(Bucket=AWS_S3_BUCKET, Key=csv_file_name, Body=csv_text)

    # A missing ResponseMetadata entry yields a status of None.
    status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")

    if status != 200:
        print(f"Unsuccessful S3 put_object response. Status - {status}")
        return
    print(f"Successful S3 put_object response. Status - {status}")

In [8]:
# Sample records as a list of dicts. The rows do not all share the same
# keys, so the resulting DataFrame has empty (NaN) cells where a key is absent.
data = [
    dict(A=10, B=20, C=30),
    dict(x=100, y=200, z=300),
    dict(A=101, B=201, C=301, x=102, y=202, z=302),
    dict(A=101, B=201, C=301, x=102, y=202, z=302),
]

# Build the DataFrame from the records.
df = pd.DataFrame(data)

# Upload it to the S3 bucket under the key 'test.csv'.
write_csv_file_to_s3('test.csv', df)

Successful S3 put_object response. Status - 200


In [9]:
# Read back 'test.csv', the file uploaded in the previous step, from S3.
df = read_csv_file_from_s3('test.csv')

In [10]:
# Display the first rows of the DataFrame read back from S3.
df.head()

Unnamed: 0,A,B,C,x,y,z
0,10.0,20.0,30.0,,,
1,,,,100.0,200.0,300.0
2,101.0,201.0,301.0,102.0,202.0,302.0
3,101.0,201.0,301.0,102.0,202.0,302.0
