# Tutorial: Reading data from and writing data to files in S3 with boto3. Two methods are covered:
## Method 1
1) Create a session using Boto3
2) Get access to S3 resource
3) Define the object with bucket name and filename
4) Perform the Read(get) / Write(put) operation on the defined object

## Method 2
1) Create a boto3 S3 client for a given AWS region
2) Perform the read (get_object) / write (put_object) operation

In [2]:
import boto3

## Method1: Get S3 resource access without Credentials (Default credentials linked to IAM profile will be used)
- Create session and S3 resource

In [3]:
# Create a Boto3 session. No credentials are passed explicitly, so the
# default credentials (e.g. those linked to the IAM profile) are used.
session = boto3.Session()

# Create the S3 resource from the session; the S3_* helper functions use it.
s3_resource = session.resource('s3')

## Define a function to write data into S3 as file
- Create an S3 Object and call the required method on it

In [4]:
def S3_WriteObject(bucketname,objectname,data):
    """Upload `data` to S3 as object `objectname` in bucket `bucketname`.

    Uses the module-level `s3_resource` and prints whether the put
    request came back with HTTP status 200. Returns nothing.
    """
    # Build a handle to the target object and upload the payload in one go
    put_response = s3_resource.Object(bucketname, objectname).put(Body=data)

    # Extract the HTTP status code from the response metadata
    http_status = put_response.get("ResponseMetadata").get("HTTPStatusCode")

    # Guard clause: report any non-200 status and stop
    if http_status != 200:
        print(f"Unsuccessful S3 put_object response. Status - {http_status}")
        return

    print(objectname, "has been written successfully in S3")

## Define a function to verify if the given object is present in the S3 bucket

In [5]:
# verify if a object present in a given bucket
def S3_VerifyObject(bucket,objectkey):
    """Print whether `objectkey` exists in the S3 bucket `bucket`.

    Uses the module-level `s3_resource`. The listing is restricted to keys
    that start with `objectkey`, so the whole bucket is not enumerated just
    to check a single key.

    Args:
        bucket: Name of the S3 bucket to look in.
        objectkey: Exact key of the object to look for.
    """
    # Ask S3 only for keys beginning with objectkey instead of every object
    candidates = s3_resource.Bucket(bucket).objects.filter(Prefix=objectkey)

    # A prefix match alone is not enough ('a.txt' also matches 'a.txt.bak'),
    # so each returned key is compared exactly
    if any(summary.key == objectkey for summary in candidates):
        print(objectkey, 'is present in the S3')
    else:
        print(objectkey, 'is not present in S3')

In [6]:
# Name of the S3 bucket used by the examples in this tutorial
bucket = 'mk-logs-from-cloudwatch'

## Upload content to a txt file

In [7]:
# Upload a .txt file to the S3 bucket; the payload is passed as bytes
txt_data = b'This is the content of the file uploaded from python boto3'

# Object key (file name) under which the content is stored
filename = 'file_name_20220825.txt'

S3_WriteObject(bucketname=bucket, objectname=filename, data=txt_data)

# Verify if the file is present in the bucket
S3_VerifyObject(bucket=bucket,objectkey=filename)

file_name_20220825.txt has been written successfully in S3
file_name_20220825.txt is present in the S3


## Store a data frame as a csv file in the S3 bucket

In [8]:
from io import StringIO
import numpy as np
import pandas as pd

In [9]:
# Create a small test data frame: three columns with six rows each
dict1 = {"company": ['TCS','WIPRO','INFOSYS','MICROSFT','GOOGLE','ORACLE'],
         "Location": ['CHENNAI','MYSORE','MYSORE','HYDERABAD','MUMBAI','MUMBAI' ],
         "SIZE": [40000,35000,45000,6000,5000,4000]}
df1 = pd.DataFrame(dict1)
# Sanity check: print the (rows, columns) shape
print(df1.shape)

(6, 3)


In [11]:
# Create a temporary in-memory text buffer
csv_buffer = StringIO()

# Serialize the data frame as CSV into the buffer; index=False leaves out
# the row-index column
df1.to_csv(csv_buffer,index=False)

In [12]:
# Object key for the csv file in the S3 bucket
s3_object_name = 'IT_Companies_4.csv'

# Write the CSV text to S3; getvalue() returns the full buffer content
S3_WriteObject(bucketname=bucket, objectname=s3_object_name, data=csv_buffer.getvalue())

# Verify if the file is present in the bucket
S3_VerifyObject(bucket=bucket,objectkey=s3_object_name)

IT_Companies_4.csv has been written successfully in S3
IT_Companies_4.csv is present in the S3


## Define a function to read a file from S3 bucket

In [13]:
def S3_ReadObject(bucketname,objectname):
    """Fetch an object from S3 and return its response body.

    Uses the module-level `s3_resource` and prints whether the get request
    came back with HTTP status 200.

    Args:
        bucketname: Name of the S3 bucket holding the object.
        objectname: Key of the object to read.

    Returns:
        The 'Body' entry of the get response; callers in this file call
        .read() on it or hand it to pandas.read_csv.
    """
    # Create S3 object handle with given bucket and filename
    S3object = s3_resource.Object(bucketname, objectname)

    # Request the object's content from S3
    response = S3object.get()

    # Verify the status of operation
    status = response.get("ResponseMetadata").get("HTTPStatusCode")

    if status == 200:
        print("File has been read successfully")
    else:
        # This is a read, so report the get operation (not put_object)
        print(f"Unsuccessful S3 get_object response. Status - {status}")

    return response['Body']

### Read a text file present in the S3 bucket

In [14]:
# Key of the text file uploaded earlier in this tutorial
objectname = 'file_name_20220825.txt'
content = S3_ReadObject(bucket, objectname)
# read() pulls the content out of the returned response body
text = content.read()
print(text)

File has been read successfully
b'This is the content of the file uploaded from python boto3'


### Read a csv file from S3 into a data frame

In [15]:
# Read the csv file from the S3 bucket
objectname = 'IT_Companies_4.csv'
content = S3_ReadObject(bucket, objectname)

# Parse the returned response body directly into a data frame
df2 = pd.read_csv(content)
print(df2)

File has been read successfully
    company   Location   SIZE
0       TCS    CHENNAI  40000
1     WIPRO     MYSORE  35000
2   INFOSYS     MYSORE  45000
3  MICROSFT  HYDERABAD   6000
4    GOOGLE     MUMBAI   5000
5    ORACLE     MUMBAI   4000


## Method2: ACCESS S3 USING Boto 3 CLIENT

In [16]:
# Create an S3 client for a specific AWS region
resource = 's3'        # service name passed to boto3.client
region = 'us-east-2'   # AWS region the client is created for
s3_client = boto3.client(service_name=resource,region_name=region)

## Define a function to write content into S3 file

In [17]:
def S3ClientWrite(data,bucket,objectkey):
    """Write `data` to S3 with the boto3 client, then confirm the key exists.

    Uses the module-level `s3_client`. After the upload, the bucket is
    listed to check that `objectkey` is there, and the result is printed.

    Args:
        data: Content to store (passed as the request Body).
        bucket: Name of the target S3 bucket.
        objectkey: Key under which the content is stored.
    """
    s3_client.put_object(Body=data, Bucket=bucket, Key=objectkey)

    # Verify if the file is present in the S3 bucket.
    # A single list call returns only one page of keys, so paginate, and
    # restrict the listing to keys starting with objectkey to keep it small.
    pages = s3_client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket, Prefix=objectkey
    )

    # 'Contents' is absent from a page with no matching keys, hence .get()
    found = any(
        entry['Key'] == objectkey
        for page in pages
        for entry in page.get('Contents', [])
    )

    if found:
        print(objectkey, 'has been saved in the S3 bucket')
    else:
        print(objectkey, 'is not present in the S3 bucket')

## Write a text data into a txt file

In [18]:
# Upload a .txt file to the S3 bucket using the boto3 client
txt_data = b'This is the content of the file uploaded using boto3 client'
# Same bucket name as set earlier in the notebook
bucket = 'mk-logs-from-cloudwatch'
objectkey = 'test1_Boto3Client.txt'
S3ClientWrite(txt_data, bucket, objectkey)

test1_Boto3Client.txt has been saved in the S3 bucket


## Write a dataframe into a csv file in S3

In [19]:
# Write the csv text to S3; csv_buffer holds the data frame serialized in
# an earlier cell
objectkey = 'IT_Companies_Boto3Client.csv'
S3ClientWrite(csv_buffer.getvalue(), bucket, objectkey)

IT_Companies_Boto3Client.csv has been saved in the S3 bucket


## Define a function to read a file from S3 bucket using S3 client

In [20]:
def S3ClientRead(bucket,objectkey):
    """Read an object from S3 with the boto3 client.

    Uses the module-level `s3_client`. The object is requested directly
    rather than searched for in a bucket listing, which would only cover
    the first page of keys.

    Args:
        bucket: Name of the S3 bucket holding the object.
        objectkey: Key of the object to read.

    Returns:
        The 'Body' entry of the get_object response (callers call .read()
        on it or pass it to pandas.read_csv), or None when the key does
        not exist; a message is printed in both cases.
    """
    # Get the file if it is present in the S3 bucket. Otherwise, print error message
    try:
        response = s3_client.get_object(Bucket=bucket, Key=objectkey)
    except s3_client.exceptions.NoSuchKey:
        print(objectkey, 'is not present in the S3 bucket')
        return None

    print(objectkey, 'is present in the S3 bucket')
    return response['Body']

## Read a text file

In [21]:
# Fetch the object body via the client helper and print its raw content.
# NOTE(review): 'file_name.txt' is not uploaded anywhere in this tutorial
# (the earlier upload used 'file_name_20220825.txt') - confirm the key.
# S3ClientRead returns None for a missing key, which would make
# data.read() fail.
data = S3ClientRead('mk-logs-from-cloudwatch','file_name.txt')
string = data.read()
print(string)

file_name.txt is present in the S3 bucket
b'This is the content of the file uploaded from python boto3'


## Read a csv file

In [22]:
# Fetch the csv written with the client and parse the returned body into
# a data frame
data = S3ClientRead('mk-logs-from-cloudwatch','IT_Companies_Boto3Client.csv')
df=pd.read_csv(data)
print(df)

IT_Companies_Boto3Client.csv is present in the S3 bucket
    company   Location   SIZE
0       TCS    CHENNAI  40000
1     WIPRO     MYSORE  35000
2   INFOSYS     MYSORE  45000
3  MICROSFT  HYDERABAD   6000
4    GOOGLE     MUMBAI   5000
5    ORACLE     MUMBAI   4000
