### Reading the files into AWS Wrangler and writing to S3

In [2]:
#import below libraries
import awswrangler as wr
import pandas as pd
import boto3
import warnings 
warnings.filterwarnings('ignore')
import configparser

In [3]:
# Create an empty ConfigParser; the AWS credentials are loaded into it from a
# local config file rather than being hard-coded in the notebook.
credents = configparser.ConfigParser()

In [4]:
# Load 'credentials.config' into the parser.
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes (a bare open(...) passed to read_file is never closed).
# A missing file still raises FileNotFoundError, as before.
with open('credentials.config') as credentials_file:
    credents.read_file(credentials_file)

In [5]:
# Copy the credentials from the [AWS] section into Python variables.
# Nothing is displayed here, so the values never appear in the notebook output.
aws_key = credents["AWS"]["KEY"]
aws_secret = credents["AWS"]["SECRET"]
region = credents["AWS"]["REGION"]

In [6]:
# Build the boto3 session that every awswrangler call in this notebook
# receives through its `boto3_session` argument.
your_session = boto3.Session(
    region_name=region,
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret,
)

In [10]:
# Base S3 URI; every write below appends a '/<format>/<file>' suffix to it.
destination_bucket = "s3://destination-folder"

### Writing a pandas DataFrame to S3

In [8]:
# Load the local source CSV into a DataFrame and display it.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests
# the CSV was saved together with its index; consider index_col=0 - confirm.
earners_csv = pd.read_csv(
    filepath_or_buffer="source_folder/top_earners_list.csv",
)
earners_csv

Unnamed: 0,id,name,E-mail,Salary,occupation
0,1,Joel,Joel@Athena.com,187069,Mathematician
1,2,Afro,Afro@Glue.aws.in,752689,Physicist
2,3,Beatles,beatles@lambda.com,975682,Algorist
3,4,Snoop Dog,snoopy@apigateway.com,752689,Artificial Rapper


### Commonly used params in wr.s3.to_csv()
wr.s3.to_csv(df: pandas.core.frame.DataFrame,
    
    path: Optional[str] = None,
    
    index: bool = True,
    
    boto3_session: Optional[boto3.session.Session] = None,
    
    dataset: bool = False,
    
    filename_prefix: Optional[str] = None,
    
    partition_cols: Optional[List[str]] = None,
    
    bucketing_info: Optional[Tuple[List[str], int]] = None,
    
    database: Optional[str] = None,
    
    table: Optional[str] = None)

#### Writing as CSV file

In [23]:
# Upload the DataFrame as a CSV object under the csv/ prefix of the bucket.
# The call's return value is displayed as the cell output.
wr.s3.to_csv(
    boto3_session=your_session,
    df=earners_csv,
    path=destination_bucket + '/csv/write_top_earners.csv',
)

{'paths': ['s3://destination-folder/csv/write_top_earners.csv'],
 'partitions_values': {}}

In [24]:
# Confirm the CSV object now exists by listing everything under the bucket
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv']

#### Writing as xls file

In [25]:
# Upload the DataFrame as a legacy .xls workbook under the excel/ prefix.
# NOTE(review): writing .xls depends on the Excel writer engine available in
# the installed pandas version - confirm it is still supported.
wr.s3.to_excel(
    boto3_session=your_session,
    df=earners_csv,
    path=destination_bucket + '/excel/write_top_earners.xls',
)

's3://destination-folder/excel/write_top_earners.xls'

In [26]:
# Confirm the .xls object now exists by listing everything under the bucket
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls']

#### Writing as xlsx file (a different format from xls)

In [31]:
# Upload the DataFrame as an .xlsx workbook under the excel/ prefix
wr.s3.to_excel(
    boto3_session=your_session,
    df=earners_csv,
    path=destination_bucket + '/excel/write_top_earners.xlsx',
)

's3://destination-folder/excel/write_top_earners.xlsx'

In [32]:
# Confirm the .xlsx object now exists by listing everything under the bucket
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls',
 's3://destination-folder/excel/write_top_earners.xlsx',
 's3://destination-folder/json/write_top_earners.json',
 's3://destination-folder/parquet/write_top_earners.parquet']

#### Writing as parquet file

In [33]:
# Upload the DataFrame as a Parquet object under the parquet/ prefix.
# wr.s3.to_parquet is used so the object really is Parquet-encoded; calling
# wr.s3.to_csv with a '.parquet' path would store plain CSV text under a
# misleading key, and Parquet readers would fail on it.
# The call's return value is displayed as the cell output.
wr.s3.to_parquet(
    df=earners_csv,
    path=destination_bucket + '/parquet/write_top_earners.parquet',
    boto3_session=your_session,
)

{'paths': ['s3://destination-folder/parquet/write_top_earners.parquet'],
 'partitions_values': {}}

In [34]:
# Confirm the parquet object now exists by listing everything under the bucket
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls',
 's3://destination-folder/excel/write_top_earners.xlsx',
 's3://destination-folder/json/write_top_earners.json',
 's3://destination-folder/parquet/write_top_earners.parquet']

#### Writing as json file

In [29]:
# Upload the DataFrame as a JSON object under the json/ prefix
wr.s3.to_json(
    boto3_session=your_session,
    df=earners_csv,
    path=destination_bucket + '/json/write_top_earners.json',
)

['s3://destination-folder/json/write_top_earners.json']

In [30]:
# Confirm the JSON object now exists by listing everything under the bucket
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls',
 's3://destination-folder/json/write_top_earners.json',
 's3://destination-folder/parquet/write_top_earners.parquet']

#### Cleaning up

In [35]:
# Clean up: delete the objects under destination_bucket that the cells above
# created (the path passed is the bucket root, not a single key).
wr.s3.delete_objects(
    boto3_session=your_session,
    path=destination_bucket,
)

In [36]:
# Verify the data was deleted: the listing should come back empty
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

[]