### Writing a pandas DataFrame to S3 with AWS Wrangler

In [1]:
#import below libraries
import awswrangler as wr
import pandas as pd
import boto3
import warnings 
warnings.filterwarnings('ignore')
import configparser

In [2]:
# Create the parser that will hold the AWS credentials. Keeping them in a
# separate config file keeps the secrets out of the notebook itself.
credents = configparser.ConfigParser()

In [3]:
# Parse the credentials file into `credents`. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open for the
# lifetime of the kernel.
with open('credentials.config') as credentials_file:
    credents.read_file(credentials_file)

In [4]:
# Pull the key, secret and region out of the [AWS] section of the parsed
# config so the values never have to be typed into the notebook, where
# anyone could see them.
aws_key, aws_secret, region = (
    credents["AWS"][option] for option in ("KEY", "SECRET", "REGION")
)

In [5]:
# Build an explicit boto3 session from the loaded credentials; it is handed
# to every awswrangler call below through `boto3_session`.
your_session = boto3.Session(
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret,
    region_name=region,
)

In [6]:
# Root S3 URI under which every file in this notebook is written.
destination_bucket = 's3://destination-folder'

### Writing a pandas DataFrame to S3

In [7]:
import pandas as pd

In [8]:
# Peek at the raw CSV on disk before loading it with pandas
# (the leading `!` runs the line as a shell command).
!cat source_folder/top_earners_list.csv

id,name,E-mail,Salary,occupation
1,Joel,Joel@Athena.com,187069,Mathematician
2,Afro,Afro@Glue.aws.in,752689,Physicist
3,Beatles,beatles@lambda.com,975682,Algorist
4,Snoop Dog,snoopy@apigateway.com,752689,Artificial Rapper


In [31]:
# Load the local CSV into a DataFrame; the bare name on the last line makes
# the notebook render the frame as the cell's output.
earners_csv = pd.read_csv("source_folder/top_earners_list.csv")
earners_csv

Unnamed: 0,id,name,E-mail,Salary,occupation
0,1,Joel,Joel@Athena.com,187069,Mathematician
1,2,Afro,Afro@Glue.aws.in,752689,Physicist
2,3,Beatles,beatles@lambda.com,975682,Algorist
3,4,Snoop Dog,snoopy@apigateway.com,752689,Artificial Rapper


In [32]:
# Confirm the loaded object is a pandas DataFrame, the type that
# wr.s3.to_csv expects for its `df` argument.
type(earners_csv)

pandas.core.frame.DataFrame

### Commonly used params in wr.s3.to_csv()
wr.s3.to_csv(df: pandas.core.frame.DataFrame,
    
    path: Optional[str] = None,
    
    index: bool = True,
    
    boto3_session: Optional[boto3.session.Session] = None,
    
    dataset: bool = False,
    
    filename_prefix: Optional[str] = None,
    
    partition_cols: Optional[List[str]] = None,
    
    bucketing_info: Optional[Tuple[List[str], int]] = None,
    
    database: Optional[str] = None,
    
    table: Optional[str] = None)

#### Writing as CSV file

In [33]:
# Preview the full S3 URI the CSV will be written to.
f"{destination_bucket}/csv/write_top_earners.csv"

's3://destination-folder/csv/write_top_earners.csv'

In [34]:
# List what is currently stored under the destination before writing anything.
wr.s3.list_objects(
    path=destination_bucket,
    boto3_session=your_session,
)

[]

In [44]:
# Upload the DataFrame as a single CSV object. `index` is left at its default
# (True), so the DataFrame's row index is included in the written file.
wr.s3.to_csv(
    df=earners_csv,
    path=f"{destination_bucket}/csv/write_top_earners.csv",
    boto3_session=your_session,
)

{'paths': ['s3://destination-folder/csv/write_top_earners.csv'],
 'partitions_values': {}}

In [36]:
# Confirm the CSV landed in S3 by listing the destination again.
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv']

#### Writing as xls file

In [45]:
# Upload the DataFrame as a legacy Excel (.xls) workbook.
# NOTE(review): writing .xls relies on the Excel writer engine available to
# the installed pandas — confirm it is still supported in your environment.
wr.s3.to_excel(
    df=earners_csv,
    path=f"{destination_bucket}/excel/write_top_earners.xls",
    boto3_session=your_session,
)

's3://destination-folder/excel/write_top_earners.xls'

In [38]:
# Confirm the .xls workbook now appears alongside the earlier output.
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls']

#### Writing as xlsx file (a different format from xls)

In [46]:
# Upload the same DataFrame again, this time as an .xlsx workbook.
wr.s3.to_excel(
    df=earners_csv,
    path=f"{destination_bucket}/excel/write_top_earners.xlsx",
    boto3_session=your_session,
)

's3://destination-folder/excel/write_top_earners.xlsx'

In [40]:
# Confirm the .xlsx workbook was written by listing the destination.
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls',
 's3://destination-folder/excel/write_top_earners.xlsx']

#### Writing as parquet file

In [48]:
# Write the DataFrame as a parquet dataset. With dataset=True the path is a
# folder-like prefix and the file name is generated for us (see the output).
wr.s3.to_parquet(
    df=earners_csv,
    path=f"{destination_bucket}/parquet/",
    dataset=True,
    boto3_session=your_session,
)

{'paths': ['s3://destination-folder/parquet/db1c4a0fa56c4db9b9d6044cd94cd5e1.snappy.parquet'],
 'partitions_values': {}}

In [25]:
# Confirm the parquet file was written by listing the destination.
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls',
 's3://destination-folder/excel/write_top_earners.xlsx',
 's3://destination-folder/parquet/dc60850d9f784c6cad70f79df29c6eeb.snappy.parquet']

#### Writing as json file

In [49]:
# Upload the DataFrame as a single JSON object.
wr.s3.to_json(
    df=earners_csv,
    path=f"{destination_bucket}/json/write_top_earners.json",
    boto3_session=your_session,
)

['s3://destination-folder/json/write_top_earners.json']

In [50]:
# Confirm the JSON file was written by listing the destination.
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

['s3://destination-folder/csv/write_top_earners.csv',
 's3://destination-folder/excel/write_top_earners.xls',
 's3://destination-folder/excel/write_top_earners.xlsx',
 's3://destination-folder/json/write_top_earners.json',
 's3://destination-folder/parquet/db1c4a0fa56c4db9b9d6044cd94cd5e1.snappy.parquet',
 's3://destination-folder/parquet_exercise/7530e940483a46ce8ca246de57a3291c.snappy.parquet']

#### Cleaning up

In [42]:
# Remove every object under the destination so the demo leaves nothing behind.
wr.s3.delete_objects(
    path=destination_bucket,
    boto3_session=your_session,
)

In [43]:
# Verify the cleanup: after the delete, the listing should come back empty.
wr.s3.list_objects(path=destination_bucket, boto3_session=your_session)

[]