### Reading the files into AWS Wrangler and writing to S3

In [1]:
#import below libraries
import awswrangler as wr
import pandas as pd
import boto3
import warnings 
warnings.filterwarnings('ignore')
import configparser

In [2]:
# Create an empty ConfigParser; the credentials file is loaded into it in the next cell.
credents = configparser.ConfigParser()

In [3]:
# Load the credentials file into the parser.
# The `with` block closes the file handle as soon as parsing finishes;
# passing a bare open(...) to read_file() would leave it open.
with open('credentials.config') as config_file:
    credents.read_file(config_file)

In [4]:
# Pull the AWS settings out of the parsed config so the key and secret
# never appear as literals in the notebook.
aws_key, aws_secret, region = (
    credents["AWS"]["KEY"],
    credents["AWS"]["SECRET"],
    credents["AWS"]["REGION"],
)

In [5]:
# Build a boto3 session from the loaded credentials; it is passed to every
# awswrangler call in this notebook through the `boto3_session` argument.
your_session = boto3.Session(
    aws_access_key_id=aws_key,
    aws_secret_access_key=aws_secret,
    region_name=region,
)

### Since we are reading from S3, the bucket is the source

In [7]:
# S3 bucket this notebook reads from.
# NOTE(review): the bucket is literally named "destination-folder" even though
# it is used as the source here — presumably it was the target of an earlier
# write step; confirm.
source_bucket = "s3://destination-folder"

In [31]:
# Key prefixes inside the bucket, one per file format.
csv_folder = "/csv"
excel_folder = "/excel"
json_folder = "/json"
parquet_folder = "/parquet"

In [9]:
# Compose the S3 path once so later cells can reuse it.
csv_path = f"{source_bucket}{csv_folder}"
csv_path

's3://destination-folder/csv'

### Reading CSV file inside S3

wr.s3.read_csv( path: Union[str, List[str]],
    
    path_suffix: Union[str, List[str], NoneType] = None,
    
    path_ignore_suffix: Union[str, List[str], NoneType] = None,
    
    boto3_session: Optional[boto3.session.Session] = None,
    
    chunksize: Optional[int] = None,
    
    dataset: bool = False) 

In [10]:
# Load the CSV data stored under `csv_path`, authenticating with the session
# created earlier in the notebook.
read_wr_csv = wr.s3.read_csv(
    path=csv_path,
    boto3_session=your_session,
)

In [11]:
# Display the DataFrame returned by wr.s3.read_csv.
read_wr_csv

Unnamed: 0.1,Unnamed: 0,id,name,E-mail,Salary,occupation
0,0,1,Joel,Joel@Athena.com,187069,Mathematician
1,1,2,Afro,Afro@Glue.aws.in,752689,Physicist
2,2,3,Beatles,beatles@lambda.com,975682,Algorist
3,3,4,Snoop Dog,snoopy@apigateway.com,752689,Artificial Rapper


### Commonly used params in wr.s3.to_csv()
wr.s3.to_csv(df: pandas.core.frame.DataFrame,
    
    path: Optional[str] = None,
    
    index: bool = True,
    
    boto3_session: Optional[boto3.session.Session] = None,
    
    dataset: bool = False,
    
    filename_prefix: Optional[str] = None,
    
    partition_cols: Optional[List[str]] = None,
    
    bucketing_info: Optional[Tuple[List[str], int]] = None,
    
    database: Optional[str] = None,
    
    table: Optional[str] = None)

#### Reading an XLS file: the full path is required

In [12]:
# Compose the S3 path of the Excel folder; the file name is appended at read time.
excel_path = f"{source_bucket}{excel_folder}"
excel_path

's3://destination-folder/excel'

### You have to provide the full path (prefix plus file name) for XLS files

In [16]:
# The path passed here is the folder path plus the workbook's file name,
# i.e. the full object path rather than just the prefix.
read_excel = wr.s3.read_excel(
    path=f"{excel_path}/write_top_earners.xls",
    boto3_session=your_session,
)
read_excel

Unnamed: 0.1,Unnamed: 0,id,name,E-mail,Salary,occupation
0,0,1,Joel,Joel@Athena.com,187069,Mathematician
1,1,2,Afro,Afro@Glue.aws.in,752689,Physicist
2,2,3,Beatles,beatles@lambda.com,975682,Algorist
3,3,4,Snoop Dog,snoopy@apigateway.com,752689,Artificial Rapper


#### Reading a Parquet file

In [29]:
# Full S3 path of the parquet object to read.
source = f"{source_bucket}{parquet_folder}/write_top_earners.parquet"
source

's3://destination-folder/parquet/write_top_earners.parquet'

In [30]:
# Read the parquet object at `source`.
# NOTE(review): the recorded output for this cell is an ArrowInvalid
# ("Parquet magic bytes not found in footer"), so the object at this key is
# presumably not a valid parquet file — verify how it was written.
read_parquet = wr.s3.read_parquet(
    path=source,
    boto3_session=your_session,
)
read_parquet

ArrowInvalid: Parquet magic bytes not found in footer. Either the file is corrupted or this is not a parquet file.

#### Reading a JSON file

In [32]:
# Point `source` at the JSON folder (this rebinds the name used for the parquet path).
source = f"{source_bucket}{json_folder}"
source

's3://destination-folder/json'

In [33]:
# Load the JSON data stored under `source` using the shared session.
json_read = wr.s3.read_json(
    path=source,
    boto3_session=your_session,
)

In [34]:
# Display the DataFrame returned by wr.s3.read_json.
json_read

Unnamed: 0,id,name,E-mail,Salary,occupation
0,1,Joel,Joel@Athena.com,187069,Mathematician
1,2,Afro,Afro@Glue.aws.in,752689,Physicist
2,3,Beatles,beatles@lambda.com,975682,Algorist
3,4,Snoop Dog,snoopy@apigateway.com,752689,Artificial Rapper


#### Cleaning up

In [21]:
# `destination_bucket` is never defined in this notebook, so the original call
# raised NameError on a fresh kernel. The bucket used throughout is
# `source_bucket`, so that is what gets cleaned up.
# WARNING: this permanently deletes the objects under that path.
wr.s3.delete_objects(path=source_bucket, boto3_session=your_session)

In [22]:
# Verify the clean-up: listing the bucket should return an empty list.
# Uses `source_bucket` because `destination_bucket` is not defined anywhere in
# this notebook and would raise NameError on a fresh kernel.
wr.s3.list_objects(source_bucket, boto3_session=your_session)

[]