# Helper package for AWS S3 with boto3

### Functions covered

* S3
  * `upload`
  * `read_file`

In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
from boto3_custom.simple_storage import upload, read_file

### Upload directory of files into S3

##### Setup: create directory containing a file

In [None]:
# Setup: build a small local directory holding one CSV file to upload.
from sklearn.datasets import load_iris, load_diabetes
iris = load_iris()
df = pd.DataFrame(iris['data'], columns=list(iris.feature_names))

dir1 = 'new_folder'
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from an earlier run.
os.makedirs(dir1, exist_ok=True)
df.to_csv(os.path.join(dir1, 'iris.csv'))

#### Upload folder

In [None]:
# Pass the name of the directory created in the setup cell; presumably every
# file inside it is uploaded (confirm against boto3_custom.simple_storage.upload).
upload('new_folder')

##### Setup: create file within subdirectory

In [None]:
# Build a second dataset (diabetes) and write it into the same folder as a CSV.
diab = load_diabetes()
diab_columns = list(diab.feature_names)
diab_csv_path = os.path.join(dir1, 'diab.csv')
df2 = pd.DataFrame(data=diab['data'], columns=diab_columns)
df2.to_csv(diab_csv_path)

#### Upload file

Remember that when uploading a file, a value for `sub_dir` (subdirectory) is required.

In [None]:
# os.getcwd()  # show current working directory

In [None]:
# Step into the folder so the next cell can refer to the file by its bare
# name ('diab.csv') instead of by a path.
os.chdir('new_folder')  # change directory (into folder)
# os.getcwd()

In [None]:
# Upload the single file by its bare name (the previous cell changed the
# working directory to 'new_folder').
# The try/finally guarantees we step back up one level even when the upload
# raises; otherwise the kernel would stay inside 'new_folder' and every later
# cell that uses a relative path would break.
try:
    upload(obj='diab.csv', sub_dir='new_folder')
finally:
    os.chdir('..')  # change directory (up one level)
# os.getcwd()

### Read a file from S3

In [None]:
# Fetch the CSV uploaded above. The result is used as a DataFrame-like object
# here (`.head()` is called on it) -- the exact return type of read_file is not
# shown in this notebook, TODO confirm.
df_diabetes = read_file(sub_dir='new_folder', file='diab.csv')
df_diabetes.head()

##### Clean up

In [None]:
# Clear the local workspace: delete the folder created in the setup cell
# together with everything inside it.
# shutil.rmtree replaces the manual listdir/remove/rmdir sequence, which
# fails as soon as the folder contains a nested directory.
import shutil

shutil.rmtree(dir1)

In [None]:
# List what is stored under the 'new_folder/' prefix of the bucket, to confirm
# the uploads made by the cells above (uses the 'dl_user' AWS CLI profile).
!aws s3 ls 's3://originchain/new_folder/' --profile dl_user

In [None]:
# Clear the contents this notebook uploaded to AWS S3: recursively delete the
# objects under 's3://originchain/new_folder' (destructive -- check the path
# before running).
!aws s3 rm --recursive 's3://originchain/new_folder' --profile dl_user