# AWS Glue Studio Notebook
##### You are now running an AWS Glue Studio notebook. To start using your notebook, you need to start an AWS Glue Interactive Session.


#### Optional: Run this cell to see available notebook commands ("magics").


####  Run this cell to set up and start your interactive session.


In [3]:
%idle_timeout 2880
%glue_version 3.0
%worker_type G.1X
%number_of_workers 5

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
# Reuse the active SparkContext if there is one, otherwise create it.
sc = SparkContext.getOrCreate()
# Wrap the SparkContext in a GlueContext; the later cells use it to read
# catalog tables and to write DynamicFrames to S3.
glueContext = GlueContext(sc)
# Spark session exposed by the GlueContext.
spark = glueContext.spark_session
# Glue Job handle bound to this GlueContext.
job = Job(glueContext)

You are already connected to a glueetl session 60651a68-2a8a-42f4-bf71-19d30d4a4a6e.

No change will be made to the current session that is set as glueetl. The session configuration change will apply to newly created sessions.


Current idle_timeout is 2880 minutes.
idle_timeout has been set to 2880 minutes.


You are already connected to a glueetl session 60651a68-2a8a-42f4-bf71-19d30d4a4a6e.

No change will be made to the current session that is set as glueetl. The session configuration change will apply to newly created sessions.


Setting Glue version to: 3.0


You are already connected to a glueetl session 60651a68-2a8a-42f4-bf71-19d30d4a4a6e.

No change will be made to the current session that is set as glueetl. The session configuration change will apply to newly created sessions.


Previous worker type: G.1X
Setting new worker type to: G.1X


You are already connected to a glueetl session 60651a68-2a8a-42f4-bf71-19d30d4a4a6e.

No change will be made to the current session that is set as glueetl. The session configuration change will apply to newly created sessions.


Previous number of workers: 5
Setting new number of workers to: 5



In [6]:
# Read every source table of the 'covid_19' Glue Data Catalog database into a
# DynamicFrame. The target names and the table names are listed in the same
# order: each variable is its table name followed by '_dyf'.
(
    enigma_jhu_dyf,
    nytimes_data_in_usa_us_county_dyf,
    nytimes_data_in_usa_us_states_dyf,
    rearc_covid19_testing_states_daily_dyf,
    rearc_covid19_testing_us_daily_dyf,
    rearc_covid19_testing_us_total_latest_dyf,
    rearc_usa_hospital_beds_dyf,
    static_dataset_countrycode_dyf,
    static_dataset_countypopulation_dyf,
    static_dataset_state_abv_dyf,
) = (
    glueContext.create_dynamic_frame.from_catalog(database='covid_19', table_name=catalog_table)
    for catalog_table in (
        'enigma_jhu',
        'nytimes_data_in_usa_us_county',
        'nytimes_data_in_usa_us_states',
        'rearc_covid19_testing_states_daily',
        'rearc_covid19_testing_us_daily',
        'rearc_covid19_testing_us_total_latest',
        'rearc_usa_hospital_beds',
        'static_dataset_countrycode',
        'static_dataset_countypopulation',
        'static_dataset_state_abv',
    )
)




In [9]:
# Convert each Glue DynamicFrame into a Spark DataFrame. Targets and sources
# are listed in the same order ('<name>_dyf' -> '<name>_df').
(
    enigma_jhu_df,
    nytimes_data_in_usa_us_county_df,
    nytimes_data_in_usa_us_states_df,
    rearc_covid19_testing_states_daily_df,
    rearc_covid19_testing_us_daily_df,
    rearc_covid19_testing_us_total_latest_df,
    rearc_usa_hospital_beds_df,
    static_dataset_countrycode_df,
    static_dataset_countypopulation_df,
    static_dataset_state_abv_df,
) = (
    dynamic_frame.toDF()
    for dynamic_frame in (
        enigma_jhu_dyf,
        nytimes_data_in_usa_us_county_dyf,
        nytimes_data_in_usa_us_states_dyf,
        rearc_covid19_testing_states_daily_dyf,
        rearc_covid19_testing_us_daily_dyf,
        rearc_covid19_testing_us_total_latest_dyf,
        rearc_usa_hospital_beds_dyf,
        static_dataset_countrycode_dyf,
        static_dataset_countypopulation_dyf,
        static_dataset_state_abv_dyf,
    )
)







#### Handle county level fips code

In [10]:
# Preview the first 5 rows of the JHU frame before any FIPS clean-up is applied.
enigma_jhu_df.show(5)

+------+--------------+--------------+-------------------+--------+---------+---------+------+---------------+---------+----+------+
|admin2|province_state|country_region|        last_update|latitude|longitude|confirmed|active|   combined_key|recovered|fips|deaths|
+------+--------------+--------------+-------------------+--------+---------+---------+------+---------------+---------+----+------+
|      |      Liaoning|         China|2020-01-22T17:00:00|  41.296|  122.609|        2|      |Liaoning, China|     null|null|  null|
|      |       Beijing|         China|2020-01-23T17:00:00|  40.182|  116.414|       22|      | Beijing, China|     null|null|  null|
|      |       Ningxia|         China|2020-01-23T17:00:00|  37.269|  106.165|        1|      | Ningxia, China|     null|null|  null|
|      |              |       Vietnam|2020-01-23T17:00:00|    16.0|    108.0|        2|      |        Vietnam|     null|null|  null|
|      |           Hai|         China|2020-01-24T17:00:00|  19.196|  

In [17]:
from pyspark.sql.functions import when, length, concat, col, format_string




In [31]:
def _normalize_county_fips(frame):
    """Return ``frame`` with a cleaned, zero-padded county-level ``fips`` column.

    The existing ``fips`` value is cast to int and formatted with "%05d", so a
    4-digit code gains a leading '0'. The rebuilt column ends up as the last
    column of the frame. Only rows whose ``fips`` is exactly five digits and
    compares (as a string) between '01000' and '56045' inclusive are kept.
    """
    padded_fips = format_string("%05d", col('old_fips').cast('int'))
    has_five_digits = col('fips').rlike('^[0-9]{5}$')
    within_county_range = (col('fips') >= '01000') & (col('fips') <= '56045')
    return (
        frame.withColumnRenamed('fips', 'old_fips')
        .withColumn('fips', padded_fips)
        .drop('old_fips')
        .filter(has_five_digits & within_county_range)
    )


# Apply the same county-level FIPS clean-up to the three frames that carry one.
enigma_jhu_df = _normalize_county_fips(enigma_jhu_df)
nytimes_data_in_usa_us_county_df = _normalize_county_fips(nytimes_data_in_usa_us_county_df)
rearc_usa_hospital_beds_df = _normalize_county_fips(rearc_usa_hospital_beds_df)
                    




In [32]:
# Inspect the first 10 rows after the county FIPS clean-up (fips is now the last column).
enigma_jhu_df.show(10)

+------------+--------------+--------------+-------------------+--------+---------+---------+------+--------------------+---------+------+-----+
|      admin2|province_state|country_region|        last_update|latitude|longitude|confirmed|active|        combined_key|recovered|deaths| fips|
+------------+--------------+--------------+-------------------+--------+---------+---------+------+--------------------+---------+------+-----+
|  San Benito|    California|            US|2020-02-03T03:53:02|  36.576| -120.988|        2|      |San Benito, Calif...|        0|     0|06069|
|        King|    Washington|            US|2020-03-02T20:23:16|  47.548| -121.984|       14|      |King, Washington, US|        1|     5|53033|
|        King|    Washington|            US|2020-03-04T19:53:02|  47.548| -121.984|       31|      |King, Washington, US|        1|     9|53033|
|   Snohomish|    Washington|            US|2020-03-04T19:53:02|  48.033| -121.834|        8|      |Snohomish, Washin...|        0

In [34]:
# Rename the 'admin2' column of the JHU frame to 'county'.
enigma_jhu_df = enigma_jhu_df.withColumnRenamed('admin2', 'county')




In [35]:
# Inspect the first 10 rows to confirm the 'admin2' -> 'county' rename.
enigma_jhu_df.show(10)

+------------+--------------+--------------+-------------------+--------+---------+---------+------+--------------------+---------+------+-----+
|      county|province_state|country_region|        last_update|latitude|longitude|confirmed|active|        combined_key|recovered|deaths| fips|
+------------+--------------+--------------+-------------------+--------+---------+---------+------+--------------------+---------+------+-----+
|  San Benito|    California|            US|2020-02-03T03:53:02|  36.576| -120.988|        2|      |San Benito, Calif...|        0|     0|06069|
|        King|    Washington|            US|2020-03-02T20:23:16|  47.548| -121.984|       14|      |King, Washington, US|        1|     5|53033|
|        King|    Washington|            US|2020-03-04T19:53:02|  47.548| -121.984|       31|      |King, Washington, US|        1|     9|53033|
|   Snohomish|    Washington|            US|2020-03-04T19:53:02|  48.033| -121.834|        8|      |Snohomish, Washin...|        0

In [67]:
# Check nytimes_data_in_usa_us_county_df: show its first 10 rows, including
# the rebuilt 'fips' column.
nytimes_data_in_usa_us_county_df.show(10)

+----------+-----------+----------+-----+------+-----+
|      date|     county|     state|cases|deaths| fips|
+----------+-----------+----------+-----+------+-----+
|2020-01-21|  Snohomish|Washington|    1|     0|53061|
|2020-01-22|  Snohomish|Washington|    1|     0|53061|
|2020-01-23|  Snohomish|Washington|    1|     0|53061|
|2020-01-24|       Cook|  Illinois|    1|     0|17031|
|2020-01-24|  Snohomish|Washington|    1|     0|53061|
|2020-01-25|     Orange|California|    1|     0|06059|
|2020-01-25|       Cook|  Illinois|    1|     0|17031|
|2020-01-25|  Snohomish|Washington|    1|     0|53061|
|2020-01-26|   Maricopa|   Arizona|    1|     0|04013|
|2020-01-26|Los Angeles|California|    1|     0|06037|
+----------+-----------+----------+-----+------+-----+
only showing top 10 rows


In [37]:
# Check rearc_usa_hospital_beds_df: show its first 10 rows, including the
# rebuilt 'fips' column.
rearc_usa_hospital_beds_df.show(10)

+--------+--------------------+--------------------+--------------------+-----------+-----------+--------+-----------+--------------------+--------------------+----------+---------+-----------------+----------------+------------+--------------+-------------+---------------+--------------------+-------------------------------+------------------+-------------------+-----+
|OBJECTID|       HOSPITAL_NAME|       HOSPITAL_TYPE|          HQ_ADDRESS|HQ_ADDRESS1|    HQ_CITY|HQ_STATE|HQ_ZIP_CODE|         COUNTY_NAME|          STATE_NAME|STATE_FIPS|CNTY_FIPS|NUM_LICENSED_BEDS|NUM_STAFFED_BEDS|NUM_ICU_BEDS|ADULT_ICU_BEDS|PEDI_ICU_BEDS|BED_UTILIZATION|AVG_VENTILATOR_USAGE|Potential_Increase_In_Bed_Capac|          latitude|         longtitude| fips|
+--------+--------------------+--------------------+--------------------+-----------+-----------+--------+-----------+--------------------+--------------------+----------+---------+-----------------+----------------+------------+--------------+----------

In [39]:
# Normalise the headquarters ZIP code to a zero-padded string of width 5.
# format_string() renders a NULL argument as the text "null", so a ZIP code
# that is missing or cannot be cast to int is kept as a real NULL through
# when() (no otherwise() branch) instead of becoming a "null" string.
# The rename / add / drop sequence moves 'hq_zip_code' to the end of the frame.
# NOTE(review): the column is addressed in lower case while the frame shows it
# as HQ_ZIP_CODE — this relies on case-insensitive column resolution.
zip_as_int = col('old_hq_zip_code').cast('int')
rearc_usa_hospital_beds_df = (
    rearc_usa_hospital_beds_df
    .withColumnRenamed('hq_zip_code', 'old_hq_zip_code')
    .withColumn('hq_zip_code', when(zip_as_int.isNotNull(), format_string("%05d", zip_as_int)))
    .drop('old_hq_zip_code')
)




In [41]:
# Drop the 'hq_address1' column from the hospital-beds frame.
rearc_usa_hospital_beds_df = rearc_usa_hospital_beds_df.drop('hq_address1')




In [42]:
# Inspect the first 5 rows after the ZIP code change and the 'hq_address1' drop.
rearc_usa_hospital_beds_df.show(5)

+--------+--------------------+-------------+--------------------+----------+--------+-----------+-----------+----------+---------+-----------------+----------------+------------+--------------+-------------+---------------+--------------------+-------------------------------+------------------+-------------------+-----+-----------+
|OBJECTID|       HOSPITAL_NAME|HOSPITAL_TYPE|          HQ_ADDRESS|   HQ_CITY|HQ_STATE|COUNTY_NAME| STATE_NAME|STATE_FIPS|CNTY_FIPS|NUM_LICENSED_BEDS|NUM_STAFFED_BEDS|NUM_ICU_BEDS|ADULT_ICU_BEDS|PEDI_ICU_BEDS|BED_UTILIZATION|AVG_VENTILATOR_USAGE|Potential_Increase_In_Bed_Capac|          latitude|         longtitude| fips|hq_zip_code|
+--------+--------------------+-------------+--------------------+----------+--------+-----------+-----------+----------+---------+-----------------+----------------+------------+--------------+-------------+---------------+--------------------+-------------------------------+------------------+-------------------+-----+--------

In [43]:
# Rename the county-level 'fips' column to 'county_fips' in each of the three
# frames that carry it.
enigma_jhu_df = enigma_jhu_df.withColumnRenamed('fips', 'county_fips')
nytimes_data_in_usa_us_county_df = nytimes_data_in_usa_us_county_df.withColumnRenamed('fips', 'county_fips')
rearc_usa_hospital_beds_df = rearc_usa_hospital_beds_df.withColumnRenamed('fips', 'county_fips')




#### Handle state level fips code

In [48]:
def _with_state_fips(frame, source_column):
    """Replace ``source_column`` of ``frame`` with a two-character ``state_fips``.

    The source value is cast to int and formatted with "%02d"; the source
    column is then dropped. Only rows whose ``state_fips`` is exactly two
    digits and compares (as a string) between '01' and '56' inclusive are kept.
    """
    padded_code = format_string("%02d", col(source_column).cast('int'))
    has_two_digits = col('state_fips').rlike('^[0-9]{2}$')
    within_state_range = (col('state_fips') >= '01') & (col('state_fips') <= '56')
    return (
        frame.withColumn('state_fips', padded_code)
        .drop(source_column)
        .filter(has_two_digits & within_state_range)
    )


# State-daily testing data: the code is taken from the 'fips' column.
rearc_covid19_testing_states_daily_df = _with_state_fips(rearc_covid19_testing_states_daily_df, 'fips')
# US-daily testing data: the code is taken from the 'states' column.
# NOTE(review): the sample output below shows state_fips = 56 on every row;
# confirm that 'states' really holds a FIPS code and not, e.g., a count.
rearc_covid19_testing_us_daily_df = _with_state_fips(rearc_covid19_testing_us_daily_df, 'states')





In [52]:
# Inspect the first 10 derived 'state_fips' values of the US-daily frame.
rearc_covid19_testing_us_daily_df.select('state_fips').show(10)

+----------+
|state_fips|
+----------+
|        56|
|        56|
|        56|
|        56|
|        56|
|        56|
|        56|
|        56|
|        56|
|        56|
+----------+
only showing top 10 rows


#### Handle dates in the data

In [76]:
from pyspark.sql.functions import to_date, date_format, col, to_timestamp 




In [106]:
# Rewrite 'date' from a yyyyMMdd value into a 'yyyy-MM-dd' string.
# The helper columns 'date_str' and 'timestamp' stay on the frame here and are
# removed in the following cell.
rearc_covid19_testing_states_daily_df = (
    rearc_covid19_testing_states_daily_df
    .withColumn('date_str', col('date').cast('string'))
    .withColumn('timestamp', to_timestamp("date_str", "yyyyMMdd"))
    .withColumn('date', date_format('timestamp', 'yyyy-MM-dd'))
)




In [108]:
# Remove the two helper columns that were only needed to rebuild 'date'.
rearc_covid19_testing_states_daily_df = rearc_covid19_testing_states_daily_df.drop(
    'date_str', 'timestamp'
)




In [110]:
# Inspect the first 5 reformatted 'date' values of the state-daily frame.
rearc_covid19_testing_states_daily_df.select('date').show(5)

+----------+
|      date|
+----------+
|2021-03-07|
|2021-03-07|
|2021-03-07|
|2021-03-07|
|2021-03-07|
+----------+
only showing top 5 rows


In [111]:
# Rewrite 'date' from a yyyyMMdd value into a 'yyyy-MM-dd' string.
# The helper columns 'date_str' and 'timestamp' stay on the frame here and are
# removed in the following cell.
rearc_covid19_testing_us_daily_df = (
    rearc_covid19_testing_us_daily_df
    .withColumn('date_str', col('date').cast('string'))
    .withColumn('timestamp', to_timestamp("date_str", "yyyyMMdd"))
    .withColumn('date', date_format('timestamp', 'yyyy-MM-dd'))
)




In [112]:
# Remove the two helper columns that were only needed to rebuild 'date'.
rearc_covid19_testing_us_daily_df = rearc_covid19_testing_us_daily_df.drop(
    'date_str', 'timestamp'
)




In [113]:
# Inspect the first 5 reformatted 'date' values of the US-daily frame.
rearc_covid19_testing_us_daily_df.select('date').show(5)

+----------+
|      date|
+----------+
|2021-03-07|
|2021-03-06|
|2021-03-05|
|2021-03-04|
|2021-03-03|
+----------+
only showing top 5 rows


In [114]:
# Inspect the 'date' column of the NYTimes state frame; this section does not modify it.
nytimes_data_in_usa_us_states_df.select('date').show(5)

+----------+
|      date|
+----------+
|2020-01-21|
|2020-01-22|
|2020-01-23|
|2020-01-24|
|2020-01-24|
+----------+
only showing top 5 rows


In [115]:
# Inspect the 'date' column of the NYTimes county frame; this section does not modify it.
nytimes_data_in_usa_us_county_df.select('date').show(5)

+----------+
|      date|
+----------+
|2020-01-21|
|2020-01-22|
|2020-01-23|
|2020-01-24|
|2020-01-24|
+----------+
only showing top 5 rows


#### Handle null value

In [116]:
from pyspark.sql.functions import isnull




In [138]:
# Replace nulls in the hospital-beds frame with 0.
# The null count of every column is computed in ONE aggregation pass
# (count() over a when() that is non-null only for null cells), instead of
# running a separate filter().count() Spark job per column.
from pyspark.sql import functions as F

beds_null_counts = rearc_usa_hospital_beds_df.select(
    [
        F.count(F.when(F.col(col_name).isNull(), 1)).alias(col_name)
        for col_name in rearc_usa_hospital_beds_df.columns
    ]
).first().asDict()

# Only columns that actually contain nulls are passed to fillna, in one call.
# NOTE(review): the integer 0 is used for every such column whatever its
# type — confirm this is intended for non-numeric columns.
beds_null_columns = [name for name, null_count in beds_null_counts.items() if null_count > 0]
if beds_null_columns:
    rearc_usa_hospital_beds_df = rearc_usa_hospital_beds_df.fillna(
        {name: 0 for name in beds_null_columns}
    )




In [141]:
# Replace nulls in the state-daily testing frame with 0.
# The null count of every column is computed in ONE aggregation pass
# (count() over a when() that is non-null only for null cells), instead of
# running a separate filter().count() Spark job per column.
from pyspark.sql import functions as F

states_daily_null_counts = rearc_covid19_testing_states_daily_df.select(
    [
        F.count(F.when(F.col(col_name).isNull(), 1)).alias(col_name)
        for col_name in rearc_covid19_testing_states_daily_df.columns
    ]
).first().asDict()

# Only columns that actually contain nulls are passed to fillna, in one call.
# NOTE(review): the integer 0 is used for every such column whatever its
# type — confirm this is intended for non-numeric columns.
states_daily_null_columns = [
    name for name, null_count in states_daily_null_counts.items() if null_count > 0
]
if states_daily_null_columns:
    rearc_covid19_testing_states_daily_df = rearc_covid19_testing_states_daily_df.fillna(
        {name: 0 for name in states_daily_null_columns}
    )




In [142]:
# Replace nulls in the US-daily testing frame with 0.
# The null count of every column is computed in ONE aggregation pass
# (count() over a when() that is non-null only for null cells), instead of
# running a separate filter().count() Spark job per column.
from pyspark.sql import functions as F

us_daily_null_counts = rearc_covid19_testing_us_daily_df.select(
    [
        F.count(F.when(F.col(col_name).isNull(), 1)).alias(col_name)
        for col_name in rearc_covid19_testing_us_daily_df.columns
    ]
).first().asDict()

# Only columns that actually contain nulls are passed to fillna, in one call.
# NOTE(review): the integer 0 is used for every such column whatever its
# type — confirm this is intended for non-numeric columns.
us_daily_null_columns = [
    name for name, null_count in us_daily_null_counts.items() if null_count > 0
]
if us_daily_null_columns:
    rearc_covid19_testing_us_daily_df = rearc_covid19_testing_us_daily_df.fillna(
        {name: 0 for name in us_daily_null_columns}
    )




### Write the data to the S3 location

In [143]:
# Import Dynamic DataFrame class
from awsglue.dynamicframe import DynamicFrame




In [145]:
# Convert the cleaned JHU Spark DataFrame back into a Glue DynamicFrame;
# "convert" is the name given to the resulting DynamicFrame.
enigma_jhu_dyf = DynamicFrame.fromDF(enigma_jhu_df, glueContext, "convert")




In [146]:
# Convert the remaining Spark DataFrames back into Glue DynamicFrames, each
# created with the name "convert". Targets and sources are listed in the same
# order ('<name>_df' -> '<name>_dyf').
(
    nytimes_data_in_usa_us_county_dyf,
    nytimes_data_in_usa_us_states_dyf,
    rearc_covid19_testing_states_daily_dyf,
    rearc_covid19_testing_us_daily_dyf,
    rearc_covid19_testing_us_total_latest_dyf,
    rearc_usa_hospital_beds_dyf,
    static_dataset_countrycode_dyf,
    static_dataset_countypopulation_dyf,
    static_dataset_state_abv_dyf,
) = (
    DynamicFrame.fromDF(spark_frame, glueContext, "convert")
    for spark_frame in (
        nytimes_data_in_usa_us_county_df,
        nytimes_data_in_usa_us_states_df,
        rearc_covid19_testing_states_daily_df,
        rearc_covid19_testing_us_daily_df,
        rearc_covid19_testing_us_total_latest_df,
        rearc_usa_hospital_beds_df,
        static_dataset_countrycode_df,
        static_dataset_countypopulation_df,
        static_dataset_state_abv_df,
    )
)




In [164]:
# Collapse the JHU frame to one partition, then write it to S3 as
# comma-separated CSV.
# NOTE(review): every write cell in this notebook targets the same
# "output_result/" prefix and reuses transformation_ctx "datasink2" — confirm
# that mixing all datasets under one prefix is intended.
enigma_jhu_dyf = enigma_jhu_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=enigma_jhu_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade758c90>


In [165]:
# Collapse the NYTimes county frame to one partition, then write it to S3 as
# comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
nytimes_data_in_usa_us_county_dyf = nytimes_data_in_usa_us_county_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=nytimes_data_in_usa_us_county_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade75c210>


In [166]:
# Collapse the NYTimes state frame to one partition, then write it to S3 as
# comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
nytimes_data_in_usa_us_states_dyf = nytimes_data_in_usa_us_states_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=nytimes_data_in_usa_us_states_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade75c750>


In [167]:
# Collapse the state-daily testing frame to one partition, then write it to S3
# as comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
rearc_covid19_testing_states_daily_dyf = rearc_covid19_testing_states_daily_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=rearc_covid19_testing_states_daily_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade75cc90>


In [168]:
# Collapse the US-daily testing frame to one partition, then write it to S3 as
# comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
rearc_covid19_testing_us_daily_dyf = rearc_covid19_testing_us_daily_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=rearc_covid19_testing_us_daily_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade5e1210>


In [169]:
# Collapse the US-total-latest testing frame to one partition, then write it
# to S3 as comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
rearc_covid19_testing_us_total_latest_dyf = rearc_covid19_testing_us_total_latest_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=rearc_covid19_testing_us_total_latest_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade5e1750>


In [170]:
# Collapse the hospital-beds frame to one partition, then write it to S3 as
# comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
rearc_usa_hospital_beds_dyf = rearc_usa_hospital_beds_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=rearc_usa_hospital_beds_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade75c9d0>


In [171]:
# Collapse the country-code frame to one partition, then write it to S3 as
# comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
static_dataset_countrycode_dyf = static_dataset_countrycode_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=static_dataset_countrycode_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faadfbca190>


In [172]:
# Collapse the county-population frame to one partition, then write it to S3
# as comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
static_dataset_countypopulation_dyf = static_dataset_countypopulation_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=static_dataset_countypopulation_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade758cd0>


In [173]:
# Collapse the state-abbreviation frame to one partition, then write it to S3
# as comma-separated CSV.
# NOTE(review): same S3 prefix and transformation_ctx as the other write cells
# in this notebook — confirm this is intended.
static_dataset_state_abv_dyf = static_dataset_state_abv_dyf.repartition(1)
glueContext.write_dynamic_frame.from_options(
    frame=static_dataset_state_abv_dyf,
    connection_type="s3",
    connection_options={
        "path": "s3://khanhnv-covid19-result-bucket/output_result/",
        "partitionKeys": [],
    },
    format="csv",
    format_options={"separator": ","},
    transformation_ctx="datasink2",
)

<awsglue.dynamicframe.DynamicFrame object at 0x7faade758850>
