In [None]:
# Importing the basic Glue and Spark libraries

import os, sys
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
# Importing further required libraries
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from datetime import datetime

# Bootstrap the Spark / Glue runtime objects used by the rest of the script.

# getOrCreate() hands back the active SparkContext when one is already running
# instead of starting a second one.
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
job = Job(glueContext)
# Spark session owned by the Glue context; used for the DataFrame reads.
spark = glueContext.spark_session
# Column layout of the settings CSV files, listed in a fixed order as
# (column name, Spark type) pairs. Every column is declared nullable.
_SETTINGS_COLUMNS = [
    ("user_id", StringType),
    ("systolic_target", IntegerType),
    ("diastolic_target", IntegerType),
    ("medicine_per_day", IntegerType),
    ("medicine_alarm_time1", StringType),
    ("medicine_alarm_time2", StringType),
    ("medicine_alarm_time3", StringType),
    ("medicine_alarm_state", IntegerType),
    ("bp_morning_start", StringType),
    ("bp_morning_end", StringType),
    ("bp_evening_start", StringType),
    ("bp_evening_end", StringType),
    ("graph_type", IntegerType),
    ("bp_scale_min_def", IntegerType),
    ("bp_scale_max_def", IntegerType),
    ("graph_mask", IntegerType),
    ("initial_setting_done", IntegerType),
    ("tutorial_done_mask", IntegerType),
    ("memo_mask", IntegerType),
    ("medal_3_days", IntegerType),
    ("medal_5_days", IntegerType),
    ("medal_7_days", IntegerType),
    ("medal_14_days", IntegerType),
    ("medal_30_days", IntegerType),
    ("finish_initial", IntegerType),
    ("show_tutorial", IntegerType),
    ("on_boarding", StringType),
    ("finish_register", IntegerType),
    ("created_at", StringType),
    ("updated_at", StringType),
    ("email_popup_displayed", IntegerType),
    ("memo_icon_popup_displayed", IntegerType),
    ("medicine_register", IntegerType),
    ("logbook_separation_popup_displayed", IntegerType),
    ("premium_popup_displayed", IntegerType),
    ("how_to_graph_popup_displayed", IntegerType),
]

# Build the StructType from the table above, one nullable field per entry.
schema = StructType(
    [
        StructField(column_name, column_type(), True)
        for column_name, column_type in _SETTINGS_COLUMNS
    ]
)
# Source and destination settings.
# NOTE: despite its name, s3_bucket_name holds a full S3 URI prefix
# (bucket plus folder, ending in "/"); file names are appended to it directly.
s3_bucket_name = "s3://dynamodb-csv-importing/settings/"
ddb_table_name = "BPDiary-settings_performance"

# Names of the CSV files to read from S3.
file_list = ["settings.csv", "settings-dummy.csv"]

# Read each CSV file into its own DataFrame using the explicit schema.
# The inferSchema option is deliberately not passed: the column types are
# already fixed by `schema`, so requesting inference as well was contradictory
# and only obscured which of the two actually decides the types.
df_list = [
    spark.read.load(
        s3_bucket_name + file_name,
        format="csv",
        sep=",",
        schema=schema,
        header="true",  # first line of each file is a header row, not data
    )
    for file_name in file_list
]

# Stack the per-file frames into a single DataFrame. union() pairs columns by
# position, which is safe here because every frame was read with the same schema.
df = df_list[0]
for temp_df in df_list[1:]:
    df = df.union(temp_df)

# Wrap the Spark DataFrame in a Glue DynamicFrame, which is what the Glue
# writer below takes as its `frame` argument.
df_dyf = DynamicFrame.fromDF(df, glueContext, "df_dyf")

# Options for the Glue DynamoDB sink: target table and write-throughput share.
# NOTE(review): "1.0" presumably lets the job use the table's whole write
# capacity — confirm this is acceptable for other writers to the table.
ddb_write_options = {
    "dynamodb.output.tableName": ddb_table_name,
    "dynamodb.throughput.write.percent": "1.0",
}

# Log the start time, then push the data to DynamoDB.
write_started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print("Start writing to DynamoDB: {}".format(write_started_at))
glueContext.write_dynamic_frame_from_options(
    frame=df_dyf,
    connection_type="dynamodb",
    connection_options=ddb_write_options,
)

# Stamp the completion time first, so that the preview and count jobs below
# are not included in the reported write duration.
print("Finished writing to DynamoDB: {}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")))

# printSchema() and show() write to stdout themselves and return None, so they
# are called as statements; embedding them in an f-string printed a stray
# "... None" line after the real output.
print("Schema of DataFrame:")
df.printSchema()
print("Preview of DataFrame:")
df.show(5)

# count data
# NOTE(review): count() runs its own Spark job over `df`, so this is the number
# of rows read from the source files — presumably equal to what was written;
# confirm if an exact written-row figure is required.
print(f"Number of records written: {df.count()}")
