In [None]:
#Importing all the basic Glue, Spark libraries 

import os, sys, boto3
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
# Important further required libraries
from pprint import pprint
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType
from datetime import datetime

# Bootstrap the Spark and Glue runtime objects used by the rest of the script.
# getOrCreate() reuses an already-running SparkContext (e.g. in an interactive
# session) rather than failing on a second instantiation.
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
# NOTE(review): the Job wrapper is created here, but the code visible in this
# script never calls init()/commit() on it -- confirm whether that is intended.
job = Job(glueContext)

# Explicit schema for the diary CSV files: every column is read as a nullable
# string, so no type inference or casting is applied while loading.
schema = StructType([
    StructField(column_name, StringType(), True)
    for column_name in (
        "user_id",
        "date",
        "medicine_time1",
        "medicine_time2",
        "medicine_time3",
        "memo_mask",
        "memo_text",
        "created_at",
        "updated_at",
    )
])
# AWS configuration
# NOTE: despite its name, s3_bucket_name holds a full S3 URI prefix (bucket
# plus the "diaries/" folder); file names from file_list are appended to it.
s3_bucket_name = "s3://dynamodb-csv-importing/diaries/"
# NOTE(review): region_name is not referenced by the code visible in this script.
region_name = "ap-northeast-1"
ddb_table_name = "BPDiary-diaries_performance"

# List of files to import (object names relative to s3_bucket_name),
# kept in lexicographic order.
file_list = [
    "diaries-dummy-1.csv",
    "diaries-dummy-10.csv",
    "diaries-dummy-11.csv",
    "diaries-dummy-12.csv",
    "diaries-dummy-13.csv",
    "diaries-dummy-14.csv",
    "diaries-dummy-15.csv",
    "diaries-dummy-16.csv",
    "diaries-dummy-17.csv",
    "diaries-dummy-18.csv",
    "diaries-dummy-19.csv",
    "diaries-dummy-2.csv",
    "diaries-dummy-20.csv",
    "diaries-dummy-21.csv",
    "diaries-dummy-22.csv",
    "diaries-dummy-23.csv",
    "diaries-dummy-24.csv",
    "diaries-dummy-25.csv",
    "diaries-dummy-3.csv",
    "diaries-dummy-4-1.csv",
    "diaries-dummy-4.csv",
    "diaries-dummy-5.csv",
    "diaries-dummy-6.csv",
    "diaries-dummy-7.csv",
    "diaries-dummy-8.csv",
    "diaries-dummy-9.csv",
    "diaries.csv",
]

# Read all listed CSV files into a single DataFrame.
#
# The reader accepts a list of paths, so every file is loaded in one call
# instead of building one DataFrame per file and chaining unions (which
# produces a needlessly deep query plan for the same rows).
#
# The explicit `schema` is applied as-is; the former inferSchema="true" option
# was dropped because it contradicts a user-supplied schema, which is the one
# that applies. header="true" skips the header row of each file.
if not file_list:
    # Fail with a clear message rather than writing nothing to DynamoDB.
    raise ValueError("file_list is empty: there is nothing to import")

input_paths = [s3_bucket_name + file_name for file_name in file_list]
df = spark.read.load(
    input_paths,
    format="csv",
    sep=",",
    schema=schema,
    header="true",
)

# Transform the Spark DataFrame into a Glue DynamicFrame, the frame type
# expected by write_dynamic_frame_from_options below.
df_dyf = DynamicFrame.fromDF(df, glueContext, "df_dyf")

# Write data to DynamoDB, logging wall-clock timestamps around the write.
print("Start writing to DynamoDB: {}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
glueContext.write_dynamic_frame_from_options(
    frame=df_dyf,
    connection_type="dynamodb",
    connection_options={
        "dynamodb.output.tableName": ddb_table_name,
        # Connector write-throughput setting; see the AWS Glue DynamoDB
        # connection options for its exact meaning and accepted range.
        "dynamodb.throughput.write.percent": "1.0"
    }
)
# Log the end timestamp immediately after the write returns, so the interval
# between the two timestamps does not include the diagnostic steps below.
print("Finished writing to DynamoDB: {}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")))

# printSchema() and show() print their output themselves and return None, so
# they are called as statements; embedding them in an f-string would log
# "... None" after the real output.
print("Schema of DataFrame:")
df.printSchema()
print("Preview of DataFrame:")
df.show(5)

# Count data.
# NOTE: this is the row count of the source DataFrame (evaluated again here),
# not a confirmation returned by DynamoDB.
print(f"Number of records written: {df.count()}")
