# AWS Glue Studio Notebook
##### You are now running an AWS Glue Studio notebook. To start using your notebook, you need to start an AWS Glue Interactive Session.


#### Optional: Run this cell to see available notebook commands ("magics").


In [None]:
%help

####  Run this cell to set up and start your interactive session.


In [1]:
%idle_timeout 2880
%glue_version 4.0
%worker_type G.1X
%number_of_workers 5

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
# Reuse the SparkContext if one is already active, otherwise create it, so
# re-running this cell does not try to start a second context.
sc = SparkContext.getOrCreate()
# GlueContext wraps the SparkContext; later cells use it to read from the
# Data Catalog and to write to DynamoDB / S3.
glueContext = GlueContext(sc)
# SparkSession handle taken from the GlueContext.
spark = glueContext.spark_session
# Job object bound to this GlueContext. NOTE(review): job.init()/job.commit()
# are not called anywhere in the visible cells — confirm whether `job` is needed.
job = Job(glueContext)

Current idle_timeout is 2880 minutes.
idle_timeout has been set to 2880 minutes.
Setting Glue version to: 4.0
Previous worker type: G.1X
Setting new worker type to: G.1X
Previous number of workers: 5
Setting new number of workers to: 5
Trying to create a Glue session for the kernel.
Session Type: glueetl
Worker Type: G.1X
Number of Workers: 5
Idle Timeout: 2880
Session ID: b081ca53-d230-487a-b0f4-78def75a79df
Applying the following default arguments:
--glue_kernel_version 1.0.5
--enable-glue-datacatalog true
Waiting for session b081ca53-d230-487a-b0f4-78def75a79df to get into ready status...
Session b081ca53-d230-487a-b0f4-78def75a79df has been created.



#### Example: Create a DynamicFrame from a table in the AWS Glue Data Catalog and display its schema


In [2]:
# Read the `employee` table of the `syntheticdata` Data Catalog database into
# a DynamicFrame, then print the schema Glue resolved for it.
dyf = glueContext.create_dynamic_frame.from_catalog(
    database="syntheticdata",
    table_name="employee",
)
dyf.printSchema()

root
|-- employee_id: string
|-- dept_id: string
|-- employee_level: string
|-- employment_start_dt: string
|-- increment_dt: string


#### Example: Convert the DynamicFrame to a Spark DataFrame and display a sample of the data


In [3]:
# Convert the Glue DynamicFrame into a Spark DataFrame.
df = dyf.toDF()
# Print a tabular preview of the leading rows (see the captured output below).
df.show()

+-----------+-------+--------------+-------------------+------------+
|employee_id|dept_id|employee_level|employment_start_dt|increment_dt|
+-----------+-------+--------------+-------------------+------------+
|    X110096|   4086|          null|           20240924|        null|
|    X110088|   4078|          null|           20240924|        null|
|    X110011|   4001|          null|           20240924|        null|
|    X110037|   4027|          null|           20240924|        null|
|    X110062|   4052|          null|           20240924|        null|
|    X114535|   4521|          null|           20240925|        null|
|    X115566|   4551|          null|           20240925|        null|
|    X118353|   4335|          null|           20240925|        null|
|    X110548|   4538|          null|           20240925|        null|
|    X112198|   4186|          null|           20240925|        null|
|    X116149|   4133|          null|           20240925|        null|
|    X116800|   4784

#### Example: Write the data in the DynamicFrame to an Amazon DynamoDB table


In [25]:
from botocore.exceptions import ClientError

# Write the catalog DynamicFrame to the DynamoDB table
# `employee_department_interactions`, using the full write-throughput
# percentage ("1.0").
try:
    glueContext.write_dynamic_frame_from_options(
        frame=dyf,
        connection_type="dynamodb",
        connection_options={
            "dynamodb.output.tableName": "employee_department_interactions",
            "dynamodb.throughput.write.percent": "1.0",
        },
    )
except Exception as e:
    # The captured output shows the failure arriving as a JVM call error
    # ("An error occurred while calling ...pyWriteDynamicFrame") whose text
    # embeds the DynamoDB error code, so the code is matched on the message.
    error_message = str(e)
    print("error message is ", error_message, "could you spot me" )
    if "ResourceNotFoundException" in error_message:
        # The target table does not exist yet.
        # TODO: create the table (with its key schema) and retry the write;
        # table creation is not implemented in this notebook.
        print("Lets create the table")
    else:
        # Any other failure is unexpected: re-raise instead of letting the
        # cell appear to succeed with nothing written.
        raise

error message is  An error occurred while calling o136.pyWriteDynamicFrame.
: com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException: Requested resource not found: Table: employee_department_interactions not found (Service: AmazonDynamoDBv2; Status Code: 400; Error Code: ResourceNotFoundException; Request ID: IB38I1ED9PH9TU89AEJU5Q5673VV4KQNSO5AEMVJF66Q9ASUAAJG; Proxy: null)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1879)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1418)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1387)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at

#### Example: Write the data in the DynamicFrame to a location in Amazon S3 and a table for it in the AWS Glue Data Catalog


In [None]:
# Build an S3 sink that also creates/updates a Data Catalog table for the
# written data (enableUpdateCatalog + UPDATE_IN_DATABASE).
# NOTE(review): the bucket path, catalog database ("demo") and table name
# ("populations") look like template placeholders — replace before running.
s3output = glueContext.getSink(
  path="s3://bucket_name/folder_name",
  connection_type="s3",
  updateBehavior="UPDATE_IN_DATABASE",
  partitionKeys=[],
  compression="snappy",
  enableUpdateCatalog=True,
  transformation_ctx="s3output",
)
s3output.setCatalogInfo(
  catalogDatabase="demo", catalogTableName="populations"
)
s3output.setFormat("glueparquet")
# Write the DynamicFrame loaded from the catalog earlier in the notebook.
# The original passed `DyF`, a name never defined here, which raises NameError.
s3output.writeFrame(dyf)