# AWS Glue Studio Notebook
##### You are now running a AWS Glue Studio notebook; To start using your notebook you need to start an AWS Glue Interactive Session.


#### Optional: Run this cell to see available notebook commands ("magics").


In [None]:
%help

In [2]:
%%configure
{
  "write-shuffle-files-to-s3": "true",
  "write-shuffle-spills-to-s3": "true"
}

Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 0.38.1 
The following configurations have been updated: {'write-shuffle-spills-to-s3': 'true'}


####  Run this cell to set up and start your interactive session.


In [None]:
%idle_timeout 2880
%glue_version 4.0
%worker_type G.2X
%number_of_workers 2
%connections bi_oracle_rbi1tst
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)

#### Example: Create a DynamicFrame from a table in the AWS Glue Data Catalog and display its schema


In [None]:
dyf = glueContext.create_dynamic_frame.from_catalog(database='bi_oracle_rbi1tst', table_name='rbi1tst_warehouse_master_academic_general_lkp')
dyf.printSchema()

#### Example: Convert the DynamicFrame to a Spark DataFrame and display a sample of the data


In [None]:
df = dyf.toDF()
df.show()

In [6]:
dyf = glueContext.create_dynamic_frame.from_catalog(database='bi_oracle_rbi1tst', table_name='rbi1tst_warehouse_master_ap_evaluate')
dyf.printSchema()

root
|-- YEAR: decimal
|-- STAFF_ID: string
|-- HEW_CATEGORY_NARROW: string
|-- POSITION_CATEGORY_DESCRIPTION: string
|-- CURR_LEVEL_3_CODE: string
|-- CURR_FACULTY_NAME: string
|-- CURR_LEVEL_4_CODE: string
|-- CURR_SCHOOL_AREA_NAME: string
|-- STUDY_PERIOD_NAME: string
|-- UNIT_SPK_NO: decimal
|-- USER_DEFINED_CODE: string
|-- UNIT_ACTIVE_STATUS: string
|-- UNIT_FULL_TITLE: string
|-- UNIT_SHORT_TITLE: string
|-- UNIT_BROAD_CATEGORY: string
|-- UNIT_NARROW_CATEGORY: string
|-- UNIT_DETAILED_CATEGORY: string
|-- UNIT_RESEARCH_OR_COURSEWORK: string
|-- PRIMARY_BROAD_FOE_DESC: string
|-- SECONDARY_BROAD_FOE_DESC: string
|-- SURVEY_TITLE: string
|-- SCALE_NAME: string
|-- EVENT_NAME: string
|-- MEASURE: string
|-- RESPONSES: decimal
|-- AGREE: decimal
|-- DIAGREE: decimal
|-- UNABLETOJUDGE: decimal
|-- AGREE_PCT: decimal
|-- DISAGREE_PCT: decimal
|-- UNABLETOJUDGE_PCT: decimal


In [None]:
df = dyf.toDF()
df.show()

In [None]:
dyf = dyf.coalesce(1)
glueContext.write_dynamic_frame.from_options(
    frame=dyf,
    connection_type="s3",
    format= "glueparquet",
    connection_options={
        "path": "s3://bi-oracle-123456/evaluate",
        "partitionKeys":[],
        },
    transformation_ctx="s3output")
job.commit()

#### Example: Visualize data with matplotlib


#### Example: Write the data in the DynamicFrame to a location in Amazon S3 and a table for it in the AWS Glue Data Catalog


In [2]:
dyf = glueContext.create_dynamic_frame.from_catalog(database='bi_oracle_rbi1tst', table_name='rbi1tst_warehouse_master_evl_fact_flat')
dyf.printSchema()

root
|-- USER_ID: string
|-- UNIT_ID: string
|-- PUBLISHABLE: string
|-- PUBLISHABLE_COURSE: string
|-- PUBLISHABLE_MAJOR: string
|-- STU_SUPPRESS_FG: string
|-- REPRESENTATIVE: string
|-- STU_RESPONSE_ID: string
|-- SURVEY_RESPONSE_ID: string
|-- ENROLMENT_ID: string
|-- AGREEMENT_ID: string
|-- DISAGREEMENT_ID: string
|-- UJ_ID: string
|-- AGREE_BROAD: decimal
|-- AGREE_DETAIL: decimal
|-- STRONGLY_AGREE: decimal
|-- DISAGREE_BROAD: decimal
|-- DISAGREE_DETAIL: decimal
|-- STRONGLY_DISAGREE: decimal
|-- UJ: decimal
|-- QUESTIONS_CHECK_ANSWERED: decimal
|-- NO_RESPONSE: decimal
|-- SURVEY_ID: decimal
|-- STUDENT_ID: string
|-- QUESTION_ID: decimal
|-- RESPONSE_ID: decimal
|-- SSP_NO: decimal
|-- STATUS_ID: decimal
|-- UC_SUPPRESS_FG: string
|-- POP_SUPPRESS_FG: string
|-- CRS_POP_SUPPRESS_FG: string
|-- CSR_SUPPRESS_FG: string
|-- PUBLIC_SUPPRESS_FG: string
|-- USR_SUPPRESS_FG: string
|-- MJR_POP_SUPPRESS_FG: string
|-- MJR_SUPPRESS_FG: string
|-- QUALIFIED_MAJOR: string
|-- UNIT_SUPP

In [3]:
df = dyf.toDF()
df.show()

+--------------------+--------------------+--------------------+--------------------+--------------------+---------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+-----+------------+------------+--------------+--------------+---------------+-----------------+-----+------------------------+------------+-----------------+----------+-------------+-------------------+--------+------------+--------------+---------------+-------------------+---------------+------------------+---------------+-------------------+---------------+--------------------+----------------+------------------+---------------+--------------+----------------+-----------------------+-----------------+---------------------+----------------------+----------+---------------------+--------------------+----------------------+--------------------------+------------------------------+------------------------------+------------------+------------

In [None]:
dyf = dyf.coalesce(1)
glueContext.write_dynamic_frame.from_options(
    frame=dyf,
    connection_type="s3",
    format= "glueparquet",
    connection_options={
        "path": "s3://bi-oracle-123456/evaluate_flat",
        "partitionKeys":[],
        },
    transformation_ctx="s3output")
job.commit()