# S023-getraw Job
### 병원/약국 정보를 입수하여 초기 데이터를 적재하기 위한 Job
### 기존 Airflow의 다음 부분이 해당
```python
get_raw >> get_raw_2
```

In [1]:
%idle_timeout 60
%glue_version 5.0
%worker_type G.1X
%number_of_workers 2

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
  
# Reuse the SparkContext that is already running, or start one if none is.
sc = SparkContext.getOrCreate()

# The GlueContext wraps that SparkContext; the SparkSession used by the
# cells below is taken from it.
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Job handle bound to the GlueContext.
job = Job(glueContext)

Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 1.0.8 
Current idle_timeout is None minutes.
idle_timeout has been set to 60 minutes.
Setting Glue version to: 5.0
Previous worker type: None
Setting new worker type to: G.1X
Previous number of workers: None
Setting new number of workers to: 2
Trying to create a Glue session for the kernel.
Session Type: glueetl
Worker Type: G.1X
Number of Workers: 2
Idle Timeout: 60
Session ID: cdc0a39b-02e0-4740-9b47-63e62d5d9031
Applying the following default arguments:
--glue_kernel_version 1.0.8
--enable-glue-datacatalog true
Waiting for session cdc0a39b-02e0-4740-9b47-63e62d5d9031 to get into ready status...
Session cdc0a39b-02e0-4740-9b47-63e62d5d9031 has been created.

In [4]:
# Receive the parameters passed in by MWAA.
args = getResolvedOptions(sys.argv, [
    'JOB_NAME',  # Glue job name
    'yyyymm',    # target month, passed as --yyyymm=YYYYMM (e.g. 202507)
])

s3_bucket = "s3://demo.nice.co.kr.datalake"

yyyymm = args['yyyymm']
# When no job arguments are available, getResolvedOptions raises
# GlueArgumentError; for such ad-hoc runs set the month by hand instead:
# yyyymm = "202507"

# Fail fast on a malformed month: YYYY and MM below are plain string slices
# that end up in S3 paths, so a bad value would otherwise only show up later
# as a wrong or missing location.
if not (
    len(yyyymm) == 6
    and yyyymm.isascii()
    and yyyymm.isdigit()
    and 1 <= int(yyyymm[4:6]) <= 12
):
    raise ValueError(
        f"--yyyymm must be a 6-digit YYYYMM value with MM in 01-12, got {yyyymm!r}"
    )

YYYY = yyyymm[:4]  # e.g. "2025"
MM = yyyymm[4:6]   # e.g. "07"

GlueArgumentError: the following arguments are required: --JOB_NAME, --yyyymm


## KB카드 월 데이터 읽어오기 : getRaw.scala

In [18]:
# Read the monthly "part 1" CSV for the target month and strip the
# "medical_indv_p1_final_<yyyymm>." prefix from the columns listed below.
#
# All renames are applied in a single withColumnsRenamed() call instead of one
# withColumnRenamed() call per column. Like withColumnRenamed(), it is a no-op
# for names that are not in the schema, so columns missing from the file are
# simply skipped.
# NOTE(review): withColumnsRenamed() needs Spark 3.4+; the session requests
# Glue 5.0 - confirm the runtime Spark version before deploying elsewhere.
_gain_1_columns = (
    "yyyymm", "key", "mega_gubun", "admi_cd", "admi_nm",
    "upjong1_cd", "upjong1_nm", "upjong2_cd", "upjong2_nm",
    "upjong3_cd", "upjong3_nm",
    "sale_cnt", "sale_amt",
    "cnt_m_20under", "cnt_m_2024", "cnt_m_2529", "cnt_m_3034",
    "cnt_m_3539", "cnt_m_4044", "cnt_m_4549", "cnt_m_5054",
    "cnt_m_5559", "cnt_m_6064", "cnt_m_6569", "cnt_m_70over",
    "cnt_w_20under", "cnt_w_2024", "cnt_w_2529", "cnt_w_3034",
    "cnt_w_3539", "cnt_w_4044", "cnt_w_4549", "cnt_w_5054",
    "cnt_w_5559", "cnt_w_6064", "cnt_w_6569", "cnt_w_70over",
    "sale_m_20under", "sale_m_2024", "sale_m_2529", "sale_m_3034",
    "sale_m_3539", "sale_m_4044", "sale_m_4549", "sale_m_5054",
    "sale_m_5559", "sale_m_6064", "sale_m_6569", "sale_m_70over",
    "sale_w_20under", "sale_w_2024", "sale_w_2529", "sale_w_3034",
    "sale_w_3539", "sale_w_4044", "sale_w_4549", "sale_w_5054",
    "sale_w_5559", "sale_w_6064", "sale_w_6569", "sale_w_70over",
    "cnt_mon", "cnt_tue", "cnt_wed", "cnt_thu",
    "cnt_fri", "cnt_sat", "cnt_sun",
    "sale_mon", "sale_tue", "sale_wed", "sale_thu",
    "sale_fri", "sale_sat", "sale_sun",
    "cnt_0003", "cnt_0306", "cnt_0609", "cnt_0912",
    "cnt_1215", "cnt_1518", "cnt_1821", "cnt_2124",
    "sale_0003", "sale_0306", "sale_0609", "sale_0912",
    "sale_1215", "sale_1518", "sale_1821", "sale_2124",
    "cnt_12m", "sale_12m", "cnt_6m", "sale_6m",
    "cnt_3m", "sale_3m", "cnt_1m", "sale_1m",
    "cnt_new", "sale_new",
    "term_month",
)

# The prefix embeds the target month, so it has to be rebuilt for every run.
_gain_1_prefix = f"medical_indv_p1_final_{yyyymm}."

_gain_1_source = (
    f"{s3_bucket}/DI/CARD/KB_MEDI/ORG/{YYYY}/{MM}/"
    f"KB국민카드_의료서비스_개인파트1_월별매출집계데이터_{yyyymm}.csv"
)

KB_GAIN_1_df = (
    spark.read
    .option("encoding", "utf-8")
    .option("header", "true")
    .csv(_gain_1_source)
    .withColumnsRenamed(
        {_gain_1_prefix + name: name for name in _gain_1_columns}
    )
)




In [20]:
# Write the part-1 DataFrame as Parquet under RAW/GAIN_1/<YYYY>/<MM>,
# overwriting whatever is already at that location.
#
# The path previously contained "RAW//GAIN_1" (double slash).
# NOTE(review): Hadoop path handling is expected to collapse "//" into "/",
# so the effective output location should be unchanged - confirm against any
# consumer that reads this prefix without going through Spark/Hadoop.
KB_GAIN_1_df.write.mode("overwrite").parquet(
    f"{s3_bucket}/DI/CARD/KB_MEDI/RAW/GAIN_1/{YYYY}/{MM}"
)




## getRaw_2.scala

In [24]:
# Read the monthly "part 2" CSV for the target month and strip the
# "medical_indv_p2_final_<yyyymm>." prefix from the columns listed below.
#
# All renames are applied in a single withColumnsRenamed() call instead of one
# withColumnRenamed() call per column. Like withColumnRenamed(), it is a no-op
# for names that are not in the schema, so columns missing from the file are
# simply skipped.
# NOTE(review): withColumnsRenamed() needs Spark 3.4+; the session requests
# Glue 5.0 - confirm the runtime Spark version before deploying elsewhere.
_gain_2_columns = (
    "yyyymm", "key", "mega_gubun", "admi_cd", "admi_nm",
    "upjong1_cd", "upjong1_nm", "upjong2_cd", "upjong2_nm",
    "upjong3_cd", "upjong3_nm",
    "area1_nm", "area1_ratio", "area2_nm", "area2_ratio",
    "area3_nm", "area3_ratio", "area4_nm", "area4_ratio",
    "area5_nm", "area5_ratio", "area6_nm", "area6_ratio",
    "area7_nm", "area7_ratio", "area8_nm", "area8_ratio",
    "area9_nm", "area9_ratio", "area10_nm", "area10_ratio",
)

# The prefix embeds the target month, so it has to be rebuilt for every run.
_gain_2_prefix = f"medical_indv_p2_final_{yyyymm}."

_gain_2_source = (
    f"{s3_bucket}/DI/CARD/KB_MEDI/ORG/{YYYY}/{MM}/"
    f"KB국민카드_의료서비스_개인파트2_월별매출집계데이터_{yyyymm}.csv"
)

KB_GAIN_2_df = (
    spark.read
    .option("encoding", "utf-8")
    .option("header", "true")
    .csv(_gain_2_source)
    .withColumnsRenamed(
        {_gain_2_prefix + name: name for name in _gain_2_columns}
    )
)




In [25]:
# Write the part-2 DataFrame as Parquet under RAW/GAIN_2/<YYYY>/<MM>,
# overwriting whatever is already at that location.
#
# The path previously contained "RAW//GAIN_2" (double slash).
# NOTE(review): Hadoop path handling is expected to collapse "//" into "/",
# so the effective output location should be unchanged - confirm against any
# consumer that reads this prefix without going through Spark/Hadoop.
KB_GAIN_2_df.write.mode("overwrite").parquet(
    f"{s3_bucket}/DI/CARD/KB_MEDI/RAW/GAIN_2/{YYYY}/{MM}"
)


