# Seed 생성 확인용 테스트 노트북

dbt seed로 생성된 seed 테이블과, dbt run으로 생성된 staging·marts 모델의 결과를 확인하기 위한 노트북입니다.

In [1]:
import sys

sys.path.append("/app")

from src.utils.spark_builder import get_spark_session
from pyspark.sql import functions as F

# Create the Spark session through the project helper, naming each argument
# so the application name and the master URL are easy to spot and edit.
app_name = "GDELT_Seed_Test"
master_url = "spark://spark-master:7077"
spark = get_spark_session(app_name, master_url)
print("✅ Spark 세션 생성 완료")

25/09/21 18:45:38 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


✅ Spark 세션 생성 완료


In [2]:
# Step 1: list every database (schema) registered in the Spark metastore.
print("📖 사용 가능한 데이터베이스(스키마) 목록:")
databases_df = spark.sql("SHOW DATABASES")
databases_df.show()

📖 사용 가능한 데이터베이스(스키마) 목록:
+---------+
|namespace|
+---------+
|  default|
|     gold|
|   silver|
+---------+



In [3]:
# Step 2: list the tables in the 'default' schema.
# Tables created by `dbt seed` are expected to appear here.
# (gdelt_silver_events is not a seed table.)
print("📜 'default' 스키마의 테이블 목록:")
default_tables_df = spark.sql("SHOW TABLES IN default")
default_tables_df.show()

📜 'default' 스키마의 테이블 목록:


[Stage 0:>                                                          (0 + 1) / 1]

+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|  default|actor_ethnic_grou...|      false|
|  default|actor_organizatio...|      false|
|  default|actor_religion_codes|      false|
|  default|    actor_role_codes|      false|
|  default|  event_detail_codes|      false|
|  default|event_quad_class_...|      false|
|  default|    event_root_codes|      false|
|  default| gdelt_silver_events|      false|
|  default|       geo_adm_codes|      false|
|  default|   geo_country_codes|      false|
|  default|geo_country_fips_...|      false|
|  default|geo_country_iso_c...|      false|
|  default|      geo_type_codes|      false|
+---------+--------------------+-----------+



                                                                                

In [None]:
# Step 3: print a 5-row sample from each of the 10 seed tables to eyeball
# their contents.
seed_tables = [
    "actor_ethnic_group_codes",
    "actor_organization_codes",
    "actor_religion_codes",
    "actor_role_codes",
    "event_root_codes",
    "event_detail_codes",
    "event_quad_class_codes",
    "geo_adm_codes",
    "geo_country_codes",
    "geo_type_codes",
]

for seed_name in seed_tables:
    print(f"\n 🔍 {seed_name} 테이블 샘플 데이터")
    preview_sql = f"SELECT * FROM default.{seed_name} LIMIT 5"
    try:
        preview_df = spark.sql(preview_sql)
        preview_df.show(truncate=False)
    except Exception as err:
        # Best-effort preview: report the failure and move on to the next
        # table instead of aborting the whole loop.
        print(f"❌ 테이블 조회 중, 오류 발생: {err}")


 🔍 actor_ethnic_group_codes 테이블 샘플 데이터


25/09/21 18:46:45 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.
25/09/21 18:46:45 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
                                                                                

+----+---------------------+
|code|description          |
+----+---------------------+
|aar |아파르족             |
|abk |압하스인             |
|abr |오스트레일리아 원주민|
|ace |아체인               |
|acg |아창족               |
+----+---------------------+


 🔍 actor_organization_codes 테이블 샘플 데이터
+----+----+--------------------------+
|code|type|description               |
+----+----+--------------------------+
|BHF |정부|보스니아 헤르체고비나 연방|
|FTA |정부|파타                      |
|HAM |정부|하마스                    |
|RPF |정부|르완다 애국 전선          |
|SRP |정부|스릅스카 공화국           |
+----+----+--------------------------+


 🔍 actor_religion_codes 테이블 샘플 데이터
+----+----+---------------+
|code|type|description    |
+----+----+---------------+
|REL |일반|종교 (미지정)  |
|ATH |이념|불가지론/무신론|
|BAH |종교|바하이 신앙    |
|BUD |종교|불교           |
|MAH |종파|대승불교       |
+----+----+---------------+


 🔍 actor_role_codes 테이블 샘플 데이터
+----+--------+--------------+
|code|type    |description   |
+----+--------+--------------+
|COP |1차 역할|경찰          |
|GOV

생성된 seed 테이블을 삭제하려면, 아래 코드의 `seed_tables_to_drop` 목록에 삭제할 seed 테이블명을 입력한 뒤 실행합니다.

In [None]:
# Names of the seed tables to drop. Fill this list in before running the
# cell; an empty list drops nothing.
seed_tables_to_drop = [
    # "[삭제할 seed 테이블명]"
]

if not seed_tables_to_drop:
    # Nothing was requested, so do not claim that anything was deleted.
    print("⚠️ 삭제할 Seed 테이블이 지정되지 않았습니다. (seed_tables_to_drop이 비어 있습니다.)")
else:
    print("🗑️ 기존 Seed 테이블 삭제를 시작합니다.")

    # Collect the tables whose DROP raised, so the final summary reflects
    # what actually happened rather than always reporting success.
    failed_tables = []
    for table_name in seed_tables_to_drop:
        try:
            spark.sql(f"DROP TABLE IF EXISTS default.{table_name}")
        except Exception as e:
            # Best-effort: keep going so one failure does not block the rest.
            failed_tables.append(table_name)
            print(f"  - 테이블 'default.{table_name}' 삭제 중 오류 발생: {e}")
        else:
            print(f"  - 테이블 'default.{table_name}' 삭제 완료.")

    if failed_tables:
        print(f"\n❌ 일부 Seed 테이블 삭제에 실패했습니다: {', '.join(failed_tables)}")
    else:
        print("\n✅ 모든 Seed 테이블 삭제 작업이 완료되었습니다.")

# List the tables again to confirm the resulting state of the schema.
print("📜 현재 'default' 스키마의 테이블 목록:")
spark.sql("SHOW TABLES IN default").show()

🗑️ 기존 Seed 테이블 삭제를 시작합니다.
  - 테이블 'default.geo_country_fips_codes' 삭제 완료.
  - 테이블 'default.geo_country_iso_codes' 삭제 완료.

✅ 모든 Seed 테이블 삭제 작업이 완료되었습니다.
📜 현재 'default' 스키마의 테이블 목록:
+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|  default|actor_ethnic_grou...|      false|
|  default|actor_organizatio...|      false|
|  default|actor_religion_codes|      false|
|  default|    actor_role_codes|      false|
|  default|  event_detail_codes|      false|
|  default|event_quad_class_...|      false|
|  default|    event_root_codes|      false|
|  default| gdelt_silver_events|      false|
|  default|       geo_adm_codes|      false|
|  default|   geo_country_codes|      false|
|  default|      geo_type_codes|      false|
+---------+--------------------+-----------+



Staging 폴더의 stg_seed_mapping.sql을 dbt run한 결과 확인

In [8]:
# Inspect the output of the stg_seed_mapping model (first 20 rows).
print("🥈 [stg_seed_mapping] 뷰 : 상위 20개 데이터")
stg_seed_mapping_df = spark.sql("SELECT * FROM silver.stg_seed_mapping LIMIT 20")
stg_seed_mapping_df.show(truncate=False)

# Alternative for this very wide result: convert the same query result with
# .toPandas() and display the pandas DataFrame instead of calling .show().

🥈 [stg_seed_mapping] 뷰 : 상위 20개 데이터
+---------------+----------+-------------+-------------------+------------------------+----------+-------------+---------------+------------+-----------+------------+-----------------+-----------+--------------+-------------------+----------------------+--------------------------------------------------------+---------------------+------------------+-----------------------------------------------+-----------------------+----------------------------+-------------------------+--------------------+--------------+---------------+---------------------+-----------+--------------+-------------------+----------------------+----------------------------+---------------------+------------------+-----------------------------------------------+-----------------------+----------------------------+-------------------------+--------------------+--------------+---------------+---------------------+------------------+-----------------------------------------------+---

Staging 폴더의 stg_seed_actors_parsed.sql을 dbt run한 결과 확인

In [10]:
# Inspect the output of the stg_seed_actors_parsed model (first 20 rows).
print("🥈 [stg_seed_actors_parsed] 뷰 : 상위 20개 데이터")
actors_parsed_df = spark.sql("SELECT * FROM silver.stg_seed_actors_parsed LIMIT 20")
actors_parsed_df.show(truncate=False)

🥈 [stg_seed_actors_parsed] 뷰 : 상위 20개 데이터
+----------+--------------+-----+------------+-----+----------------+-----+------------+-----+
|actor_code|description1  |type1|description2|type2|description3    |type3|description4|type4|
+----------+--------------+-----+------------+-----+----------------+-----+------------+-----+
|PSE       |팔레스타인    |국가 |null        |null |null            |null |null        |null |
|ELI       |엘리트        |역할 |null        |null |null            |null |null        |null |
|LTUGOV    |리투아니아    |국가 |정부        |역할 |null            |null |null        |null |
|PSEGOV    |팔레스타인    |국가 |정부        |역할 |null            |null |null        |null |
|LVA       |라트비아      |국가 |null        |null |null            |null |null        |null |
|REL       |종교 (미지정) |종교 |null        |null |null            |null |null        |null |
|SWEGOV    |스웨덴        |국가 |정부        |역할 |null            |null |null        |null |
|DEUMIL    |독일          |국가 |군대        |역할 |null            |nul

Staging 폴더의 stg_actors_description.sql을 dbt run한 결과 확인

In [None]:
# Inspect the output of the stg_actors_description model (first 20 rows).
print("🥈 [stg_actors_description] 뷰 : 상위 20개 데이터")
actors_description_df = spark.sql("SELECT * FROM silver.stg_actors_description LIMIT 20")
actors_description_df.show(truncate=False)

25/09/21 23:53:34 ERROR TaskSchedulerImpl: Lost executor 0 on 172.18.0.19: worker lost
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_420_28 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_416_23 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_79_5 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_75_29 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_416_13 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_420_43 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_79_18 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_79_9 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replicas available for rdd_79_16 !
25/09/21 23:53:34 WARN BlockManagerMasterEndpoint: No more replic

Marts 폴더의 gold_1st_global_overview.sql을 dbt run한 결과 확인

In [11]:
# Inspect the output of the gold_1st_global_overview model.
print("🥇 [gold_1st_global_overview] 뷰")
global_overview_df = spark.sql("SELECT * FROM gold.gold_1st_global_overview")
global_overview_df.show(truncate=False)

🥇 [gold_1st_global_overview] 뷰


                                                                                

+----------+--------------+---------------+--------------------+-------------------+------------------+-----------------+------------------+-------------------+-----------+-------------------------+
|event_date|action_geo_iso|action_geo_name|risk_score          |avg_goldstein_scale|total_num_mentions|total_num_sources|total_num_articles|avg_tone           |event_count|updated_at               |
+----------+--------------+---------------+--------------------+-------------------+------------------+-----------------+------------------+-------------------+-----------+-------------------------+
|2025-09-17|ALB           |Albania        |-1.916216216216216  |3.4                |5                 |1                |5                 |0.72072072072072   |1          |2025-09-21 19:35:05.63027|
|2025-09-17|ARM           |Armenia        |-3.3755464112391365 |3.4                |20                |6                |14                |6.77966101694916   |6          |2025-09-21 19:35:05.63027|
|2025

Marts 폴더의 gold_2nd_country_events.sql을 dbt run한 결과 확인

In [14]:
# Inspect the output of the gold_2nd_country_events model.
print("🥇 [gold_2nd_country_events] 뷰")
country_events_df = spark.sql("SELECT * FROM gold.gold_2nd_country_events")
country_events_df.show(truncate=False)

🥇 [gold_2nd_country_events] 뷰


25/09/22 00:32:37 WARN DeltaLog: Change in the table id detected while updating snapshot. 
Previous snapshot = Snapshot(path=s3a://warehouse/gold/gold_2nd_country_events/_delta_log, version=1, metadata=Metadata(e7f37df3-0bc9-4978-9b52-b03e6bb6966c,null,null,Format(parquet,Map()),{"type":"struct","fields":[{"name":"event_date","type":"date","nullable":true,"metadata":{}},{"name":"actor1_geo_iso","type":"string","nullable":true,"metadata":{}},{"name":"actor1_geo_name","type":"string","nullable":true,"metadata":{}},{"name":"actor1_geo_lat","type":"double","nullable":true,"metadata":{}},{"name":"actor1_geo_long","type":"double","nullable":true,"metadata":{}},{"name":"actor2_geo_iso","type":"string","nullable":true,"metadata":{}},{"name":"actor2_geo_name","type":"string","nullable":true,"metadata":{}},{"name":"actor2_geo_lat","type":"double","nullable":true,"metadata":{}},{"name":"actor2_geo_long","type":"double","nullable":true,"metadata":{}},{"name":"action_geo_iso","type":"string","nulla

+----------+--------------+--------------------------------------------------------+--------------+--------------+---------------+--------------+--------------------------------------------------------+------------------+--------------+---------------+--------------+--------------------------------------------------------+--------------+--------------+---------------+-------------------+--------------+-------------+--------------+-------------------+-----------+------------+------------+------------+------------+--------------------------+
|event_date|actor1_geo_iso|actor1_geo_eng                                          |actor1_geo_kor|actor1_geo_lat|actor1_geo_long|actor2_geo_iso|actor2_geo_eng                                          |actor2_geo_kor    |actor2_geo_lat|actor2_geo_long|action_geo_iso|action_geo_eng                                          |action_geo_kor|action_geo_lat|action_geo_long|avg_goldstein_scale|total_mentions|total_sources|total_articles|avg_tone           |

Marts 폴더의 gold_3rd_events_summary.sql을 dbt run한 결과 확인

In [None]:
# Inspect the output of the gold_3rd_events_summary model.
# Only the header is printed for now: the query below is disabled.
print("🥇 [gold_3rd_events_summary] 뷰")
# Uncomment the next line to display the model's rows.
# spark.sql("SELECT * FROM gold.gold_3rd_events_summary").show(truncate=False)

Marts 폴더의 gold_4th_daily_detail_summary.sql을 dbt run한 결과 확인

In [9]:
# Inspect the output of the gold_4th_daily_detail_summary model.
print("🥇 [gold_4th_daily_detail_summary] 뷰")
daily_detail_df = spark.sql("SELECT * FROM gold.gold_4th_daily_detail_summary")
daily_detail_df.show(truncate=False)

🥇 [gold_4th_daily_detail_summary] 뷰


                                                                                

+----------+--------------------------------+--------------+------------------------+---------------+------------+----------+-----------+--------------------------+----------------------------------------------------+-----------------+------------+
|event_date|event_category                  |action_country|actor1_info             |actor2_info    |num_articles|is_anomaly|event_type |processed_time            |mp_event_info                                       |avg_tone         |num_mentions|
+----------+--------------------------------+--------------+------------------------+---------------+------------+----------+-----------+--------------------------+----------------------------------------------------+-----------------+------------+
|2024-09-11|ENGAGE IN DIPLOMATIC COOPERATION|Norway        |NORWAY                  |ISRAEL         |2           |0         |Cooperation|2025-09-11 01:48:18.167479|Grant diplomatic recognition                        |-6.06673407482306|2           |
|202

In [None]:
# Stop the Spark session created at the top of the notebook, then confirm.
# The confirmation is printed only after stop() has returned, so the message
# is never shown while the session is still running.
spark.stop()
print("\n✅ Spark 세션이 종료되었습니다.")