# Seed 생성 확인용 테스트 노트북

dbt seed로 생성된 seed 테이블과, dbt run으로 생성된 staging(silver) · marts(gold) 모델의 결과를 확인하기 위한 노트북입니다.

In [1]:
import sys
sys.path.append('/app')

from src.utils.spark_builder import get_spark_session
from pyspark.sql import functions as F

# Create the Spark session shared by every cell below.
# NOTE(review): the two positional arguments are presumably the application
# name and the Spark master URL — confirm against get_spark_session.
app_name = "GDELT_Seed_Test"
master_url = "spark://spark-master:7077"
spark = get_spark_session(app_name, master_url)
print("✅ Spark 세션 생성 완료")

25/09/17 02:19:19 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


✅ Spark 세션 생성 완료


In [2]:
# 1. List every database (schema) registered in the Spark metastore.
print("📖 사용 가능한 데이터베이스(스키마) 목록:")
databases_df = spark.sql("SHOW DATABASES")
databases_df.show()

📖 사용 가능한 데이터베이스(스키마) 목록:
+---------+
|namespace|
+---------+
|  default|
|     gold|
|   silver|
+---------+



In [3]:
# 2. List the tables that live in the 'default' schema.
# Tables created by `dbt seed` should show up here.
# (gdelt_silver_events is not a seed table.)
print("📜 'default' 스키마의 테이블 목록:")
default_tables_df = spark.sql("SHOW TABLES IN default")
default_tables_df.show()

📜 'default' 스키마의 테이블 목록:


[Stage 0:>                                                          (0 + 1) / 1]

+---------+--------------------+-----------+
|namespace|           tableName|isTemporary|
+---------+--------------------+-----------+
|  default|actor_ethnic_grou...|      false|
|  default|actor_organizatio...|      false|
|  default|actor_religion_codes|      false|
|  default|    actor_role_codes|      false|
|  default|  event_detail_codes|      false|
|  default|event_quad_class_...|      false|
|  default|    event_root_codes|      false|
|  default| gdelt_silver_events|      false|
|  default|       geo_adm_codes|      false|
|  default|geo_country_fips_...|      false|
|  default|geo_country_iso_c...|      false|
|  default|      geo_type_codes|      false|
+---------+--------------------+-----------+



                                                                                

In [None]:
# 3. Print five sample rows from each of the 11 seed tables to eyeball their contents.
seed_tables = [
    # Actor code lookups
    "actor_ethnic_group_codes",
    "actor_organization_codes",
    "actor_religion_codes",
    "actor_role_codes",
    # Event code lookups
    "event_root_codes",
    "event_detail_codes",
    "event_quad_class_codes",
    # Geography code lookups
    "geo_adm_codes",
    "geo_country_fips_codes",
    "geo_country_iso_codes",
    "geo_type_codes"
]

for table in seed_tables:
    print(f"\n 🔍 {table} 테이블 샘플 데이터")
    sample_query = f"SELECT * FROM default.{table} LIMIT 5"
    try:
        sample_df = spark.sql(sample_query)
        # truncate=False prints full cell values instead of shortening long strings.
        sample_df.show(truncate=False)
    except Exception as e:
        # Best-effort preview: report the failure and move on to the next table.
        print(f"❌ 테이블 조회 중, 오류 발생: {e}")

생성된 seed 테이블을 삭제하려면, 아래 코드의 `seed_tables_to_drop` 목록에 삭제할 seed 테이블명을 입력한 뒤 실행합니다.

In [None]:
# Names of the seed tables to drop, without the "default." prefix.
# Leave the list empty to skip deletion and only re-list the current tables.
seed_tables_to_drop = [
    # "<name of the seed table to drop>"
]

if not seed_tables_to_drop:
    # Nothing was listed, so do not claim that any deletion took place.
    print("ℹ️ 삭제할 Seed 테이블이 지정되지 않았습니다. seed_tables_to_drop 목록에 테이블명을 입력한 뒤 다시 실행하세요.")
else:
    print("🗑️ 기존 Seed 테이블 삭제를 시작합니다.")

    # Tables whose DROP statement raised, so the final summary can be honest.
    failed_tables = []

    for table_name in seed_tables_to_drop:
        try:
            # IF EXISTS makes the statement succeed even when the table is absent,
            # so the "done" line below is also printed for tables that never existed.
            spark.sql(f"DROP TABLE IF EXISTS default.{table_name}")
            print(f"  - 테이블 'default.{table_name}' 삭제 완료.")
        except Exception as e:
            # Best-effort: record the failure and keep going with the remaining tables.
            failed_tables.append(table_name)
            print(f"  - 테이블 'default.{table_name}' 삭제 중 오류 발생: {e}")

    if failed_tables:
        print(f"\n⚠️ 일부 Seed 테이블 삭제에 실패했습니다: {', '.join(failed_tables)}")
    else:
        print("\n✅ 모든 Seed 테이블 삭제 작업이 완료되었습니다.")

# List the tables again to confirm the current state of the schema.
print("📜 현재 'default' 스키마의 테이블 목록:")
spark.sql("SHOW TABLES IN default").show()

Staging 폴더의 stg_seed_mapping.sql을 dbt run한 결과 확인

In [4]:
# Inspect the output of the stg_seed_mapping model.
# There is no ORDER BY, so which 20 rows come back is not guaranteed to be stable.
print("🥈 [stg_seed_mapping] 뷰 : 상위 20개 데이터")
mapping_query = "SELECT * FROM silver.stg_seed_mapping LIMIT 20"
spark.sql(mapping_query).show(truncate=False)
# Optional drill-down on a single article; uncomment to run:
# spark.sql("SELECT * FROM silver.stg_seed_mapping WHERE source_url = 'https://pakobserver.net/palestine-recognition-costs-a-high-price-of-genocide-famine/'").show(truncate=False)

🥈 [stg_seed_mapping] 뷰 : 상위 20개 데이터


25/09/17 02:19:38 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
25/09/17 02:19:44 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
[Stage 22:>                                                         (0 + 1) / 1]

+---------------+----------+-------------+--------------------------------+---------------------------------------------+----------+------------------+---------------+------------+-----------+------------+-----------------+-----------+--------------+-------------------+-----------------------------+----------------------+---------------------------+----------------+------------------+-----------------------+--------------------------------+-------------------+------------------+--------------------------+--------------------+--------------+---------------+---------------------+-----------+--------------+------------------------------+----------------------+---------------------------+----------------+------------------+-----------------------+--------------------------------------------------------------------------------+-------------------+------------------+--------------------------+--------------------+--------------+---------------+---------------------+------------------+-------

                                                                                

Staging 폴더의 stg_seed_actors_parsed.sql을 dbt run한 결과 확인

In [5]:
# Inspect the output of the stg_seed_actors_parsed model (20 rows, full column width).
print("🥈 [stg_seed_actors_parsed] 뷰 : 상위 20개 데이터")
actors_parsed_df = spark.sql("SELECT * FROM silver.stg_seed_actors_parsed LIMIT 20")
actors_parsed_df.show(truncate=False)

🥈 [stg_seed_actors_parsed] 뷰 : 상위 20개 데이터


                                                                                

+------------+--------------------------------------------------------------------------------------------------------------------------------------+--------+--------------------------------------------------------------------------------+--------+----------------------------------------------+------------+------------------------------------------------------------------+-----+
|actor_code  |description1                                                                                                                          |type1   |description2                                                                    |type2   |description3                                  |type3       |description4                                                      |type4|
+------------+--------------------------------------------------------------------------------------------------------------------------------------+--------+------------------------------------------------------------------------------

Staging 폴더의 stg_actors_description.sql을 dbt run한 결과 확인

In [6]:
# Inspect the output of the stg_actors_description model (20 rows, full column width).
print("🥈 [stg_actors_description] 뷰 : 상위 20개 데이터")
actors_description_query = "SELECT * FROM silver.stg_actors_description LIMIT 20"
actors_description_df = spark.sql(actors_description_query)
actors_description_df.show(truncate=False)

🥈 [stg_actors_description] 뷰 : 상위 20개 데이터
+---------------+-----------------------------------------------+--------------------------------------------------------------------------------+
|global_event_id|actor1_info                                    |actor2_info                                                                     |
+---------------+-----------------------------------------------+--------------------------------------------------------------------------------+
|1262473749     |France                                         |Inter-governmental organizations United Nations                                 |
|1262473750     |France                                         |Saudi Arabia                                                                    |
|1262473751     |France                                         |Saudi Arabia                                                                    |
|1262473743     |Judiciary: judges, courts                      |null       

Marts 폴더의 gold_1st_global_overview.sql을 dbt run한 결과 확인

In [7]:
# Inspect the output of the gold_1st_global_overview model.
# The query has no LIMIT; show() itself prints only the first 20 rows by default.
print("🥇 [gold_1st_global_overview] 뷰")
global_overview_df = spark.sql("SELECT * FROM gold.gold_1st_global_overview")
global_overview_df.show(truncate=False)

🥇 [gold_1st_global_overview] 뷰


                                                                                

+----------+----------------+-------------------+--------------------+------------------+-----------------+------------------+-------------------+-----------+--------------------------+
|event_date|country_name    |risk_score         |avg_goldstein_scale |total_num_mentions|total_num_sources|total_num_articles|avg_tone           |event_count|updated_at                |
+----------+----------------+-------------------+--------------------+------------------+-----------------+------------------+-------------------+-----------+--------------------------+
|2025-09-11|Afghanistan     |0.385573800907541  |0.7136363636363634  |65                |22               |65                |-0.4139449735135315|22         |2025-09-17 02:16:05.698069|
|2025-09-11|Australia       |0.814058132862184  |0.7170212765957444  |241               |47               |231               |-1.3417975027268148|47         |2025-09-17 02:16:05.698069|
|2025-09-11|Austria         |-1.826086956521739 |3.0                 |

Marts 폴더의 gold_2nd_country_events.sql을 dbt run한 결과 확인

In [8]:
# Inspect the output of the gold_2nd_country_events model.
# The query has no LIMIT; show() itself prints only the first 20 rows by default.
print("🥇 [gold_2nd_country_events] 뷰")
country_events_query = "SELECT * FROM gold.gold_2nd_country_events"
spark.sql(country_events_query).show(truncate=False)

🥇 [gold_2nd_country_events] 뷰


                                                                                

+----------+--------------+------------------------------+-------------------+--------------+-------------+--------------+------------------+-----------+-----------------+-------------------+---------------------+-----------------------+--------------------------+
|event_date|actor1_country|actor2_country                |avg_goldstein_scale|total_mentions|total_sources|total_articles|avg_tone          |event_count|verbal_coop_count|material_coop_count|verbal_conflict_count|material_conflict_count|updated_at                |
+----------+--------------+------------------------------+-------------------+--------------+-------------+--------------+------------------+-----------+-----------------+-------------------+---------------------+-----------------------+--------------------------+
|2025-09-11|Poland        |Russia                        |-5.288888888888889 |58            |18           |58            |-4.340645603320586|18         |5                |0                  |4             

Marts 폴더의 gold_3rd_events_summary.sql을 dbt run한 결과 확인

In [None]:
# Inspect the output of the gold_3rd_events_summary model.
# NOTE(review): the query is currently disabled, so this cell prints only the
# header line; uncomment the last line to display the model output.
events_summary_header = "🥇 [gold_3rd_events_summary] 뷰"
print(events_summary_header)
# spark.sql("SELECT * FROM gold.gold_3rd_events_summary").show(truncate=False)

Marts 폴더의 gold_4th_daily_detail_summary.sql을 dbt run한 결과 확인

In [9]:
# Inspect the output of the gold_4th_daily_detail_summary model.
# The query has no LIMIT; show() itself prints only the first 20 rows by default.
print("🥇 [gold_4th_daily_detail_summary] 뷰")
daily_detail_df = spark.sql("SELECT * FROM gold.gold_4th_daily_detail_summary")
daily_detail_df.show(truncate=False)

🥇 [gold_4th_daily_detail_summary] 뷰


                                                                                

+----------+--------------------------------+--------------+------------------------+---------------+------------+----------+-----------+--------------------------+----------------------------------------------------+-----------------+------------+
|event_date|event_category                  |action_country|actor1_info             |actor2_info    |num_articles|is_anomaly|event_type |processed_time            |mp_event_info                                       |avg_tone         |num_mentions|
+----------+--------------------------------+--------------+------------------------+---------------+------------+----------+-----------+--------------------------+----------------------------------------------------+-----------------+------------+
|2024-09-11|ENGAGE IN DIPLOMATIC COOPERATION|Norway        |NORWAY                  |ISRAEL         |2           |0         |Cooperation|2025-09-11 01:48:18.167479|Grant diplomatic recognition                        |-6.06673407482306|2           |
|202

In [None]:
# Stop the Spark session (and its underlying SparkContext) before reporting it.
# Previously the stop() call was commented out, so the message below claimed the
# session had been stopped while it was in fact still running.
# Skip this cell if you still need the session for further queries.
spark.stop()
print("\n✅ Spark 세션이 종료되었습니다.")