# Load Phase - MinIO & PostgreSQL

In [None]:
# Run parameters for the load phase.
# NOTE(review): these look like per-run defaults that an orchestrator would
# override (the paths live under /opt/airflow) — confirm against the caller.

# Directory that holds the processed CSV files read by this notebook.
input_dir = '/opt/airflow/data/processed'
# Date string embedded in the processed file names and passed to the loaders.
execution_date = '2025-10-31'

In [None]:
import sys
sys.path.append('/opt/airflow')
from src.load import load_to_minio, load_to_postgres
from src.transform import get_spark_session
from src.utils import setup_logging
from pyspark.sql import functions as F

# Create the logger for this phase; setup_logging receives the phase name
# ('load') and the log directory.
logger = setup_logging(
    'load',
    '/opt/airflow/logs',
)

# Record which execution date this run is loading, then show a banner to
# whoever is reading the notebook output.
logger.info(f'Loading data for {execution_date}')
print('📤 Loading to MinIO and PostgreSQL')

In [None]:
# Obtain the Spark session used for every read in this notebook.
spark = get_spark_session()

# Read the two processed CSV outputs for this execution date. Both reads use
# the same reader options ('header' and 'inferSchema' enabled); only the file
# name differs.
player_stats = (
    spark.read
    .option('header', True)
    .option('inferSchema', True)
    .csv(f'{input_dir}/player_stats_{execution_date}.csv')
)
hero_performance = (
    spark.read
    .option('header', True)
    .option('inferSchema', True)
    .csv(f'{input_dir}/hero_performance_{execution_date}.csv')
)

# count() is evaluated here to report how many player rows were read; the
# hero_performance row count is not reported.
print(f'✓ Loaded processed data: {player_stats.count()} player records')

In [None]:
# Archive both DataFrames to MinIO (the data lake).
# This step is deliberately best-effort: any failure is reported and logged
# as a warning, and the notebook continues on to the following cells.
print('\nLoading to MinIO (data lake)...')
try:
    # Only the upload (and the unpacking of its two returned values) is
    # guarded, so an unrelated error in the success path is not mislabelled
    # as a MinIO failure.
    s3_player, s3_hero = load_to_minio(player_stats, hero_performance, execution_date)
except Exception as e:
    print(f'⚠ MinIO load skipped (optional): {e}')
    # exc_info=True attaches the traceback to the warning so a swallowed
    # failure can still be diagnosed from the log file.
    logger.warning(f'MinIO load failed: {e}', exc_info=True)
else:
    # Success path: show the two values returned by load_to_minio.
    print(f'✓ Loaded to MinIO:\n  - {s3_player}\n  - {s3_hero}')

In [None]:
# Load both DataFrames into PostgreSQL. There is no exception handling around
# this call, so any error raised by load_to_postgres stops the cell.
print('\nLoading to PostgreSQL (queryable database)...')
load_to_postgres(
    player_stats,
    hero_performance,
    execution_date,
)
print('✓ Loaded to PostgreSQL')

In [None]:
# Mark the end of the load phase in the log and in the notebook output.
# NOTE(review): the printed message mentions MinIO unconditionally, but the
# MinIO cell catches and swallows its own errors, so this line is also printed
# when the MinIO upload did not succeed.
logger.info('Load phase complete')
print(
    '\n✓ Load phase complete! '
    'Data in MinIO (archive) and PostgreSQL (queryable)'
)