### DOCKER POSTGRESQL

In [None]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime
import urllib


# ========================================
# Función para convertir la URL de Google Sheet en CSV
# ========================================
def get_google_sheet_csv_url(sheet_url: str) -> str:
    file_id = sheet_url.split("/d/")[1].split("/")[0]
    return f"https://docs.google.com/spreadsheets/d/{file_id}/export?format=csv"

# ========================================
# Función para cargar y transformar los datos
# ========================================
def ingest_and_clean_google_sheet(sheet_url: str) -> pd.DataFrame:
    csv_url = get_google_sheet_csv_url(sheet_url)
    df = pd.read_csv(csv_url)
    df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]
    df["fecha_ingesta"] = datetime.today().strftime('%Y-%m-%d')
    return df

# ========================================
# Función para cargar en PostgreSQL (Docker)
# ========================================
def load_to_postgresql(df: pd.DataFrame, table_name: str, user: str, password: str, host: str, port: str, db: str):
    # Codifica la contraseña si tiene caracteres especiales
    password = urllib.parse.quote_plus(password)

    # Crea la URL de conexión para SQLAlchemy
    engine_url = f"postgresql://{user}:{password}@{host}:{port}/{db}"
    
    # Crear engine SQLAlchemy
    engine = create_engine(engine_url)

    # Carga en la tabla (append = agrega, replace = reemplaza)
    df.to_sql(table_name, engine, if_exists="append", index=False)
    print(f"✅ Datos cargados exitosamente en la tabla: {table_name}")

# ========================================
# PARÁMETROS
# ========================================

# URL de tu Google Sheet (asegurate de que esté compartida públicamente o como "Cualquiera con el enlace")
sheet_url = "https://docs.google.com/spreadsheets/d/1ukJBMyl7lVsP9XpCgnbU0k-Va9Z10HlX/edit?usp=drive_link&ouid=106134657838899716490&rtpof=true&sd=true"
# sheet_url = "https://docs.google.com/spreadsheets/d/1z-KDtW1Rj3bri3gCtE3-G-VKHawCc2HK/edit?usp=drive_link&ouid=106134657838899716490&rtpof=true&sd=true"


# Datos de tu PostgreSQL en Docker
POSTGRES_USER = "postgres"       # Usuario de la BD
POSTGRES_PASSWORD = "123456"  # Contraseña de la BD
POSTGRES_HOST = "localhost"     # Dirección del contenedor (localhost si estás trabajando en tu máquina local)
POSTGRES_PORT = "5431"         # Puerto que mapea el contenedor
POSTGRES_DB = "postgres"      # Nombre de la base de datos
TABLE_NAME = "sales_data"  # Nombre de la tabla a la que cargarás los datos

# ========================================
# FLUJO DE EJECUCIÓN
# ========================================
df_final = ingest_and_clean_google_sheet(sheet_url)
load_to_postgresql(
    df=df_final,
    table_name=TABLE_NAME,
    user=POSTGRES_USER,
    password=POSTGRES_PASSWORD,
    host=POSTGRES_HOST,
    port=POSTGRES_PORT,
    db=POSTGRES_DB
)


### SNOWFLAKE

In [6]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime


# ========================================
# Función para convertir la URL de Google Sheet en CSV
# ========================================
def get_google_sheet_csv_url(sheet_url: str) -> str:
    file_id = sheet_url.split("/d/")[1].split("/")[0]
    return f"https://docs.google.com/spreadsheets/d/{file_id}/export?format=csv"


# ========================================
# Función para cargar y transformar los datos
# ========================================
def ingest_and_clean_google_sheet(sheet_url: str) -> pd.DataFrame:
    csv_url = get_google_sheet_csv_url(sheet_url)
    df = pd.read_csv(csv_url)
    df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]
    df["fecha_ingesta"] = datetime.today().strftime('%Y-%m-%d')
    return df


# ========================================
# Función para cargar en Snowflake
# ========================================
def load_to_snowflake(df: pd.DataFrame, table_name: str, user: str, password: str,
                      account: str, warehouse: str, database: str, schema: str):
    # String de conexión usando SQLAlchemy + Snowflake
    engine_url = (
        f"snowflake://{user}:{password}@{account}/{database}/{schema}?warehouse={warehouse}"
    )

    # Crear engine SQLAlchemy para Snowflake
    engine = create_engine(engine_url)

    # Cargar los datos
    df.to_sql(table_name, engine, if_exists="append", index=False)
    print(f"✅ Datos cargados exitosamente en Snowflake - tabla: {table_name}")


# ========================================
# PARÁMETROS
# ========================================

sheet_url = "https://docs.google.com/spreadsheets/d/1ukJBMyl7lVsP9XpCgnbU0k-Va9Z10HlX/edit?usp=drive_link"

# Credenciales Snowflake
SNOWFLAKE_USER = "l3chu6a"
SNOWFLAKE_PASSWORD = "!Lcy8TyGHpV4gWJ"
SNOWFLAKE_ACCOUNT = "NURNGQI-BH02687"  # sin ".snowflakecomputing.com"
SNOWFLAKE_WAREHOUSE = "COMPUTE_WH"
SNOWFLAKE_DATABASE = "NUMETRI"
SNOWFLAKE_SCHEMA = "PUBLIC"
TABLE_NAME = "sales_data"

# ========================================
# FLUJO DE EJECUCIÓN
# ========================================
df_final = ingest_and_clean_google_sheet(sheet_url)
load_to_snowflake(
    df=df_final,
    table_name=TABLE_NAME,
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA
)


  warn_incompatible_dep(


✅ Datos cargados exitosamente en Snowflake - tabla: sales_data
