### This notebook uploads every CSV file in a local folder to a Snowflake internal stage and loads each file into its own Snowflake table.

In [None]:
pip install snowflake-connector-python pandas python-dotenv

In [None]:
import os
import re
import pandas as pd
import snowflake.connector
from snowflake.connector import connect
from dotenv import load_dotenv
import logging
import urllib.parse

In [None]:
#Path to CSV folder
# Load .env before reading FOLDER_PATH so the variable can be defined there;
# otherwise this lookup only sees variables already present in the process
# environment. Calling load_dotenv() again later is harmless.
load_dotenv(override=True)
csv_folder = os.getenv("FOLDER_PATH")
if not csv_folder:
    # os.listdir(None) lists the current working directory, so an unset
    # variable would silently scan the wrong folder. Fail loudly instead.
    raise ValueError(
        "FOLDER_PATH is not set; define it in the environment or in .env "
        "and point it at the folder containing the CSV files."
    )

In [None]:
# Read the .env file into the process environment; override=True lets values
# from the file replace variables that are already set.
load_dotenv(override=True)

#Establish Snowflake connection
# Each connect() keyword is filled from the environment variable paired with
# it below.
conn = connect(
    **{
        keyword: os.getenv(env_var)
        for keyword, env_var in (
            ("user", "SNOWFLAKE_USER"),
            ("password", "SNOWFLAKE_PASSWORD"),
            ("account", "SNOWFLAKE_ACCOUNT"),
            ("warehouse", "WAREHOUSE_NAME"),
            ("database", "DATABASE_NAME"),
            ("schema", "SCHEMA_NAME"),
        )
    }
)
# One cursor, used for the statements executed on this connection.
cur = conn.cursor()


#Loop through all CSVs
def _quote_ident(name):
    """Return *name* as a double-quoted Snowflake identifier.

    Embedded double quotes are doubled so a file or header name containing
    `"` cannot terminate the identifier early and break the statement.
    """
    return '"' + str(name).replace('"', '""') + '"'


def _snowflake_type(dtype):
    """Map a pandas dtype to the column type used in the CREATE TABLE statement."""
    if pd.api.types.is_integer_dtype(dtype):
        return "NUMBER"
    if pd.api.types.is_float_dtype(dtype):
        return "FLOAT"
    if pd.api.types.is_bool_dtype(dtype):
        return "BOOLEAN"
    if pd.api.types.is_datetime64_any_dtype(dtype):
        return "TIMESTAMP_NTZ"
    # Anything else (object/string columns) is stored as text.
    return "VARCHAR"


# try/finally guarantees the cursor and connection are closed even when a
# statement fails part-way through the folder.
try:
    for filename in os.listdir(csv_folder):
        # Case-insensitive match so files named "*.CSV" are not skipped.
        if not filename.lower().endswith(".csv"):
            continue

        filepath = os.path.join(csv_folder, filename)
        table_name = os.path.splitext(filename)[0].upper().replace(" ", "_").replace("-", "_")
        # The same quoted name is used for CREATE and COPY. Previously COPY used
        # the bare name, which fails for names that are only valid when quoted
        # (e.g. "2024_REPORT" from 2024-report.csv).
        qualified_table = f'"MARKETING_CLOUD_EXTRACTS".{_quote_ident(table_name)}'

        print(f"Processing: {filename}")

        #Preview file with pandas to determine column types
        # Remember which encoding worked so COPY INTO decodes the file the same
        # way. NOTE(review): only a sample is read, so invalid UTF-8 bytes
        # further into a large file may go undetected here.
        try:
            df_sample = pd.read_csv(filepath, nrows=100, encoding='utf-8', on_bad_lines='skip')
            copy_encoding = "UTF8"
            print(f"{filename} loaded with UTF-8 encoding.")
        except UnicodeDecodeError:
            df_sample = pd.read_csv(filepath, nrows=100, encoding='windows-1252', on_bad_lines='skip')
            copy_encoding = "WINDOWS1252"
            print(f"{filename} loaded with Windows-1252 encoding.")

        #Build CREATE TABLE query, make sure column datatypes are parsed correctly
        col_defs = []
        for col, dtype in zip(df_sample.columns, df_sample.dtypes):
            col_sql = str(col).upper().replace(" ", "_").replace("-", "_")
            col_defs.append(f"{_quote_ident(col_sql)} {_snowflake_type(dtype)}")

        #each table name points to new Snowflake table in schema
        create_stmt = f'CREATE OR REPLACE TABLE {qualified_table} ({", ".join(col_defs)});'
        cur.execute(create_stmt)
        print(f"✅ Created table: {table_name}") #check, icon is unnecessary but helps readability

        #Upload to internal stage
        escaped_path = filepath.replace("\\", "/")  #deal with annoying slash problem
        put_stmt = f"PUT 'file://{escaped_path}' @MC_EXTRACT_STAGE AUTO_COMPRESS=TRUE OVERWRITE=TRUE"
        cur.execute(put_stmt) #execute the query
        print(f"Uploaded to stage: {filename}") #check that file was uploaded to stage

        #COPY INTO table
        # The stage path is single-quoted so file names containing spaces or
        # other special characters are accepted. AUTO_COMPRESS adds the ".gz".
        # NOTE(review): a file name containing a single quote would still break
        # the PUT and COPY statements.
        copy_stmt = f"""
            COPY INTO {qualified_table}
            FROM '@MC_EXTRACT_STAGE/{filename}.gz'
            FILE_FORMAT = (TYPE = CSV SKIP_HEADER = 1 FIELD_OPTIONALLY_ENCLOSED_BY = '"' ENCODING = '{copy_encoding}')
            ON_ERROR = 'CONTINUE';
        """
        cur.execute(copy_stmt)
        # ON_ERROR = 'CONTINUE' skips bad rows without raising, so print what
        # COPY reported to make partial loads visible.
        for copy_result in cur.fetchall():
            print(f"COPY result: {copy_result}")
        print(f"✅ Loaded into table: {table_name}\n")
finally:
    cur.close()
    conn.close()

# Only reached when every file was processed without an exception.
print("All files uploaded and loaded successfully.")