In [None]:
import os
import time
import pandas as pd
from sqlalchemy import create_engine
import logging

# -----------------------------
# Setup logging
# -----------------------------
# The log directory must exist before FileHandler tries to open the log file.
os.makedirs("logs", exist_ok=True)
log_path = os.path.join("logs", "ingestion_log.log")

# Detach AND close any handlers already attached to the root logger.
# Re-running this cell in Jupyter would otherwise stack duplicate handlers
# (duplicated log lines), and a handler that is only removed but never closed
# keeps its underlying log file open for the rest of the session.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    handler.close()

# Append to the log file so earlier runs are preserved.
file_handler = logging.FileHandler(log_path, mode='a')
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatter)

# Configure the root logger so every module's records reach the file.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(file_handler)

logger.info("Application started")

# -----------------------------
# Setup database engine
# -----------------------------
# Connection URL for the target database. It can be overridden through the
# INVENTORY_DB_URL environment variable so that credentials do not have to
# live in source control; the literal below is kept only as a
# backward-compatible default.
# NOTE(review): the default embeds a plaintext password - rotate it and drop
# the fallback once every environment sets INVENTORY_DB_URL.
engine = create_engine(
    os.environ.get(
        "INVENTORY_DB_URL",
        "mysql+pymysql://root:Mohammed313@localhost:3306/inventory_db",
    )
)

# -----------------------------
# Function to insert a DataFrame or chunk into MySQL
# -----------------------------
def ingest_db(df, table_name, engine):
    """Append a DataFrame (or one chunk of it) to a SQL table.

    The rows are written with ``DataFrame.to_sql`` using
    ``if_exists='append'`` (the table is created if missing, otherwise rows
    are added), without the DataFrame index, and with ``method="multi"``
    (multiple rows per INSERT statement).

    Args:
        df: The DataFrame or chunk to insert.
        table_name: Name of the destination table.
        engine: SQLAlchemy engine/connection passed to ``to_sql`` as ``con``.

    Returns:
        bool: ``True`` if the insert succeeded, ``False`` if it raised.
        Failures are logged (with traceback) and not re-raised, so one bad
        chunk does not abort the whole ingestion; the return value lets the
        caller tell the two outcomes apart.
    """
    try:
        df.to_sql(table_name, con=engine, if_exists='append', index=False, method="multi")
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which the plain error message alone would lose.
        logger.exception(f"Failed to insert chunk into table '{table_name}': {e}")
        return False

    logger.info(f"Chunk inserted into table '{table_name}' ({len(df)} rows)")
    return True

# -----------------------------
# Function to load CSVs from Data folder
# -----------------------------
def load_raw_data(data_folder="Data", chunksize=100_000):
    """Load every CSV file found in ``data_folder`` into the database.

    Each file whose name ends in ``.csv`` (case-insensitive) is read with
    ``pandas.read_csv`` in chunks and each chunk is passed to ``ingest_db``
    together with the module-level ``engine``. The destination table name is
    the file name without its extension. A failure while reading one file is
    logged and the remaining files are still processed.

    Args:
        data_folder: Directory that is scanned (non-recursively) for CSV
            files. Defaults to ``"Data"``.
        chunksize: Number of rows read per chunk; lower it if memory is
            tight. Defaults to ``100_000``.

    Raises:
        OSError: If ``data_folder`` cannot be listed (e.g. it does not exist).
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long ingestion.
    start_time = time.perf_counter()

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # processing order (and therefore the log) reproducible.
    for file in sorted(os.listdir(data_folder)):
        if not file.lower().endswith(".csv"):
            continue

        file_path = os.path.join(data_folder, file)
        table_name = os.path.splitext(file)[0]

        logger.info(f"Processing file: {file} -> table: {table_name}")

        try:
            for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunksize)):
                # ingest_db handles its own exceptions. Only an explicit
                # False is treated as a failure; any other return value
                # (including None) counts as success.
                if ingest_db(chunk, table_name, engine) is False:
                    logger.warning(f"Chunk {i+1} from {file} was not inserted")
                else:
                    logger.info(f"Chunk {i+1} inserted from {file}")

                # flush logs immediately so progress is visible while running
                for handler in logger.handlers:
                    handler.flush()

        except Exception as e:
            # Keep going with the next file; the traceback is logged so the
            # cause (bad CSV, unreadable file, ...) can be diagnosed later.
            logger.exception(f"Failed to process file {file}: {e}")

    total_minutes = (time.perf_counter() - start_time) / 60
    logger.info("----------Ingest Completed-----------")
    logger.info(f"Total Time Taken: {total_minutes:.2f} minutes")

# -----------------------------
# Entry point
# -----------------------------
# Start the ingestion only when this file is executed directly; importing it
# as a module does not call load_raw_data().
if __name__ == "__main__":
    load_raw_data()
