# In [1]:
import pandas as pd
import time
import sys
import os
from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base

import logging

# The log file lives under ./logs, so the directory has to exist before
# basicConfig attaches a file handler pointing into it.
os.makedirs("logs", exist_ok=True)

# Root logger: DEBUG and above, appended to logs/ingestion_db.log, one
# timestamped "time - LEVEL - message" line per record.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="logs/ingestion_db.log",
    filemode="a",
)

# SQLite database file addressed relative to the current working directory.
engine = create_engine("sqlite:///Fraud_detection.db")

# Declarative base plus a create_all() pass over its metadata.
# NOTE(review): no ORM models are declared in the visible part of this file,
# so create_all() presumably has no tables to create here - confirm.
Base = declarative_base()
Base.metadata.create_all(engine)

def ingest_db(df, tablename, engine, *, if_exists="replace", chunksize=None):
    """Write a DataFrame to a database table.

    Args:
        df: The pandas DataFrame to persist.
        tablename: Name of the destination table.
        engine: Connection target handed to ``DataFrame.to_sql`` as ``con``.
        if_exists: What to do when the table already exists; forwarded to
            ``DataFrame.to_sql``. Defaults to ``"replace"``, which matches
            the behaviour callers relied on before this parameter existed.
        chunksize: Optional number of rows written per batch; forwarded to
            ``DataFrame.to_sql``. ``None`` writes all rows at once.

    Returns:
        None. The DataFrame index is never written as a column.
    """
    df.to_sql(
        tablename,
        con=engine,
        if_exists=if_exists,
        index=False,
        chunksize=chunksize,
    )

def load_raw_data():
    """Ingest every ``.csv`` file in the ``data`` directory into the database.

    Each CSV is read into a DataFrame and written through ``ingest_db`` to a
    table named after the file (file name without its extension), using the
    module-level ``engine``. A failure on one file is logged with its
    traceback and does not stop the remaining files from being processed.
    The total elapsed time is logged at the end.

    Raises:
        OSError: If the ``data`` directory cannot be listed (for example,
            it does not exist); that call is not covered by the per-file
            error handling.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments during a long ingestion.
    start = time.perf_counter()

    # os.listdir returns entries in arbitrary order; sorting makes the
    # ingestion order (and therefore the log) reproducible between runs.
    for file in sorted(os.listdir("data")):
        if not file.endswith(".csv"):
            continue
        try:
            df = pd.read_csv(os.path.join("data", file))
            logging.info("Ingesting %s into db", file)
            tablename = os.path.splitext(file)[0]
            ingest_db(df, tablename, engine)
        except Exception as e:
            # Deliberate best-effort boundary: record the full traceback
            # (not just the message) and carry on with the next file.
            logging.exception("Failed to ingest %s: %s", file, e)

    totaltime = (time.perf_counter() - start) / 60
    logging.info("------------ingestion complete-------------")
    logging.info("Total time taken: %.2f minutes", totaltime)

if __name__ == "__main__":
    # Script entry point: run the ingestion, making sure that a fatal error
    # (one that escapes load_raw_data) is recorded in the log file as well
    # as on stderr, and that log handlers are flushed either way.
    try:
        load_raw_data()
    except Exception:
        logging.exception("Ingestion aborted by an unexpected error")
        raise
    finally:
        logging.shutdown()  # Ensure all logs are written
