# Automated Data Ingestion Pipeline
**Objective:** Automate the extraction of raw CSV data and load it into the MySQL database using Python.
**Stack:** Python, Pandas, SQLAlchemy.

This script replaces the manual "Import Wizard" process, ensuring reproducibility.

In [None]:
# !pip install pandas sqlalchemy mysql-connector-python

In [None]:
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.types import Text


# 1. DATABASE CONFIGURATION
#
# Connection settings are read from environment variables so that real
# credentials do not have to be stored in the notebook. The fallbacks are
# the original local-development values.

DB_USER = os.environ.get('DB_USER', 'root')
DB_PASSWORD = os.environ.get('DB_PASSWORD', 'password')
DB_HOST = os.environ.get('DB_HOST', 'localhost')
DB_PORT = os.environ.get('DB_PORT', '3306')
DB_NAME = os.environ.get('DB_NAME', 'toy_store_ecommerce')

# Create SQLAlchemy Engine
# User and password are percent-encoded so that characters such as '@', ':'
# or '/' cannot be mistaken for URL delimiters when the string is parsed.
connection_string = (
    f"mysql+mysqlconnector://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
# NOTE: create_engine() is lazy -- no connection is opened at this point, so
# the message below only confirms that the engine object was built.
engine = create_engine(connection_string)

print("Database engine created successfully.")

✅ Database engine created successfully.


In [None]:
# 2. FILE MAPPING (CSV -> SQL Table)

# Every source file '<name>.csv' is mapped to the SQL table 'raw_<name>'.
# The dictionary keeps the order listed here, which is also the order in
# which the files are iterated when loading.
files_to_load = {
    f'{stem}.csv': f'raw_{stem}'
    for stem in (
        'orders',
        'order_items',
        'order_item_refunds',
        'products',
        'website_pageviews',
        'website_sessions',
    )
}

# Folder holding the raw CSV files (relative path)
data_folder = 'data/'

In [8]:
# 3. ETL EXECUTION LOOP
#
# For every (CSV file -> table) pair in `files_to_load`: read the CSV as raw
# text and write it to the target table with one TEXT column per CSV column.
# A failure on one file is reported and does not stop the remaining files.

failed_files = []  # names of files that could not be loaded

for file_name, table_name in files_to_load.items():
    file_path = os.path.join(data_folder, file_name)

    try:
        print(f"Processing: {file_name} -> Table: {table_name}...")

        # A. EXTRACT: Read CSV into Pandas DataFrame
        # dtype=str keeps every field as the text found in the file and
        # leaves missing fields as NaN, which to_sql stores as NULL.
        # Inferring types first and casting with astype(str) would store the
        # literal string 'nan' for missing values and could re-format
        # numbers (e.g. '1' -> '1.0' in an integer column with gaps).
        df = pd.read_csv(file_path, dtype=str)
        dtype_dict = {col: Text() for col in df.columns}

        # B. LOAD: Write DataFrame to SQL
        # if_exists='replace' drops and recreates the table, so re-running
        # the notebook does not append duplicate rows.
        df.to_sql(name=table_name,
                  con=engine,
                  if_exists='replace',
                  index=False,
                  chunksize=1000,
                  dtype=dtype_dict)

        print(f"Success! Loaded {len(df)} rows into '{table_name}'.")

    except Exception as e:
        # Best effort: remember the failure and continue with the next file.
        failed_files.append(file_name)
        print(f"Error loading {file_name}: {e}")

# Only claim completion when every file was actually loaded.
if failed_files:
    print(f"\n Data ingestion finished with errors. Failed files: {', '.join(failed_files)}")
else:
    print("\n All files processed. Data ingestion complete.")

Processing: orders.csv -> Table: raw_orders...
Success! Loaded 32313 rows into 'raw_orders'.
Processing: order_items.csv -> Table: raw_order_items...
Success! Loaded 40025 rows into 'raw_order_items'.
Processing: order_item_refunds.csv -> Table: raw_order_item_refunds...
Success! Loaded 1731 rows into 'raw_order_item_refunds'.
Processing: products.csv -> Table: raw_products...
Success! Loaded 4 rows into 'raw_products'.
Processing: website_pageviews.csv -> Table: raw_website_pageviews...
Success! Loaded 1188124 rows into 'raw_website_pageviews'.
Processing: website_sessions.csv -> Table: raw_website_sessions...
Success! Loaded 472871 rows into 'raw_website_sessions'.

 All files processed. Data ingestion complete.
