# 01_data_ingestion.ipynb

## **Objective:**
Load customer purchase data from CSV into a MySQL database for further analysis.

---

## **1️⃣ Import Necessary Libraries**

In [None]:
import pandas as pd
from sqlalchemy import create_engine
import logging
import os

## **2️⃣ Set Up Logging**

In [None]:
# Root logger: emit INFO and above, each record prefixed with a timestamp and its level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

## **3️⃣ Define File Paths and Database Configurations**

In [None]:
# Define file path
CSV_PATH = "../data/customer_data.csv"

# Database connection details.
# Each value can be overridden with an environment variable so that real
# credentials do not have to be stored in the notebook; the fallbacks keep
# the local-development defaults.
USERNAME = os.environ.get('MYSQL_USER', 'root')
PASSWORD = os.environ.get('MYSQL_PASSWORD', 'root')
HOST = os.environ.get('MYSQL_HOST', 'localhost')
DATABASE = os.environ.get('MYSQL_DATABASE', 'customer_analysis')

## **4️⃣ Connect to MySQL Database**
- Targets the `customer_analysis` database, which must already exist (this notebook does not create it)
- Establishes a connection

In [None]:
def connect_to_db():
    """Create a SQLAlchemy engine for the configured MySQL database and verify it.

    The connection URL is built from the module-level ``USERNAME``,
    ``PASSWORD``, ``HOST`` and ``DATABASE`` settings.

    Returns:
        The engine when a connection could actually be opened, otherwise
        ``None`` (the failure is logged, not raised).
    """
    from urllib.parse import quote

    engine = None
    try:
        # Percent-encode the credentials so characters such as '@', '/' or ':'
        # cannot corrupt the connection URL.
        user = quote(USERNAME, safe='')
        secret = quote(PASSWORD, safe='')
        engine = create_engine(f'mysql+mysqlconnector://{user}:{secret}@{HOST}/{DATABASE}')
        # create_engine() is lazy and does not contact the server; open (and
        # immediately release) one connection so that an unreachable host or
        # bad credentials are detected here rather than at first use.
        with engine.connect():
            pass
    except Exception as e:
        logging.error(f"Error connecting to MySQL database: {e}")
        if engine is not None:
            # Release any pooled resources held by the unusable engine.
            engine.dispose()
        return None

    logging.info("MySQL database connected successfully.")
    return engine

## **5️⃣ Load CSV Data into MySQL**
- Reads CSV data using Pandas
- Creates the `customer_purchases` table, replacing it if it already exists
- Inserts data into the database

In [None]:
def load_csv_to_mysql(CSV_PATH):
    """Read the CSV at ``CSV_PATH`` and write it to the ``customer_purchases`` table.

    Failures (missing file, unreadable CSV, no engine, failed insert) are
    logged rather than raised, and ``None`` is returned on every path.
    An existing table with the same name is replaced.
    """
    # Guard: bail out early when the path does not exist.
    path_is_present = os.path.exists(CSV_PATH)
    if not path_is_present:
        logging.error(f"File not found: {CSV_PATH}")
        return None

    # Step 1: parse the CSV into a DataFrame.
    try:
        frame = pd.read_csv(CSV_PATH)
        logging.info(f"Data loaded successfully from {CSV_PATH}")
    except Exception as read_err:
        logging.error(f"Error loading data from {CSV_PATH}: {read_err}")
        return None

    # Step 2: obtain an engine; connect_to_db() returns None on failure.
    db_engine = connect_to_db()
    if not db_engine:
        logging.error("Database connection failed.")
        return None

    # Step 3: write the frame, always releasing the engine afterwards.
    target_table = 'customer_purchases'
    try:
        frame.to_sql(
            name=target_table,
            con=db_engine,
            if_exists='replace',
            index=False,
        )
        logging.info(f"Data successfully inserted into MySQL table: {target_table}")
    except Exception as write_err:
        logging.error(f"Error inserting data into MySQL table: {write_err}")
    finally:
        db_engine.dispose()
        logging.info("Database connection closed.")

    return None

## **6️⃣ Execute Data Ingestion**

In [None]:
# Run the ingestion for the configured CSV file; outcomes are reported via the log.
load_csv_to_mysql(CSV_PATH=CSV_PATH)

## **Summary & Next Steps**
- ✅ If no errors were logged above, the CSV data has been ingested into MySQL.
- ✅ Next, move to `02_data_cleaning.ipynb` for data preprocessing.