In [None]:
import os
from datetime import datetime
from urllib.parse import quote

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# CONFIGURATION

In [None]:
# Path of the CSV file to extract; it is relative, so it resolves against the
# directory the notebook kernel is running in.
CSV_FILE = "../data/sales_2.csv"

In [None]:
# MySQL connection settings.
# Each value is read from the environment variable of the same name so that
# real credentials never have to be saved inside the notebook; the literals
# are only fallbacks used when the variable is not set.
MYSQL_USER = os.environ.get("MYSQL_USER", "<user_name>")
MYSQL_PASSWORD = os.environ.get("MYSQL_PASSWORD", "<password>")
MYSQL_HOST = os.environ.get("MYSQL_HOST", "localhost")
MYSQL_PORT = os.environ.get("MYSQL_PORT", "3306")
MYSQL_DATABASE = os.environ.get("MYSQL_DATABASE", "sales_db")

# Destination table for the transformed rows.
TARGET_TABLE = "etl_transformed_sales"

## Database Connection

In [None]:
# SQLAlchemy connection string.
# The user name and password are percent-encoded so that characters such as
# "@", ":" or "/" inside them cannot be mistaken for URL delimiters.
conn_str = (
    f"mysql+pymysql://{quote(MYSQL_USER, safe='')}:{quote(MYSQL_PASSWORD, safe='')}"
    f"@{MYSQL_HOST}:{MYSQL_PORT}/{MYSQL_DATABASE}"
)

In [None]:
# Create SQLAlchemy engine from the connection string built above; it is the
# `con` handed to DataFrame.to_sql in the load step.
# NOTE(review): engines are presumably lazy, so a wrong host or credential
# would surface at the load step rather than here — confirm.
engine = create_engine(conn_str)

## STEP 1: READ CSV FILE (Extract)

In [None]:
# Extract: load the whole CSV into `df`, report its shape and preview it.
try:
    df = pd.read_csv(CSV_FILE)
    print(f"Data extracted from {CSV_FILE}, shape = {df.shape}")
    display(df.head())
except Exception as e:
    # Log a short message, then re-raise so the notebook stops here instead
    # of running the later cells without a usable `df`.
    print(f"Error reading CSV: {e}")
    raise

## STEP 2: TRANSFORMATIONS (Transform)

In [None]:
# Clean column names -> snake_case.
# Spaces become underscores and every capital letter gets an underscore in
# front of it, so a header such as "Order Date" briefly turns into
# "_Order__Date". Runs of underscores are collapsed before the edges are
# stripped; without that step the header would end up as "order__date" and
# the later df["order_date"] lookup would not find it.
df.columns = (
    df.columns
      .str.replace(' ', '_', regex=False)
      .str.replace(r'([A-Z])', r'_\1', regex=True)
      .str.replace(r'_+', '_', regex=True)
      .str.lower()
      .str.strip('_')
)

In [None]:
# Parse the order_date column into pandas datetimes.
df["order_date"] = df["order_date"].pipe(pd.to_datetime)

In [None]:
# Coerce the sales_amount column to a numeric dtype.
df["sales_amount"] = df["sales_amount"].pipe(pd.to_numeric)

In [None]:
# Derived field
# NOTE(review): as written, unit_price is an exact copy of sales_amount. If
# the data has a quantity column, this was presumably meant to be
# sales_amount / quantity — confirm against the CSV schema.
df["unit_price"] = df["sales_amount"]

In [None]:
# Categorical tier: label every row by the size of its sales amount.
# The thresholds are listed from highest to lowest and the first matching
# condition wins, so an amount of 500 or more is "High Value", one of 100 or
# more is "Medium Value", and everything else falls back to "Low Value".
choices = ["High Value", "Medium Value"]
conditions = [df["sales_amount"].ge(floor) for floor in (500, 100)]

df["sales_tier"] = np.select(conditions, choices, default="Low Value")

In [None]:
# Filter out non-positive values.
# .copy() makes the result an independent frame, so the column added in the
# next step is written to it directly rather than to a slice of the
# unfiltered data (which is what triggers pandas' SettingWithCopyWarning).
df = df[df["sales_amount"] > 0].copy()

In [None]:
# Add load timestamp: one UTC instant, stored timezone-naive, on every row.
# datetime.utcnow() is deprecated since Python 3.12, so take an aware "now" in
# UTC and drop the tz info to keep the same naive-UTC value as before.
df["load_timestamp"] = pd.Timestamp.now(tz="UTC").tz_localize(None)

In [None]:
# Show the first rows of the transformed frame as a visual check before loading.
print("Transformations complete.")
display(df.head())

## STEP 3: LOAD INTO MYSQL (Load)

In [None]:
# Load: append the transformed rows to TARGET_TABLE through the SQLAlchemy
# engine, without writing the DataFrame index and in batches of 1000 rows.
try:
    df.to_sql(
        name=TARGET_TABLE,
        con=engine,
        if_exists="append",
        index=False,
        chunksize=1000
    )

    print(f"Data successfully loaded into MySQL table: {TARGET_TABLE}")

except Exception as e:
    print(f"Error loading to MySQL: {e}")
    # Re-raise so a failed load stops the run instead of looking like a
    # finished one; the extract step treats its errors the same way.
    raise