In [11]:
import pandas as pd
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os

load_dotenv('../.env')
engine = create_engine(os.getenv('DB_URL'))
print('CONNECTED to database')

CONNECTED to database


In [12]:
# Load UCI Onmline Retail
df = pd.read_excel(
    '/Users/surajkumar/Documents/GitHub/personal-portfolio-projects/revenue_intelligence_platform/data/raw/Online_Retail.xlsx', 
    engine = 'openpyxl'
    )
print(f"ROws: {len(df):,}")
print(f"Columns:{list(df.columns)}")
print(f"Date range: {df.InvoiceDate.min()} -> {df.InvoiceDate.max()}")

ROws: 541,909
Columns:['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country']
Date range: 2010-12-01 08:26:00 -> 2011-12-09 12:50:00


In [13]:
# Add pipeline metadata
df['_load_timestamp'] = pd.Timestamp.now('UTC')
df['_source_file'] = 'Online_Retail.xlsx'

df.columns = (
    df.columns
      .str.strip()
      .str.lower()
      .str.replace(" ", "_")
)

# Load to bronze
df.to_sql(
    'online_retail', engine, schema='bronze',
    if_exists='replace', index=False, chunksize=5000
)
with engine.connect() as conn:
    n = conn.execute(text('select count(*) from bronze.online_retail')).scalar()
    print(f"bronze.online_retail: {n:} rows")

bronze.online_retail: 541909 rows


In [14]:
# Load Telco Churn
df2 = pd.read_csv(
    "/Users/surajkumar/Documents/GitHub/personal-portfolio-projects/revenue_intelligence_platform/data/raw/Telco_Churn.csv"
    )
print(f"Rows: {len(df2):,}")
print(f"Columns: {list(df.columns)}")

df2['_load_timestamp'] = pd.Timestamp.now('UTC')
df2['_source_file'] = 'Telco_Churn.csv'

df2.columns = (
    df2.columns
      .str.strip()
      .str.lower()
      .str.replace(" ", "_")
)

df2.to_sql(
    'telco_churn', engine, schema='bronze',
    if_exists='replace', index=False
)

with engine.connect() as conn:
    n = conn.execute(text("select count(*) from bronze.telco_churn")).scalar()
    print(f"bronze.telco_churn: {n:,} rows")

Rows: 7,043
Columns: ['invoiceno', 'stockcode', 'description', 'quantity', 'invoicedate', 'unitprice', 'customerid', 'country', '_load_timestamp', '_source_file']
bronze.telco_churn: 7,043 rows
