In [1]:
import sqlite3
import pandas as pd

# ETL script: read sales facts joined to their dimension tables from a SQLite
# database, then export (a) the detailed rows and (b) total sales per store
# and year as CSV files.

# Input database and output file locations.
db_path = "retail_sales.db"
sales_csv_path = "transformed_sales.csv"
summary_csv_path = "sales_summary_by_store_year.csv"

# Detailed sales: one row per FactSales record, with the customer, product,
# date and store resolved through inner joins on the dimension tables.
sales_query = """
SELECT 
    f.SaleID,
    c.CustomerName,
    p.ProductName,
    t.Date,
    s.StoreName,
    f.SalesAmount
FROM FactSales f
JOIN DimCustomer c ON f.CustomerID = c.CustomerID
JOIN DimProduct p ON f.ProductID = p.ProductID
JOIN DimTime t ON f.TimeID = t.TimeID
JOIN DimStore s ON f.StoreID = s.StoreID;
"""

# Summary: SalesAmount totalled per store and calendar year. strftime('%Y', ...)
# yields the year as text, so no datetime conversion is needed afterwards.
summary_query = """
SELECT 
    s.StoreName,
    strftime('%Y', t.Date) AS Year,
    SUM(f.SalesAmount) AS TotalSales
FROM FactSales f
JOIN DimStore s ON f.StoreID = s.StoreID
JOIN DimTime t ON f.TimeID = t.TimeID
GROUP BY s.StoreName, Year
ORDER BY s.StoreName, Year;
"""

# 1. Connect to the SQLite database.
# NOTE: sqlite3.connect() creates an empty database file when db_path does not
# exist; in that case the queries below fail because the tables are missing.
conn = sqlite3.connect(db_path)

# The try/finally guarantees the connection is released even when a query,
# the date conversion or a CSV write raises.
try:
    # 2. Extract: all sales with dimension details.
    sales_df = pd.read_sql_query(sales_query, conn)

    # Parse the Date column; errors="coerce" turns unparseable values into NaT
    # instead of raising.
    sales_df["Date"] = pd.to_datetime(sales_df["Date"], errors="coerce")

    # 3. Load: save the detailed sales to CSV (without the DataFrame index).
    sales_df.to_csv(sales_csv_path, index=False)
    print(f" Saved transformed sales data to {sales_csv_path}")

    # 4. Transform: sales summary by store and year, aggregated in SQL.
    summary_df = pd.read_sql_query(summary_query, conn)

    # 5. Load: save the summary to CSV.
    summary_df.to_csv(summary_csv_path, index=False)
    print(f" Saved sales summary to {summary_csv_path}")
finally:
    # Always close the connection, on success and on failure alike.
    conn.close()




 Saved transformed sales data to transformed_sales.csv
 Saved sales summary to sales_summary_by_store_year.csv
