In [1]:
import mariadb
import os
import pandas as pd
import yaml
from columnar import columnar
from csv import DictReader
from datetime import date, timedelta
from dotenv import load_dotenv
from pandas.tseries.holiday import USFederalHolidayCalendar

_ = load_dotenv("config.env")
with open("config.yaml", "r") as stream:
    config = yaml.safe_load(stream)
    
DATABASE_NAME: str = "data_mart"

In [2]:
connection = mariadb.connect(
    host="127.0.0.1",
    port=23306,
    user=os.getenv("user"),
    password=os.getenv("password"),
    autocommit=False,
)
cursor = connection.cursor()

## Deliverable 1

In [3]:
sales_report_begin_date: str = "2020-12-01"
sales_report_end_date: str = "2020-12-31"

def total_sales_per_sku(db_name: str, table_name: str, date_field: str):
    cursor.execute("DROP TABLE IF EXISTS {db}.total_sales_per_sku".format(db=db_name))
    cursor.execute(
        """
        CREATE TABLE {db_name}.total_sales_per_sku (sku INT, count INT)
        SELECT sku, COUNT(*) AS "count"
                FROM {db_name}.{table_name}
                WHERE {date_field} BETWEEN '{begin_date}' AND '{end_date}'
                GROUP BY sku
                ORDER BY COUNT(*) DESC
        """.format(
            db_name=db_name,
            table_name=table_name,
            date_field=date_field,
            begin_date=sales_report_begin_date,
            end_date=sales_report_end_date,
        )
    )

In [4]:
regen_total_sales_per_sku = False

if regen_total_sales_per_sku:
    total_sales_per_sku("team8", "transactions", "transaction_date")
    total_sales_per_sku("team7", "purchases", "Date")

In [5]:
if regen_total_sales_per_sku:
    cursor.execute("DROP TABLE IF EXISTS {db}.total_sales_per_sku".format(db=DATABASE_NAME))
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS {db}.total_sales_per_sku (sku INT, count INT, rank INT, team7_count INT, team7_rank INT, team8_count INT, team8_rank INT)
        SELECT sku, total as count, rank, team7_count, team7_rank, team8_count, team8_rank FROM
            (SELECT * FROM
                (SELECT sku, CAST(SUM(count) AS INTEGER) AS total, ROW_NUMBER() OVER (ORDER BY total DESC) AS rank
                    FROM (SELECT sku, count FROM team8.total_sales_per_sku
                          UNION ALL
                          SELECT sku, count FROM team7.total_sales_per_sku) total
                    GROUP BY total.sku) global
                JOIN (SELECT sku as team8_sku, count AS team8_count, ROW_NUMBER() OVER (ORDER BY count DESC) AS team8_rank FROM team8.total_sales_per_sku) team8
                ON sku = team8_sku) gl_team8
            JOIN (SELECT sku AS team7_sku, count AS team7_count, ROW_NUMBER() OVER (ORDER BY count DESC) AS team7_rank FROM team7.total_sales_per_sku) team7
            ON sku = team7_sku
            ORDER BY rank
        """.format(
            db=DATABASE_NAME
        )
    )

In [6]:
cursor.execute(
    """
    SELECT product_catalog.sku, CONCAT(manufacturer, ": ", product_name, " (", size, ")") AS product, count, rank, team7_count, team7_rank, team8_count, team8_rank
    FROM {db}.total_sales_per_sku
    JOIN {db}.product_catalog ON product_catalog.sku = total_sales_per_sku.sku
    ORDER BY rank
    LIMIT 25
    """.format(
        db=DATABASE_NAME
    )
)
tuples = cursor.fetchall()
lists = [list(x) for x in tuples]
best_selling_table = columnar(
    lists,
    ["sku", "product", "total # sold", "total rank", "team7 # sold", "team7 rank", "team8 # sold", "team8 rank"],
    no_borders=True,
    terminal_width=150,
)

In [7]:
cursor.execute(
    """
    SELECT SUM(sale_price) FROM team8.transactions
    WHERE transaction_date BETWEEN '{begin_date}' AND '{end_date}'
    """.format(
        begin_date=sales_report_begin_date,
        end_date=sales_report_end_date,
    )
)
team8_total_sales = float(cursor.fetchall()[0][0])

In [8]:
cursor.execute(
    """
    SELECT COUNT(DISTINCT(transaction_id)) FROM team8.transactions
    WHERE transaction_date BETWEEN '{begin_date}' AND '{end_date}'
    """.format(
        begin_date=sales_report_begin_date,
        end_date=sales_report_end_date,
    )
)
team8_customer_count = cursor.fetchall()[0][0]

In [9]:
cursor.execute(
    """
    SELECT * FROM team7.purchases
    WHERE Date BETWEEN '{begin_date}' AND '{end_date}'
    GROUP BY Date, `Customer Number`
    """.format(
        begin_date=sales_report_begin_date,
        end_date=sales_report_end_date,
    )
)
team7_customer_count = len(cursor.fetchall())

In [10]:
cursor.execute(
    """
    SELECT SUM(`Sale Price`) FROM team7.purchases
    WHERE Date BETWEEN '{begin_date}' AND '{end_date}'
    """.format(
        begin_date=sales_report_begin_date,
        end_date=sales_report_end_date,
    )
)
team7_total_sales = float(cursor.fetchall()[0][0])

In [11]:
total_sales = team7_total_sales + team8_total_sales
total_customers = team7_customer_count + team8_customer_count

header_table = columnar(
    [
        ["Total Sales", "${:,.2f}".format(total_sales), "${:,.2f}".format(team7_total_sales), "${:,.2f}".format(team8_total_sales)],
        ["Customer Count", "{:,}".format(total_customers), "{:,}".format(team7_customer_count), "{:,}".format(team8_customer_count)],
    ],
    ["measure", "combined", "team7", "team8"],
    no_borders=True,
    terminal_width=150,
)

print(header_table)

print(best_selling_table)

          
  MEASURE         COMBINED        TEAM7          TEAM8          
    
  Total Sales     $10,731,154.19  $5,297,829.40  $5,433,324.79  
  Customer Count  70,452          35,156         35,296         

                  
  SKU       PRODUCT                                                     TOTAL # SOLD  TOTAL RANK  TEAM7 # SOLD  TEAM7 RANK  TEAM8 # SOLD  TEAM8 RANK  
    
  42356001  Rowan Dairy: 1.00% Milk (1/2 gal)                           8317          1           4142          2           4175          2           
  42357001  Rowan Dairy: 2.00% Milk (1 gal)                             8290          2           4185          1           4105          4           
  42358001  Rowan Dairy: 2.00% Milk (1/2 gal)                           8283          3           4081          4           4202          1           
  42355001  Rowan Dairy: 1.00% Milk (1 gal)                             8197          4           4130          3           4067          6           
  4236000

## Deliverable 2a

### Insert product class file into MariaDB

In [12]:
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS {db}.product_class (
        product_class_id INT UNSIGNED PRIMARY KEY NOT NULL,
        product_subcategory TEXT NOT NULL,
        product_category TEXT NOT NULL,
        product_department TEXT NOT NULL,
        product_family TEXT NOT NULL
    )
    """.format(
        db=DATABASE_NAME
    )
)

with open("product_class.txt") as csv_file:
    csv_reader = DictReader(csv_file, delimiter="\t")
    for row in csv_reader:
        result = cursor.execute(
            """
            INSERT IGNORE INTO {db}.product_class (product_class_id, product_subcategory, product_category, product_department, product_family) VALUES (?, ?, ?, ?, ?)
            """.format(
                db=DATABASE_NAME
            ),
            (
                row["product_class_id"],
                row["product_subcategory"].upper(),
                row["product_category"].upper(),
                row["product_department"].upper(),
                row["product_family"].upper(),
            ),
        )
connection.commit()

### Create Product Dimension table in MariaDB

In [13]:
cursor.execute("DROP TABLE IF EXISTS {db}.product_dimension".format(db=DATABASE_NAME))
cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS {db}.product_dimension (
        product_key INT PRIMARY KEY AUTO_INCREMENT,
        sku INT UNSIGNED NOT NULL,
        product_name TEXT NOT NULL,
        product_class_id INT UNSIGNED,
        product_subcategory TEXT,
        product_category TEXT,
        product_department TEXT,
        product_family TEXT,
        size TEXT NOT NULL,
        number_per_case INT UNSIGNED NOT NULL,
        brand_name TEXT,
        manufacturer TEXT NOT NULL,
        supplier TEXT NOT NULL,
        product_class_source_key INT UNSIGNED NOT NULL
    )
    """.format(
        db=DATABASE_NAME
    )
)

### Create Product Class Source table in MariaDB

In [14]:
cursor.execute("DROP TABLE IF EXISTS {db}.product_class_source".format(db=DATABASE_NAME))
cursor.execute(
    """
    CREATE TABLE {db}.product_class_source (
        source_key INT PRIMARY KEY NOT NULL,
        source TEXT NOT NULL
    )
    """.format(
        db=DATABASE_NAME
    )
)


query = "INSERT INTO {db}.product_class_source (source_key, source) VALUES (?, ?)".format(db=DATABASE_NAME)

records_to_insert = [
    (0, "NO PRODUCT CLASS"),
    (1, "FROM PRODUCT TABLE"),
    (2, "SPECIFIC ITEM TYPE MAPPED TO SPECIFIC PRODUCT SUBCATEGORY"),
    (3, "SPECIFIC MANUFACTURER MAPPED TO SPECIFIC PRODUCT SUBCATEGORY"),
    (4, "BASED ON ITEM TYPE AND STRING SEARCH OF PRODUCT NAME"),
    (5, "BASED ON MANUFACTURER AND STRING SEARCH OF PRODUCT NAME"),
    (6, "BASED ON STRING SEARCH OF PRODUCT NAME"),
]

cursor.executemany(query, records_to_insert)
connection.commit()

### Populate Product Dimension table in MariaDB

In [15]:
cursor.execute("SELECT * FROM {db}.product_catalog".format(db=DATABASE_NAME))

type_replacements = config.get("typeReplacements")
manufacturer_to_class = config.get("manufacturerToClass")
product_name_to_class = config.get("productNameToClass")

count = 0
for row in [
    {cursor.description[index][0]: column for index, column in enumerate(value)} for value in cursor.fetchall()
]:
    item_type = row["item_type"]
    source = 1

    if item_type in type_replacements:
        item_type = type_replacements[item_type]
        source = 2

    cursor.execute(
        "SELECT * FROM {db}.product_class WHERE product_subcategory='{item_type}'".format(
            db=DATABASE_NAME, item_type=item_type
        )
    )
    match = cursor.fetchall()

    if len(match) == 0:

        if item_type == "COFFEE/CREAMER" and "coffee" in row["product_name"].lower():
            item_type = "COFFEE"
            source = 4
        elif item_type == "FROZEN FOOD":
            if "waffle" in row["product_name"].lower():
                item_type = "WAFFLES"
                source = 4
        elif item_type == "JELLY/JAM":
            if "jam" in row["product_name"].lower():
                item_type = "JAM"
                source = 4
            elif "jelly" in row["product_name"].lower():
                item_type = "JELLY"
                source = 4
        elif item_type == "SNACKS":
            if "variety pack" in row["product_name"].lower():
                item_type = "CHIPS"
                source = 4
        elif item_type == "SODA/JUICE/DRINKS":
            if "cocoa" in row["product_name"].lower():
                item_type = "CHOCOLATE"
                source = 4

        # rules by manufacturer
        if row["manufacturer"] in manufacturer_to_class:
            item_type = manufacturer_to_class[row["manufacturer"]]
            source = 3
        elif row["manufacturer"] == "Starbucks" and "cocoa" in row["product_name"].lower():
            item_type = "CHOCOLATE"
            source = 5

        # rules by product_name
        for keyword in product_name_to_class.keys():
            if keyword in row["product_name"].lower():
                item_type = product_name_to_class[keyword]
                source = 6

        cursor.execute(
            "SELECT * FROM {db}.product_class WHERE product_subcategory='{item_type}'".format(
                db=DATABASE_NAME, item_type=item_type
            )
        )
        match = cursor.fetchall()

    if len(match) == 0:
        source = 0

    cursor.execute(
        """
        INSERT INTO {db}.product_dimension (sku, product_name, product_class_id, product_subcategory, product_category, product_department,
        product_family, size, number_per_case, brand_name, manufacturer, supplier, product_class_source_key)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """.format(
            db=DATABASE_NAME
        ),
        (
            row["sku"],
            row["product_name"],
            0 if len(match) == 0 else match[0][0],
            None if len(match) == 0 else match[0][1],
            None if len(match) == 0 else match[0][2],
            None if len(match) == 0 else match[0][3],
            None if len(match) == 0 else match[0][4],
            row["size"],
            12,
            None,
            row["manufacturer"],
            "Rowan Dairy" if row["item_type"] == "MILK" else "Rowan Warehouse",
            source,
        ),
    )
    connection.commit()

In [16]:
cursor.execute("SELECT * FROM {db}.product_dimension LIMIT 25".format(db=DATABASE_NAME))
for row in cursor.fetchall():
    print(row)

(1, 42081001, 'Jambalaya Rice Mix', 57, 'RICE', 'STARCHY FOODS', 'STARCHY FOODS', 'FOOD', '12\xa0oz', 12, None, 'Zatarains', 'Rowan Warehouse', 2)
(2, 42082001, 'Jambalaya Rice Mix', 57, 'RICE', 'STARCHY FOODS', 'STARCHY FOODS', 'FOOD', '8\xa0oz', 12, None, 'Zatarains', 'Rowan Warehouse', 2)
(3, 42083001, 'Guacamole Regular', 83, 'DIPS', 'SNACK FOODS', 'SNACK FOODS', 'FOOD', '8\xa0oz', 12, None, 'Yucatan', 'Rowan Warehouse', 6)
(4, 42084001, 'Coffee Original Blend', 7, 'COFFEE', 'DRY GOODS', 'BAKING GOODS', 'DRINK', '12\xa0oz', 12, None, 'Yuban', 'Rowan Warehouse', 4)
(5, 42085001, 'GoGurt Variety Pack', 6, 'YOGURT', 'DAIRY', 'DAIRY', 'FOOD', '8\xa0ct', 12, None, 'Yoplait', 'Rowan Warehouse', 1)
(6, 42086001, 'Italian Dressing', 48, 'SAUCES', 'BAKING GOODS', 'BAKING GOODS', 'FOOD', '16\xa0oz', 12, None, 'Wishbone', 'Rowan Warehouse', 6)
(7, 42087001, 'Cheeseburger Heat & Serve Sliders', 65, 'HAMBURGER', 'MEAT', 'MEAT', 'FOOD', '29.28\xa0oz', 12, None, 'White Castle', 'Rowan Warehouse',

## Deliverable 2b

In [17]:
cursor.execute("SELECT * FROM {db}.product_class_source".format(db=DATABASE_NAME))
for row in cursor.fetchall():
    print(row)

(0, 'NO PRODUCT CLASS')
(1, 'FROM PRODUCT TABLE')
(2, 'SPECIFIC ITEM TYPE MAPPED TO SPECIFIC PRODUCT SUBCATEGORY')
(3, 'SPECIFIC MANUFACTURER MAPPED TO SPECIFIC PRODUCT SUBCATEGORY')
(4, 'BASED ON ITEM TYPE AND STRING SEARCH OF PRODUCT NAME')
(5, 'BASED ON MANUFACTURER AND STRING SEARCH OF PRODUCT NAME')
(6, 'BASED ON STRING SEARCH OF PRODUCT NAME')


## Deliverable 3

In [18]:
cursor.execute("DROP TABLE IF EXISTS {db}.stores".format(db=DATABASE_NAME))

cursor.execute(
    """
    CREATE TABLE {db}.stores (
        store_key SMALLINT PRIMARY KEY NOT NULL,
        store_manager TINYTEXT NOT NULL,
        street_address TINYTEXT NOT NULL,
        city TINYTEXT NOT NULL,
        state TINYTEXT NOT NULL,
        zip_code TINYTEXT NOT NULL,
        phone_number TINYTEXT NOT NULL
    )
    """.format(
        db=DATABASE_NAME
    )
)

query = "INSERT INTO {db}.stores (store_key, store_manager, street_address, city, state, zip_code, phone_number) VALUES (?, ?, ?, ?, ?, ?, ?)".format(
    db=DATABASE_NAME
)

records_to_insert = [
    (7, "Eric Mayo", "placeholder", "placeholder", "placeholder", "placeholder", "placeholder"),
    (8, "Jason Snouffer", "1640 Riverside Drive", "Hill Valley", "CA", "91905", "831-555-4385"),
]

cursor.executemany(query, records_to_insert)
connection.commit()

In [19]:
cursor.execute("SELECT * FROM {db}.stores".format(db=DATABASE_NAME))
for row in cursor.fetchall():
    print(row)

(7, 'Eric Mayo', 'placeholder', 'placeholder', 'placeholder', 'placeholder', 'placeholder')
(8, 'Jason Snouffer', '1640 Riverside Drive', 'Hill Valley', 'CA', '91905', '831-555-4385')


## Deliverable 4

In [20]:
cursor.execute("DROP TABLE IF EXISTS {db}.dates".format(db=DATABASE_NAME))
cursor.execute(
    """
    CREATE TABLE {db}.dates (
        date_key SMALLINT PRIMARY KEY NOT NULL,
        datetime DATE NOT NULL,
        day_in_month SMALLINT NOT NULL,
        day_in_year SMALLINT NOT NULL,
        week_number SMALLINT  NOT NULL,
        month_number SMALLINT NOT NULL,
        month_text TINYTEXT NOT NULL,
        quarter SMALLINT NOT NULL,
        year SMALLINT NOT NULL,
        fiscal_year SMALLINT NOT NULL,
        is_holiday BOOLEAN NOT NULL,
        is_weekend BOOLEAN NOT NULL,
        season TINYTEXT NOT NULL
    )
    """.format(
        db=DATABASE_NAME
    )
)

In [21]:
year = 2020
current_date: date = date(year, 1, 1)
stop_date: date = date(year, 12, 31)
fiscal_year_start = date(year, 8, 1)

holidays = (
    USFederalHolidayCalendar().holidays(start=current_date.isoformat(), end=stop_date.isoformat()).to_pydatetime()
)
holidays = [d.date() for d in holidays]

seasons = [
    ("winter", (date(year, 1, 1), date(year, 3, 20))),
    ("spring", (date(year, 3, 21), date(year, 6, 20))),
    ("summer", (date(year, 6, 21), date(year, 9, 22))),
    ("autumn", (date(year, 9, 23), date(year, 12, 20))),
    ("winter", (date(year, 12, 21), date(year, 12, 31))),
]

while current_date <= stop_date:
    time_tuple = current_date.timetuple()
    weekday = current_date.strftime("%A")

    cursor.execute(
        """
        INSERT IGNORE INTO {db}.dates (
            date_key, datetime, day_in_month, day_in_year, week_number, month_number, month_text,
            quarter, year, fiscal_year, is_holiday, is_weekend, season
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """.format(
            db=DATABASE_NAME
        ),
        (
            time_tuple.tm_yday,
            current_date.isoformat(),
            time_tuple.tm_mday,
            time_tuple.tm_yday,
            current_date.isocalendar()[1],
            time_tuple.tm_mon,
            current_date.strftime("%B"),
            pd.Timestamp(current_date).quarter,
            time_tuple.tm_year,
            time_tuple.tm_year if current_date >= fiscal_year_start else time_tuple.tm_year - 1,
            True if current_date in holidays else False,
            True if weekday == "Saturday" or weekday == "Sunday" else False,
            next(season for season, (start, end) in seasons if start <= current_date <= end),
        ),
    )

    current_date = current_date + timedelta(days=1)
connection.commit()

In [22]:
cursor.execute("SELECT * FROM {db}.dates LIMIT 25".format(db=DATABASE_NAME))
for row in cursor.fetchall():
    print(row)

(1, datetime.date(2020, 1, 1), 1, 1, 1, 1, 'January', 1, 2020, 2019, 1, 0, 'winter')
(2, datetime.date(2020, 1, 2), 2, 2, 1, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(3, datetime.date(2020, 1, 3), 3, 3, 1, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(4, datetime.date(2020, 1, 4), 4, 4, 1, 1, 'January', 1, 2020, 2019, 0, 1, 'winter')
(5, datetime.date(2020, 1, 5), 5, 5, 1, 1, 'January', 1, 2020, 2019, 0, 1, 'winter')
(6, datetime.date(2020, 1, 6), 6, 6, 2, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(7, datetime.date(2020, 1, 7), 7, 7, 2, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(8, datetime.date(2020, 1, 8), 8, 8, 2, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(9, datetime.date(2020, 1, 9), 9, 9, 2, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(10, datetime.date(2020, 1, 10), 10, 10, 2, 1, 'January', 1, 2020, 2019, 0, 0, 'winter')
(11, datetime.date(2020, 1, 11), 11, 11, 2, 1, 'January', 1, 2020, 2019, 0, 1, 'winter')
(12, datetime.date(2020, 1, 12), 12, 12, 2, 1, 'January',

## Deliverable 5

In [23]:
regen_sales_facts_transaction_level: bool = False
transaction_level_begin_date: str = "2020-12-01"
transaction_level_end_date: str = "2020-12-31"

if regen_sales_facts_transaction_level:
    cursor.execute("DROP TABLE IF EXISTS {db}.sales_facts_transaction_level".format(db=DATABASE_NAME))
    cursor.execute(
        """
        CREATE TABLE {db}.sales_facts_transaction_level (
            date_key SMALLINT UNSIGNED NOT NULL,
            customer_number BIGINT UNSIGNED NOT NULL,
            product_key INT UNSIGNED NOT NULL,
            store_key SMALLINT UNSIGNED NOT NULL,
            quantity_sold SMALLINT UNSIGNED NOT NULL,
            total_dollar_sales DECIMAL(65,2) UNSIGNED NOT NULL,
            total_cost_to_store DECIMAL(65,2) UNSIGNED NOT NULL,
            gross_profit DECIMAL(65,2) UNSIGNED NOT NULL,
            PRIMARY KEY (date_key, customer_number, product_key, store_key)
        )
        """.format(
            db=DATABASE_NAME
        )
    )

#### Ingest Team 8 data

In [24]:
if regen_sales_facts_transaction_level:
    cursor.execute(
        """
        INSERT INTO {db}.sales_facts_transaction_level (date_key, customer_number, product_key, store_key, quantity_sold, total_dollar_sales, total_cost_to_store, gross_profit)
        SELECT date_key, customer_id AS customer_number, product_key, store_key, quantity_sold, total_dollar_sales, total_cost_to_store, gross_profit FROM
        (SELECT * FROM 
        (SELECT t.*, product_dimension.product_key, quantity_sold * base_price AS total_cost_to_store, total_dollar_sales - (quantity_sold * base_price) as gross_profit FROM
            (SELECT transactions.*, COUNT(*) AS quantity_sold, SUM(sale_price) AS total_dollar_sales, base_price
            FROM team8.transactions
            JOIN {db}.product_catalog ON transactions.sku = product_catalog.sku
            WHERE transaction_date BETWEEN '{begin_date}' AND '{end_date}'
            GROUP BY sku, customer_id) t
            JOIN {db}.product_dimension ON t.sku = product_dimension.sku) t
        JOIN {db}.dates ON t.transaction_date = dates.datetime) t
        JOIN {db}.stores ON store_key = 8
        """.format(
            db=DATABASE_NAME,
            begin_date=transaction_level_begin_date,
            end_date=transaction_level_end_date,
        )
    )
    connection.commit()

#### Ingest Team 7 data

In [25]:
if regen_sales_facts_transaction_level:
    cursor.execute(
        """
        INSERT INTO {db}.sales_facts_transaction_level (date_key, customer_number, product_key, store_key, quantity_sold, total_dollar_sales, total_cost_to_store, gross_profit)
        SELECT date_key, `Customer Number` AS customer_number, product_key, store_key, quantity_sold, total_dollar_sales, total_cost_to_store, gross_profit FROM
        (SELECT * FROM 
        (SELECT t.*, product_dimension.product_key, quantity_sold * base_price AS total_cost_to_store, total_dollar_sales - (quantity_sold * base_price) as gross_profit FROM
            (SELECT purchases.*, COUNT(*) AS quantity_sold, SUM(`Sale Price`) AS total_dollar_sales, base_price
            FROM team7.purchases
            JOIN {db}.product_catalog ON purchases.sku = product_catalog.sku
            WHERE date BETWEEN '{begin_date}' AND '{end_date}'
            GROUP BY sku, `Customer Number`) t
            JOIN {db}.product_dimension ON t.sku = product_dimension.sku) t
        JOIN {db}.dates ON t.date = dates.datetime) t
        JOIN {db}.stores ON store_key = 7
        """.format(
            db=DATABASE_NAME,
            begin_date=transaction_level_begin_date,
            end_date=transaction_level_end_date,
        )
    )
    connection.commit()

In [26]:
cursor.execute(
    "SELECT * FROM {db}.sales_facts_transaction_level ORDER BY DATE_KEY, PRODUCT_KEY LIMIT 25".format(db=DATABASE_NAME)
)
tuples = cursor.fetchall()
lists = [list(x) for x in tuples]
table = columnar(
    lists,
    [
        "date_key",
        "customer_number",
        "product_key",
        "store_key",
        "quantity_sold",
        "total_dollar_sales",
        "total_cost_to_store",
        "gross_profit",
    ],
    no_borders=True,
    terminal_width=150,
)
print(table)

                  
  DATE_KEY  CUSTOMER_NUMBER  PRODUCT_KEY  STORE_KEY  QUANTITY_SOLD  TOTAL_DOLLAR_SALES  TOTAL_COST_TO_STORE  GROSS_PROFIT  
    
  336       928              1            7          2              5.33                4.98                 0.35          
  336       802              1            7          1              2.66                2.49                 0.17          
  336       900              1            7          3              7.99                7.47                 0.52          
  336       1091             1            7          1              2.66                2.49                 0.17          
  336       825              1            8          1              2.66                2.49                 0.17          
  336       405              1            7          1              2.66                2.49                 0.17          
  336       107              1            7          2              5.33                4.98                

## Deliverable 6

In [27]:
regen_sales_facts_daily_level: bool = False

if regen_sales_facts_daily_level:
    cursor.execute("DROP TABLE IF EXISTS {db}.sales_facts_daily_level".format(db=DATABASE_NAME))
    cursor.execute(
        """
        CREATE TABLE {db}.sales_facts_daily_level (
            date_key SMALLINT UNSIGNED NOT NULL,
            product_key INT UNSIGNED NOT NULL,
            store_key SMALLINT UNSIGNED NOT NULL,
            number_sold_today SMALLINT UNSIGNED NOT NULL,
            cost_of_items_sold DECIMAL(65,2) UNSIGNED NOT NULL,
            sales_total DECIMAL(65,2) UNSIGNED NOT NULL,
            gross_profit DECIMAL(65,2) UNSIGNED NOT NULL,
            PRIMARY KEY (date_key, product_key, store_key)
        )
        """.format(
            db=DATABASE_NAME
        )
    )

#### Ingest Team 8 data

In [28]:
if regen_sales_facts_daily_level:
    cursor.execute(
        """
        INSERT INTO {db}.sales_facts_daily_level (date_key, product_key, store_key, number_sold_today, cost_of_items_sold, sales_total, gross_profit)
        SELECT date_key, product_key, store_key, number_sold_today, cost_of_items_sold, sales_total, gross_profit FROM
        (SELECT * FROM 
        (SELECT t.*, product_dimension.product_key, number_sold_today * base_price AS cost_of_items_sold, sales_total - (number_sold_today * base_price) as gross_profit FROM
            (SELECT transactions.*, COUNT(*) AS number_sold_today, SUM(sale_price) AS sales_total, base_price
            FROM team8.transactions
            JOIN {db}.product_catalog ON transactions.sku = product_catalog.sku
            GROUP BY sku, transaction_date) t
            JOIN {db}.product_dimension ON t.sku = product_dimension.sku) t
        JOIN {db}.dates ON t.transaction_date = dates.datetime) t
        JOIN {db}.stores ON store_key = 8
        """.format(
            db=DATABASE_NAME
        )
    )
    connection.commit()

#### Ingest Team 7 data

In [29]:
if regen_sales_facts_daily_level:
    cursor.execute(
        """
        INSERT INTO {db}.sales_facts_daily_level (date_key, product_key, store_key, number_sold_today, cost_of_items_sold, sales_total, gross_profit)
        SELECT date_key, product_key, store_key, number_sold_today, cost_of_items_sold, sales_total, gross_profit FROM
        (SELECT * FROM 
        (SELECT t.*, product_dimension.product_key, number_sold_today * base_price AS cost_of_items_sold, sales_total - (number_sold_today * base_price) as gross_profit FROM
            (SELECT purchases.*, COUNT(*) AS number_sold_today, SUM(`Sale Price`) AS sales_total, base_price
            FROM team7.purchases
            JOIN {db}.product_catalog ON purchases.sku = product_catalog.sku
            GROUP BY sku, Date) t
            JOIN {db}.product_dimension ON t.sku = product_dimension.sku) t
        JOIN {db}.dates ON t.Date = dates.datetime) t
        JOIN {db}.stores ON store_key = 7
        """.format(
            db=DATABASE_NAME
        )
    )
    connection.commit()

In [30]:
cursor.execute(
    "SELECT * FROM {db}.sales_facts_daily_level ORDER BY DATE_KEY, PRODUCT_KEY LIMIT 25".format(db=DATABASE_NAME)
)
tuples = cursor.fetchall()
lists = [list(x) for x in tuples]
table = columnar(
    lists,
    [
        "date_key",
        "product_key",
        "store_key",
        "number_sold_today",
        "cost_of_items_sold",
        "sales_total",
        "gross_profit",
    ],
    no_borders=True,
    terminal_width=150,
)
print(table)

                
  DATE_KEY  PRODUCT_KEY  STORE_KEY  NUMBER_SOLD_TODAY  COST_OF_ITEMS_SOLD  SALES_TOTAL  GROSS_PROFIT  
    
  1         1            7          35                 87.15               93.25        6.10          
  1         1            8          41                 102.09              109.06       6.97          
  1         2            7          23                 41.17               44.05        2.88          
  1         2            8          33                 59.07               63.36        4.29          
  1         3            7          31                 123.69              132.35       8.66          
  1         3            8          30                 119.70              128.10       8.40          
  1         4            7          26                 103.74              111.00       7.26          
  1         4            8          30                 119.70              128.10       8.40          
  1         5            7          36             

## Deliverable 7

In [31]:
regen_inventory_facts_daily_level: bool = False

if regen_inventory_facts_daily_level:
    cursor.execute("DROP TABLE IF EXISTS {db}.inventory_facts_daily_level".format(db=DATABASE_NAME))
    cursor.execute(
        """
        CREATE TABLE {db}.inventory_facts_daily_level (
            date_key SMALLINT UNSIGNED NOT NULL,
            product_key INT UNSIGNED NOT NULL,
            store_key SMALLINT UNSIGNED NOT NULL,
            number_available SMALLINT UNSIGNED NOT NULL,
            cost_to_store_per_item DECIMAL(65,2) UNSIGNED NOT NULL,
            cost_to_store_per_case DECIMAL(65,2) UNSIGNED NOT NULL,
            cases_purchased_to_date INT UNSIGNED NOT NULL,
            PRIMARY KEY (date_key, product_key, store_key)
        )
        """.format(
            db=DATABASE_NAME
        )
    )

#### Ingest Team 8 data

In [32]:
if regen_inventory_facts_daily_level:
    cursor.execute(
        """
        INSERT INTO {db}.inventory_facts_daily_level (date_key, product_key, store_key, number_available, cost_to_store_per_item, cost_to_store_per_case, cases_purchased_to_date)
        SELECT date_key, product_key, store_key, number_available, cost_to_store_per_item, cost_to_store_per_case, cases_purchased_to_date FROM
        (SELECT * FROM 
        (SELECT t.*, product_dimension.product_key, base_price AS cost_to_store_per_item, number_per_case * base_price as cost_to_store_per_case FROM
            (SELECT transactions.*, base_price, MIN(items_left) AS number_available, MAX(total_cases_ordered) AS cases_purchased_to_date
            FROM team8.transactions
            JOIN {db}.product_catalog ON transactions.sku = product_catalog.sku
            GROUP BY sku, transaction_date) t
            JOIN {db}.product_dimension ON t.sku = product_dimension.sku) t
        JOIN {db}.dates ON t.transaction_date = dates.datetime) t
        JOIN {db}.stores ON store_key = 8
        """.format(
            db=DATABASE_NAME
        )
    )
    connection.commit()

#### Ingest Team 7 data

In [33]:
if regen_inventory_facts_daily_level:
    cursor.execute(
        """
        INSERT INTO {db}.inventory_facts_daily_level (date_key, product_key, store_key, number_available, cost_to_store_per_item, cost_to_store_per_case, cases_purchased_to_date)
        SELECT date_key, product_key, store_key, number_available, cost_to_store_per_item, cost_to_store_per_case, cases_purchased_to_date FROM
        (SELECT * FROM 
        (SELECT t.*, product_dimension.product_key, base_price AS cost_to_store_per_item, number_per_case * base_price as cost_to_store_per_case FROM
            (SELECT purchases.*, base_price, MIN(`Items Left`) AS number_available, MAX(`Cases Purchased`) AS cases_purchased_to_date
            FROM team7.purchases
            JOIN {db}.product_catalog ON purchases.sku = product_catalog.sku
            GROUP BY sku, Date) t
            JOIN {db}.product_dimension ON t.sku = product_dimension.sku) t
        JOIN {db}.dates ON t.Date = dates.datetime) t
        JOIN {db}.stores ON store_key = 7
        """.format(
            db=DATABASE_NAME
        )
    )
    connection.commit()

In [34]:
cursor.execute(
    "SELECT * FROM {db}.inventory_facts_daily_level ORDER BY DATE_KEY DESC, PRODUCT_KEY LIMIT 25".format(
        db=DATABASE_NAME
    )
)
tuples = cursor.fetchall()
lists = [list(x) for x in tuples]
table = columnar(
    lists,
    [
        "date_key",
        "product_key",
        "store_key",
        "number_available",
        "cost_to_store_per_item",
        "cost_to_store_per_case",
        "cases_purchased_to_date",
    ],
    no_borders=True,
    terminal_width=150,
)
print(table)

                
  DATE_KEY  PRODUCT_KEY  STORE_KEY  NUMBER_AVAILABLE  COST_TO_STORE_PER_ITEM  COST_TO_STORE_PER_CASE  CASES_PURCHASED_TO_DATE  
    
  366       1            8          75                2.49                    29.88                   928                      
  366       1            7          51                2.49                    29.88                   906                      
  366       2            8          78                1.79                    21.48                   948                      
  366       2            7          65                1.79                    21.48                   917                      
  366       3            7          59                3.99                    47.88                   910                      
  366       3            8          65                3.99                    47.88                   935                      
  366       4            7          66                3.99                    47.8

## Deliverable 8-11

In [35]:
regen_inventory_facts_quarterly: bool = False

if regen_inventory_facts_quarterly:
    cursor.execute("DROP TABLE IF EXISTS {db}.inventory_facts_quarter_level".format(db=DATABASE_NAME))
    cursor.execute(
        """
        CREATE TABLE {db}.inventory_facts_quarter_level (
            product_key INT UNSIGNED NOT NULL,
            store_key SMALLINT UNSIGNED NOT NULL,
            quarter_year TINYTEXT NOT NULL,
            quarter SMALLINT NOT NULL,
            year SMALLINT NOT NULL,
            cases_purchased_to_date INT UNSIGNED NOT NULL,
            cases_purchased_this_quarter INT UNSIGNED NOT NULL,
            cases_on_hand SMALLINT UNSIGNED NOT NULL,
            total_cost_to_store_this_quarter DECIMAL(65,2) UNSIGNED NOT NULL,
            total_sold_by_store_this_quarter DECIMAL(65,2) UNSIGNED NOT NULL,
            total_cost_to_store_ytd DECIMAL(65,2) UNSIGNED NOT NULL,
            total_sold_by_store_ytd DECIMAL(65,2) UNSIGNED NOT NULL,
            PRIMARY KEY (product_key, store_key, quarter, year)
        )
        """.format(
            db=DATABASE_NAME
        )
    )

##### Making working table

In [36]:
if regen_inventory_facts_quarterly:
    cursor.execute("DROP TABLE IF EXISTS {db}.inventory_facts_quarter_level_working".format(db=DATABASE_NAME))
    cursor.execute(
        """
        CREATE TABLE {db}.inventory_facts_quarter_level_working (
            date_key SMALLINT UNSIGNED NOT NULL,
            product_key INT UNSIGNED NOT NULL,
            store_key SMALLINT UNSIGNED NOT NULL,
            number_available INT UNSIGNED NOT NULL,
            cost_to_store_per_item DECIMAL(65,2) UNSIGNED NOT NULL,
            cost_to_store_per_case DECIMAL(65,2) UNSIGNED NOT NULL,
            cases_purchased_to_date INT UNSIGNED NOT NULL,
            quarter SMALLINT NOT NULL,
            year SMALLINT NOT NULL,
            sku INT UNSIGNED NOT NULL,
            number_per_case INT UNSIGNED NOT NULL,
            base_price DECIMAL(65,2) UNSIGNED NOT NULL,
            number_sold_today INT UNSIGNED NOT NULL,
            gross_profit DECIMAL(65,2) UNSIGNED NOT NULL,
            PRIMARY KEY (date_key, product_key, store_key)
        )
        SELECT t.*, number_sold_today, gross_profit FROM
        (SELECT t.*, base_price FROM
        (SELECT t.*, sku, number_per_case FROM
        (SELECT inventory_facts_daily_level.*, quarter, year FROM {db}.inventory_facts_daily_level
        JOIN {db}.dates ON inventory_facts_daily_level.date_key = dates.date_key
        GROUP BY product_key, store_key, date_key) t
        JOIN {db}.product_dimension ON t.product_key = product_dimension.product_key) t
        JOIN {db}.product_catalog ON t.sku = product_catalog.sku) t
        JOIN {db}.sales_facts_daily_level ON t.product_key = sales_facts_daily_level.product_key AND
            t.store_key = sales_facts_daily_level.store_key AND t.date_key = sales_facts_daily_level.date_key
        """.format(
            db=DATABASE_NAME
        )
    )

In [37]:
cursor.execute("SELECT * FROM {db}.inventory_facts_quarter_level_working".format(db=DATABASE_NAME))
columns: list = [desc[0] for desc in cursor.description]
df: pd.DataFrame = pd.DataFrame(data=cursor.fetchall(), columns=columns)

In [38]:
if regen_inventory_facts_quarterly:
    for store_key in df["store_key"].unique():
        for product_key in df["product_key"].unique():
            previous_cases_purchased = 0
            items_sold_ytd = 0
            total_sold_by_store_ytd = 0
            for quarter in df["quarter"].unique():
                slice = df.loc[
                    (df["store_key"] == store_key) & (df["quarter"] == quarter) & (df["product_key"] == product_key)
                ]

                cases_purchased_to_date = slice["cases_purchased_to_date"].max()
                cases_purchased_this_quarter = cases_purchased_to_date - previous_cases_purchased
                previous_cases_purchased = cases_purchased_to_date

                last_day_of_quarter = slice.loc[slice["date_key"].idxmax()]
                cases_on_hand = int(last_day_of_quarter["number_available"] / last_day_of_quarter["number_per_case"])

                items_sold_this_quarter = slice["number_sold_today"].sum()
                total_cost_to_store_this_quarter = (
                    items_sold_this_quarter * last_day_of_quarter["cost_to_store_per_item"]
                )
                total_sold_by_store_this_quarter = slice["gross_profit"].sum() + total_cost_to_store_this_quarter

                items_sold_ytd += items_sold_this_quarter
                total_cost_to_store_ytd = items_sold_ytd * last_day_of_quarter["cost_to_store_per_item"]
                total_sold_by_store_ytd += total_sold_by_store_this_quarter

                cursor.execute(
                    """
                    INSERT IGNORE INTO {db}.inventory_facts_quarter_level (
                        product_key, store_key, quarter_year, quarter, year, cases_purchased_to_date, cases_purchased_this_quarter,
                        cases_on_hand, total_cost_to_store_this_quarter, total_sold_by_store_this_quarter, total_cost_to_store_ytd, total_sold_by_store_ytd
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """.format(
                        db=DATABASE_NAME
                    ),
                    (
                        product_key.item(),
                        store_key.item(),
                        str(quarter) + " - " + str(last_day_of_quarter["year"]),
                        quarter.item(),
                        last_day_of_quarter["year"].item(),
                        cases_purchased_to_date.item(),
                        cases_purchased_this_quarter.item(),
                        cases_on_hand,
                        total_cost_to_store_this_quarter,
                        total_sold_by_store_this_quarter,
                        total_cost_to_store_ytd,
                        total_sold_by_store_ytd,
                    ),
                )
    connection.commit()

In [39]:
cursor.execute(
    """
    SELECT product_key, store_key, quarter, cases_purchased_to_date, cases_purchased_this_quarter, cases_on_hand,
        total_cost_to_store_this_quarter, total_sold_by_store_this_quarter, total_cost_to_store_ytd, total_sold_by_store_ytd
    FROM {db}.inventory_facts_quarter_level ORDER BY quarter ASC, product_key LIMIT 25
    """.format(
        db=DATABASE_NAME
    )
)
tuples = cursor.fetchall()
lists = [list(x) for x in tuples]
table = columnar(
    lists,
    [
        "product_key",
        "store_key",
        "qtr",
        "cases_purchased_ytd",
        "cases_purchased",
        "cases_on_hand",
        "total_cost_to_store",
        "total_sold_by_store",
        "total_cost_to_store_ytd",
        "total_sold_by_store_ytd",
    ],
    no_borders=True,
    terminal_width=150,
)
print(table)

                      
  PRODUCT_KEY  STORE_KEY  QTR    CASES_PURCHASED  CASES_PURCHASED  CASES_ON_HAND  TOTAL_COST_TO_S  TOTAL_SOLD_BY_S  TOTAL_COST_TO_S  TOTAL_SOLD_BY_S  
                                 _YTD                                             TORE             TORE             TORE_YTD         TORE_YTD         
    
  1            7          1      229              229              6              6655.77          7121.65          6655.77          7121.65          
  1            8          1      229              229              6              6889.83          7360.22          6889.83          7360.22          
  2            7          1      233              233              5              4881.33          5222.94          4881.33          5222.94          
  2            8          1      226              226              5              4904.60          5260.80          4904.60          5260.80          
  3            8          1      234              234             