In [2]:
import mariadb
import os
from columnar import columnar
from csv import DictReader
from dotenv import load_dotenv

# Load variables from config.env into the process environment; presumably this
# supplies the "user" and "password" values read via os.getenv when connecting
# (verify against config.env). Assigning the return value to `_` keeps it from
# being echoed as the cell's output.
_ = load_dotenv("config.env")

In [3]:
# Settings for the MariaDB connection. Credentials are read from the
# environment; autocommit is disabled, so writes only persist after an
# explicit connection.commit().
connection_settings = {
    "host": "127.0.0.1",
    "port": 23306,
    "user": os.getenv("user"),
    "password": os.getenv("password"),
    "autocommit": False,
}
connection = mariadb.connect(**connection_settings)

# Single cursor shared by every cell below.
cursor = connection.cursor()

In [None]:
# DDL for the product class dimension. IF NOT EXISTS makes the cell safe to
# re-run against a database that already has the table.
create_product_class_table = """
    CREATE TABLE IF NOT EXISTS data_mart.product_class (
        product_class_id INT PRIMARY KEY NOT NULL,
        product_subcategory TEXT NOT NULL,
        product_category TEXT NOT NULL,
        product_department TEXT NOT NULL,
        product_family TEXT NOT NULL
    )
    """
cursor.execute(create_product_class_table)

In [None]:
# Load the tab-separated product class file into data_mart.product_class.
# INSERT IGNORE skips rows whose primary key already exists, so re-running the
# cell does not fail on duplicates.
insert_product_class_sql = (
    "INSERT IGNORE INTO data_mart.product_class "
    "(product_class_id, product_subcategory, product_category, product_department, product_family) "
    "VALUES (?, ?, ?, ?, ?)"
)

# newline="" hands newline handling to the csv module, as its documentation
# requires for files passed to a reader.
with open("product_class.txt", newline="") as csv_file:
    csv_reader = DictReader(csv_file, delimiter="\t")
    # Every descriptive column is upper-cased on the way in; the id is passed
    # through unchanged.
    product_class_rows = [
        (
            row["product_class_id"],
            row["product_subcategory"].upper(),
            row["product_category"].upper(),
            row["product_department"].upper(),
            row["product_family"].upper(),
        )
        for row in csv_reader
    ]

# One batched statement instead of one round trip per row; skip the call
# entirely when the file has no data rows.
if product_class_rows:
    cursor.executemany(insert_product_class_sql, product_class_rows)

# autocommit is off on this connection, so persist the inserts explicitly.
connection.commit()

In [32]:
# Catalog rows whose item_type (after two hard-coded renames) has no matching
# product_class.product_subcategory.
#
# Only the three catalog columns are selected: `SELECT *` over the join also
# returned the five product_class columns, which are always NULL for the rows
# kept by the WHERE clause and did not line up with the three headers given to
# columnar below.
cursor.execute(
    """
    SELECT t.item_type, t.manufacturer, t.product_name FROM
        (SELECT REPLACE(REPLACE(item_type, 'PASTA/NOODLES', 'PASTA'), 'RICE/RICE MIX', 'RICE') as item_type,
            manufacturer, product_name FROM data_mart.product_catalog) t
    LEFT JOIN data_mart.product_class ON t.item_type = product_class.product_subcategory
    WHERE product_class.product_subcategory IS NULL
    """
)
tuples = cursor.fetchall()
lists = [list(x) for x in tuples]
table = columnar(
    lists,
    [
        "item_type",
        "manufacturer",
        "product_name",
    ],
    no_borders=True,
    terminal_width=150,
)
# Only the number of unmatched rows is shown by default.
print(len(lists))
# print(table)  # uncomment to list the unmatched rows themselves

830


In [22]:
# Product classes whose subcategory is not used as an item_type by any row of
# data_mart.product_catalog (the join compares the raw item_type values).
unused_class_headers = [
    "product_subcategory",
    "product_category",
    "product_department",
    "product_family",
]
cursor.execute(
    """
    SELECT product_subcategory, product_category, product_department, product_family FROM data_mart.product_class
    LEFT JOIN data_mart.product_catalog ON product_catalog.item_type = product_class.product_subcategory
    WHERE item_type IS NULL
    """
)
tuples = cursor.fetchall()
# columnar is given a list of lists, so convert each result tuple.
lists = list(map(list, tuples))
table = columnar(lists, unused_class_headers, no_borders=True, terminal_width=150)

# Report the number of unused classes, then the classes themselves.
print(len(lists))
print(table)

62
          
  PRODUCT_SUBCATEGORY  PRODUCT_CATEGORY   PRODUCT_DEPARTMENT   PRODUCT_FAMILY  
    
  SHELLFISH            SEAFOOD            SEAFOOD              FOOD            
  PASTA                STARCHY FOODS      STARCHY FOODS        FOOD            
  COFFEE               DRY GOODS          BAKING GOODS         DRINK           
  CHIPS                SNACK FOODS        SNACK FOODS          FOOD            
  DRIED MEAT           SNACK FOODS        SNACK FOODS          FOOD            
  PAPER WIPES          PAPER PRODUCTS     HOUSEHOLD            NON-CONSUMABLE  
  SHOWER SOAP          CLEANING SUPPLIES  HOUSEHOLD            NON-CONSUMABLE  
  BAGELS               BREAD              BAKED GOODS          FOOD            
  MUFFINS              BREAD              BAKED GOODS          FOOD            
  SLICED BREAD         BREAD              BAKED GOODS          FOOD            
  PANCAKE MIX          BREAKFAST FOODS    FROZEN FOODS         FOOD            
  PANCAKES           

In [168]:
# Fetch every catalog row. The result set is not read here: it stays pending on
# the shared cursor and is consumed by the fetchall() at the top of the
# classification loop at the end of this cell, so no other statement may be
# executed on `cursor` in between.
cursor.execute("SELECT * FROM data_mart.product_catalog")

# Direct renames applied to a catalog row's item_type before it is looked up as
# a product_class.product_subcategory. Keys are compared to item_type exactly;
# an item_type that is not a key is left unchanged.
type_replacements = {
    'PASTA/NOODLES': 'PASTA',
    'MAC & CHEESE': 'PASTA',
    'RICE/RICE MIX': 'RICE',
    'GRAVY/SAUCE': 'SAUCES',
    'SALAD DRESSING': 'SAUCES',
    'CAKE/BAKING MIXES': 'PANCAKE MIX',
}

# Fallback mapping from manufacturer to product subcategory. It is consulted
# only for catalog rows whose item_type did not match any product_class row,
# and it overrides whatever the item_type-based rules chose.
# Keys are compared to row["manufacturer"] exactly (case and spelling).
# NOTE(review): spellings such as 'Lean Cusine' presumably mirror the values
# stored in product_catalog - confirm against the data before "correcting" any.
manufacturer_to_class = {
    'Swiss Miss': 'CHOCOLATE',
    'Nesquik': 'CHOCOLATE',
    'Ovaltine': 'CHOCOLATE',
    'Mahatma': 'RICE',
    'Soft & Precious': 'PERSONAL HYGIENE',
    'Crest': 'PERSONAL HYGIENE',
    'Stubbs': 'SAUCES',
    'Sunny Delight Drinks': 'JUICE',
    'Sunny D': 'JUICE',
    'Welchs': 'JUICE',
    'Powerade': 'FLAVORED DRINKS',
    'Capri Sun': 'FLAVORED DRINKS',
    'Lipton': 'FLAVORED DRINKS',
    'Nestea': 'FLAVORED DRINKS',
    'Minute Maid': 'JUICE',
    'Hawaiian Punch': 'FLAVORED DRINKS',
    'Morton': 'SPICES',
    'Campbells': 'SOUP',
    'Snapple': 'FLAVORED DRINKS',
    'Sparkling ICE': 'FLAVORED DRINKS',
    'Pringles': 'CHIPS',
    'Clorox': 'CLEANERS',
    'Scotch-Brite': 'CLEANERS',
    'Kool Aid': 'FLAVORED DRINKS',
    'Orville Redenbachers': 'POPCORN',
    'Produce': 'FRESH VEGETABLES',
    'Big K': 'SODA',
    'Sargento': 'CHEESE',
    'Oxi Clean': 'CLEANERS',
    'Palmolive': 'DISH SOAP',
    'Chef Boyardee': 'PASTA',
    'All': 'CLEANERS',
    'Lean Cusine': 'TV DINNER',
    'Reeses': 'CHOCOLATE CANDY',
    'Pediacare': 'COLD REMEDIES',
    'Carnation': 'FLAVORED DRINKS',
    'Classico': 'SAUCES',
    'Febreze': 'DEODORIZERS',
    'Entenmanns': 'COOKIES',
    'Little Debbie': 'COOKIES',
    'Tastykake': 'COOKIES',
    'Hostess': 'COOKIES',
    'Drakes': 'COOKIES',
    'All But Gluten': 'COOKIES',
    'Pepperidge Farm': 'SLICED BREAD',
    'Hershey': 'SAUCES',
    'Hersheys': 'SAUCES',
    'Heinz': 'SAUCES',
    'Motrin Infant': 'IBUPROFEN',
    'Nestle': 'FLAVORED DRINKS',
    'Jumex': 'FLAVORED DRINKS',
    'Gatorade': 'FLAVORED DRINKS',
    'Got Milk': 'FLAVORED DRINKS',
    'Stouffer\'s': 'TV DINNER',
    'Starkist': 'TUNA',
    'Banquet': 'TV DINNER',
    'On Cor': 'TV DINNER',
    'Lean Pockets': 'TV DINNER',
    'Koch Foods': 'TV DINNER',
    'Barber': 'TV DINNER',
    'Atkins': 'TV DINNER',
    'Aidells': 'TV DINNER',
    'Al Fresco': 'TV DINNER',
    'Dole': 'BANANAS',
    'Listerine': 'MOUTHWASH',
    'Little Noses': 'NASAL SPRAYS',
    'Mrs. Buttersworths': 'SAUCES',
    'Motts': 'SAUCES',
    'Country Time': 'FLAVORED DRINKS',
    'Oregon': 'CANNED FRUIT',
    'King Arthur': 'SPICES',
    'Alpine': 'FLAVORED DRINKS',
    'Act II': 'SAUCES',
    'Idahoan': 'FROZEN VEGETABLES',
    'Gefen': 'SPICES',
}

# Fallback mapping from a product-name keyword to product subcategory. Each key
# is tested as a substring of the lower-cased product_name, so keys must be
# lower case and short keys match inside longer words.
# ORDER MATTERS: every keyword is tested in insertion order and each hit
# overwrites the previous one, so when several keywords match, the entry
# listed last wins. A hit here also overrides the manufacturer-based choice.
product_name_to_class = {
    'cookie': 'COOKIES',
    'bagel': 'BAGELS',
    'guacamole': 'DIPS',
    'cocoa': 'CHOCOLATE',
    'donut': 'DONUTS',
    'muffin': 'MUFFINS',
    'eggs': 'EGGS',
    'juice': 'JUICE',
    'pretzel': 'PRETZELS',
    'wheat thins': 'CRACKERS',
    'chips ahoy': 'COOKIES',
    'lays poppables': 'CHIPS',
    'lays stax': 'CHIPS',
    'lays variety': 'CHIPS',
    'stew': 'SOUP',
    'doritos': 'CHIPS',
    'kettle': 'CHIPS',
    'ruffles': 'CHIPS',
    'lays dip': 'DIPS',
    'tostitos dip': 'DIPS',
    'tostitos cantina': 'CHIPS',
    'tostitos bite': 'CHIPS',
    'tostitos simply': 'CHIPS',
    'potato crisps': 'CHIPS',
    'corn crisps': 'CHIPS',
    'potato chips': 'CHIPS',
    'tortilla chips': 'CHIPS',
    'goldfish': 'CRACKERS',
    'iced tea': 'FLAVORED DRINKS',
    'salsa dip': 'DIPS',
    'tostitos salsa': 'DIPS',
    'salsa mild': 'DIPS',
    'salsa chunky': 'DIPS',
    'salsa black': 'DIPS',
    'all purpose cleaner': 'CLEANERS',
    'shower': 'SHOWER SOAP',
    'seasoning': 'SPICES',
    'mac & cheese': 'PASTA',
    'american singles': 'CHEESE',
    'hot dogs': 'HOT DOGS',
    'shampoo': 'SHAMPOO',
    'vitamin water': 'FLAVORED DRINKS',
    'almond milk': 'FLAVORED DRINKS',
    'soy': 'FLAVORED DRINKS',
    'coconut milk': 'FLAVORED DRINKS',
    'sugar white': 'SUGAR',
    'bar': 'COOKIES',
    'pastry crisps': 'COOKIES',
    'baked beans': 'SOUP',
    'french fries': 'FRENCH FRIES',
    'vinegar balsamic': 'SAUCES',
    'dressing': 'SAUCES',
    'pancakes': 'PANCAKES',
    'biscuits': 'SLICED BREAD',
    'rice krispies treats': 'COOKIES',
    'snack bites': 'COOKIES',
    'sour cream': 'SOUR CREAM',
    'fruit cup': 'FRESH FRUIT',
    'raisins': 'DRIED FRUIT',
    'cranberry sauce': 'SAUCES',
    'cool whip': 'SAUCES',
    'cooking stock': 'SOUP',
    'broth': 'SOUP',
}

def _subcategory_exists(subcategory):
    """Return True when data_mart.product_class has a row with this product_subcategory.

    The value is bound as a query parameter rather than formatted into the SQL
    text, so subcategories containing quotes (or any other SQL syntax) are
    compared literally instead of breaking or altering the statement.
    """
    cursor.execute(
        "SELECT 1 FROM data_mart.product_class WHERE product_subcategory = ? LIMIT 1",
        (subcategory,),
    )
    return len(cursor.fetchall()) > 0


def _apply_fallback_rules(row, item_type):
    """Guess a product subcategory for a catalog row whose item_type had no match.

    Rules run in a fixed order and each later stage may overwrite the result of
    an earlier one: item_type rules, then manufacturer rules, then product-name
    keywords, then the "burger" rule.

    Args:
        row: catalog row as a dict; reads "product_name" and "manufacturer".
        item_type: the row's item_type after type_replacements was applied.

    Returns:
        The subcategory chosen by the rules, or item_type unchanged when no
        rule applies.
    """
    product_name = row["product_name"].lower()
    manufacturer = row["manufacturer"]

    # rules by item_type
    if item_type == "BREAD":
        item_type = "SLICED BREAD"
    elif item_type == "COFFEE/CREAMER" and "coffee" in product_name:
        item_type = "COFFEE"
    elif item_type == "FROZEN FOOD":
        if "waffle" in product_name:
            item_type = "WAFFLES"
    elif item_type == "JELLY/JAM":
        if "jam" in product_name:
            item_type = "JAM"
        elif "jelly" in product_name:
            item_type = "JELLY"
    elif item_type == "SNACKS":
        if "variety pack" in product_name:
            item_type = "CHIPS"
    elif item_type == "SODA/JUICE/DRINKS":
        if "cocoa" in product_name:
            item_type = "CHOCOLATE"

    # rules by manufacturer
    if manufacturer in manufacturer_to_class:
        item_type = manufacturer_to_class[manufacturer]
    elif manufacturer == "Starbucks" and "cocoa" in product_name:
        item_type = "CHOCOLATE"

    # rules by product_name: every keyword is tested, so the last matching
    # entry of product_name_to_class wins.
    for keyword, product_class in product_name_to_class.items():
        if keyword in product_name:
            item_type = product_class

    if "burger" in product_name and item_type != "SNACKS":
        item_type = "HAMBURGER"

    return item_type


# Materialise the pending product_catalog result set as dicts keyed by column
# name *before* the cursor is reused for the per-row lookups below.
column_names = [column[0] for column in cursor.description]
catalog_rows = [dict(zip(column_names, values)) for values in cursor.fetchall()]

# Print and count every catalog row that cannot be mapped to a product class,
# neither directly nor through the fallback rules.
count = 0
for row in catalog_rows:
    item_type = type_replacements.get(row["item_type"], row["item_type"])

    if not _subcategory_exists(item_type):
        item_type = _apply_fallback_rules(row, item_type)
        if not _subcategory_exists(item_type):
            print(row)
            count += 1
count

{'manufacturer': 'Whiskas', 'product_name': 'Choice Cuts Poultry', 'size': '36\xa0oz', 'item_type': 'PET FOOD', 'sku': 42088001, 'base_price': Decimal('4.99')}
{'manufacturer': 'Stove Top', 'product_name': 'Stuffing Mix Pork', 'size': '6\xa0oz', 'item_type': '', 'sku': 42270001, 'base_price': Decimal('2.09')}
{'manufacturer': 'Steakumm', 'product_name': 'Sandwich Steaks Thin Sliced', 'size': '21\xa0oz', 'item_type': '', 'sku': 42272001, 'base_price': Decimal('4.39')}
{'manufacturer': 'Purnell', 'product_name': 'Sausage Pork', 'size': '1\xa0lbs', 'item_type': '', 'sku': 42395001, 'base_price': Decimal('3.29')}
{'manufacturer': 'Pet', 'product_name': 'Evaporated Milk Original', 'size': '12\xa0oz', 'item_type': 'BAKING SUPPLIES', 'sku': 42511001, 'base_price': Decimal('1.79')}
{'manufacturer': 'Pedigree', 'product_name': 'Chopped Beef Dog Food', 'size': '22\xa0oz', 'item_type': '', 'sku': 42595001, 'base_price': Decimal('1.28')}
{'manufacturer': 'Open Nature', 'product_name': 'Meatballs A

28