In [1]:
import sqlite3
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_colwidth', 100)

In [5]:
DB_FILE = "../data/db/products.db"
conn = sqlite3.connect(DB_FILE)

df = pd.read_sql_query("SELECT * FROM product_details", conn)

conn.close()


In [8]:
# updates for database and webscraping code
# TODO brand_source_id is not working - null
# TODO remove "root" from category cols, reverse order of string inserted into db 

# data cleaning
# TODO parse parent product code from url 
# TODO clean price column
# TODO clean size column 



In [52]:
df.columns

Index(['id', 'target_url', 'product_code', 'loves_count', 'rating', 'reviews',
       'brand_source_id', 'category_root_id', 'category_root_name',
       'category_root_url', 'sku_id', 'brand_name', 'display_name',
       'ingredients', 'limited_edition', 'first_access', 'limited_time_offer',
       'new_product', 'online_only', 'few_left', 'out_of_stock', 'price',
       'max_purchase_quantity', 'size', 'type', 'url', 'variation_type',
       'variation_value', 'returnable', 'finish_refinement', 'size_refinement',
       'created_at'],
      dtype='object')

In [49]:
def clean_compressed_product_hierarchy(df, col, delimiter=' --- ', code_prefix_to_strip='cat'):
    clean_col = df[col].str.replace(code_prefix_to_strip,"")
    clean_col = clean_col.str.split(delimiter)
    clean_col = clean_col.apply(lambda x : x[::-1])

    return pd.DataFrame(clean_col.to_list(), columns=[f'{col}_l1', f'{col}_l2', f'{col}_l3'])



In [61]:
df = pd.concat([
    df, 
    clean_compressed_product_hierarchy(df, 'category_root_id', delimiter=' --- ', code_prefix_to_strip='cat'),
    clean_compressed_product_hierarchy(df, 'category_root_name', delimiter=' --- ', code_prefix_to_strip=''),
    clean_compressed_product_hierarchy(df, 'category_root_url', delimiter=' --- ', code_prefix_to_strip='/shop/')
], axis=1)

df = df.drop(['category_root_id', 'category_root_name', 'category_root_url'], axis=1)

In [62]:
df

Unnamed: 0,id,target_url,product_code,loves_count,rating,reviews,brand_source_id,sku_id,brand_name,display_name,ingredients,limited_edition,first_access,limited_time_offer,new_product,online_only,few_left,out_of_stock,price,max_purchase_quantity,size,type,url,variation_type,variation_value,returnable,finish_refinement,size_refinement,created_at,category_root_id_l1,category_root_id_l2,category_root_id_l3,category_root_name_l1,category_root_name_l2,category_root_name_l3,category_root_url_l1,category_root_url_l2,category_root_url_l3,category_root_id_l1.1,category_root_id_l2.1,category_root_id_l3.1,category_root_name_l1.1,category_root_name_l2.1,category_root_name_l3.1,category_root_url_l1.1,category_root_url_l2.1,category_root_url_l3.1,category_root_id_l1.2,category_root_id_l2.2,category_root_id_l3.2,category_root_name_l1.2,category_root_name_l2.2,category_root_name_l3.2,category_root_url_l1.2,category_root_url_l2.2,category_root_url_l3.2
0,1,/product/aavrani-hair-density-boosting-treatment-recovery-oil-mini-set-P510548,P510548,7841,4.6746,126,,2760353,AAVRANI,2760353,-Bond Complex: Combats damage and retains the strength of strands through multiple washes.<br>-T...,0,0,0,0,1,0,0,$47.00,10,,Standard,https://www.sephora.com:443/v1/catalog/skus/2760353,,,1,,,2025-01-03 05:38:32,130038,60143,,Hair,Value & Gift Sets,,hair-products,hair-care-sets,,130038,60143,,Hair,Value & Gift Sets,,hair-products,hair-care-sets,,130038,60143,,Hair,Value & Gift Sets,,hair-products,hair-care-sets,
1,2,/product/aavrani-jelly-clarifying-detox-shampoo-for-scalp-hair-P513304,P513304,6029,4.7414,116,,2797074,AAVRANI,2797074 8.4 oz / 250 ml,-Lotus: An antioxidant that nourishes and softens hair while balancing the scalp.<br>-Vitamin E ...,0,0,0,0,1,0,0,$46.00,10,8.4 oz / 250 ml,Standard,https://www.sephora.com:443/v1/catalog/skus/2797074,Size,8.4 oz / 250 ml,1,,,2025-01-03 05:38:36,130038,1230050,60127,Hair,Shampoo & Conditioner,Shampoo,hair-products,shampoo-conditioner,shampoo-sulfate-free-shampoo,130038,1230050,60127,Hair,Shampoo & Conditioner,Shampoo,hair-products,shampoo-conditioner,shampoo-sulfate-free-shampoo,130038,1230050,60127,Hair,Shampoo & Conditioner,Shampoo,hair-products,shampoo-conditioner,shampoo-sulfate-free-shampoo
2,3,/product/aavrani-hair-density-boosting-treatment-P510710,P510710,14409,4.5391,243,,2760346,AAVRANI,2760346 2 oz / 60 ml - 4 Month Supply,"<p>-Turmeric Stem Cells: Help reset the hair bulb for fuller-, thicker-looking hair.</p><p>-Vega...",0,0,0,0,1,0,0,$65.00,10,2 oz / 60 ml - 4 Month Supply,Standard,https://www.sephora.com:443/v1/catalog/skus/2760346,Size,2 oz / 60 ml - 4 Month Supply,1,,,2025-01-03 05:38:41,130038,1230049,1280031,Hair,Hair Styling & Treatments,Hair Thinning & Hair Loss,hair-products,hair-products-treatments,thinning-hair-loss,130038,1230049,1280031,Hair,Hair Styling & Treatments,Hair Thinning & Hair Loss,hair-products,hair-products-treatments,thinning-hair-loss,130038,1230049,1280031,Hair,Hair Styling & Treatments,Hair Thinning & Hair Loss,hair-products,hair-products-treatments,thinning-hair-loss
3,4,/product/aavrani-hair-scalp-recovery-oil-P510713,P510713,4141,4.7087,103,,2760361,AAVRANI,2760361 1.7 oz,<p>-Amla: Is a superfood and adaptogen that strengthens hair and nurtures the scalp.</p><p>-Bond...,0,0,0,0,1,0,0,$48.00,10,1.7 oz,Standard,https://www.sephora.com:443/v1/catalog/skus/2760361,Size,1.7 oz,1,,,2025-01-03 05:38:46,130038,1230049,1120033,Hair,Hair Styling & Treatments,Hair Oil,hair-products,hair-products-treatments,hair-oil-treatment,130038,1230049,1120033,Hair,Hair Styling & Treatments,Hair Oil,hair-products,hair-products-treatments,hair-oil-treatment,130038,1230049,1120033,Hair,Hair Styling & Treatments,Hair Oil,hair-products,hair-products-treatments,hair-oil-treatment
4,5,/product/aavrani-intensive-repair-conditioning-hair-mask-P510718,P510718,4346,4.5659,129,,2761724,AAVRANI,2761724 8.4 oz,-Mowrah Butter: Intensely hydrates and addresses breakage.<br>-Rice Protein: Is an amino-acid-ri...,0,0,0,0,1,0,0,$53.00,10,8.4 oz,Standard,https://www.sephora.com:443/v1/catalog/skus/2761724,Size,8.4 oz,1,,,2025-01-03 05:38:50,130038,1230049,1660033,Hair,Hair Styling & Treatments,Hair Masks,hair-products,hair-products-treatments,hair-masks,130038,1230049,1660033,Hair,Hair Styling & Treatments,Hair Masks,hair-products,hair-products-treatments,hair-masks,130038,1230049,1660033,Hair,Hair Styling & Treatments,Hair Masks,hair-products,hair-products-treatments,hair-masks
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22677,22678,/product/7-virtues-amber-vanilla-eau-de-parfum-travel-spray-P509944,P509944,15579,4.6489,470,,2762730,The 7 Virtues,2762730 0.34 oz eau de parfum spray,"<p>Alcohol Denat., Fragrance/Parfum, Water/Eau/Aqua, Farnesol, Hydroxycitronellal, Limonene, Lin...",0,0,0,0,0,0,0,$43.50,10,0.34 oz,Standard,https://www.sephora.com:443/v1/catalog/skus/2762730,Size + Concentration + Formulation,0.34 oz eau de parfum spray,1,,Mini,2025-01-03 15:01:43,160006,1230039,1110034,Fragrance,Women,Rollerballs & Travel Size,fragrance,fragrances-for-women,rollerball-perfume-roll-on-perfume,160006,1230039,1110034,Fragrance,Women,Rollerballs & Travel Size,fragrance,fragrances-for-women,rollerball-perfume-roll-on-perfume,160006,1230039,1110034,Fragrance,Women,Rollerballs & Travel Size,fragrance,fragrances-for-women,rollerball-perfume-roll-on-perfume
22678,22679,/product/7-virtues-candied-lychee-eau-de-parfum-P512478,P512478,12598,4.8725,345,,2791606,The 7 Virtues,2791606 1.7 oz / 50ml eau de parfum spray,"Alcohol Denat, Fragrance/Parfum, Water/Eau/Aqua, Limonene, Hexyl Cinnamal, Citronellol, Alpha-Is...",0,0,0,0,0,0,0,$127.00,10,1.7 oz / 50ml,Standard,https://www.sephora.com:443/v1/catalog/skus/2791606,Size + Concentration + Formulation,1.7 oz / 50ml eau de parfum spray,1,,,2025-01-03 15:01:48,160006,1230039,60148,Fragrance,Women,Perfume,fragrance,fragrances-for-women,perfume,160006,1230039,60148,Fragrance,Women,Perfume,fragrance,fragrances-for-women,perfume,160006,1230039,60148,Fragrance,Women,Perfume,fragrance,fragrances-for-women,perfume
22679,22680,/product/7-virtues-candied-lychee-eau-de-parfum-P512478,P512478,12598,4.8725,345,,2791614,The 7 Virtues,2791614 0.34 oz / 10ml eau de parfum spray,"Alcohol Denat, Fragrance/Parfum, Water/Eau/Aqua, Limonene, Hexyl Cinnamal, Citronellol, Alpha-Is...",0,0,0,0,0,0,0,$43.50,10,0.34 oz / 10ml,Standard,https://www.sephora.com:443/v1/catalog/skus/2791614?parentProduct=P512478,Size + Concentration + Formulation,0.34 oz / 10ml eau de parfum spray,1,,Mini,2025-01-03 15:01:48,160006,1230039,60148,Fragrance,Women,Perfume,fragrance,fragrances-for-women,perfume,160006,1230039,60148,Fragrance,Women,Perfume,fragrance,fragrances-for-women,perfume,160006,1230039,60148,Fragrance,Women,Perfume,fragrance,fragrances-for-women,perfume
22680,22681,/product/7-virtues-peace-perfume-sampler-discovery-set-P510389,P510389,18514,4.8122,197,,2762474,The 7 Virtues,2762474,"<b>Vanilla Woods: <br></b>Alcohol Denat., Fragrance/Parfum, Water/Eau/Aqua.<br><b><br>Amber Vani...",1,0,0,0,0,0,0,$34.00,10,,Standard,https://www.sephora.com:443/v1/catalog/skus/2762474,,,1,,,2025-01-03 15:01:52,160006,1230041,60146,Fragrance,Value & Gift Sets,Perfume Gift Sets,fragrance,fragrance-value-sets-gifts,perfume-gift-sets,160006,1230041,60146,Fragrance,Value & Gift Sets,Perfume Gift Sets,fragrance,fragrance-value-sets-gifts,perfume-gift-sets,160006,1230041,60146,Fragrance,Value & Gift Sets,Perfume Gift Sets,fragrance,fragrance-value-sets-gifts,perfume-gift-sets
