# BYOMA

In [None]:
import requests
import pandas as pd
from urllib.parse import urljoin

# -------------------
# 1) BYOMA Shoppers (PowerReviews)
# -------------------
# Fetches one PowerReviews response for the Shoppers listing and flattens its
# reviews into the shared review columns (source, retailer, product_id, ...).
# NOTE(review): only this single response is read (no paging loop like the Ulta
# section has) — confirm that is intended.

shoppers_url = (
    "https://display.powerreviews.com/m/867678/l/all/product/5060489793461/reviews"
    "?apikey=f40b12ae-7efa-423a-8f12-cb902debf1c0&_noconfig=true&page_locale=en_CA"
)

shoppers_resp = requests.get(shoppers_url, timeout=60)
shoppers_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
shoppers_json = shoppers_resp.json()
results = shoppers_json.get("results") or []
block = results[0] if isinstance(results, list) and len(results) > 0 else {}
shoppers_reviews = block.get("reviews") or []

df_shoppers_raw = pd.json_normalize(shoppers_reviews, sep=".")

# Handle timestamps (values >= 1e12 are read as milliseconds, smaller ones as seconds)
if not df_shoppers_raw.empty:
    # Fall back to an all-missing column so a payload without created_date yields NaT
    # instead of crashing on `.where`.
    created_raw = df_shoppers_raw.get(
        "details.created_date", pd.Series([None] * len(df_shoppers_raw))
    )
    num = pd.to_numeric(created_raw, errors="coerce")
    ms = pd.to_datetime(num.where(num >= 1e12), unit="ms", errors="coerce", utc=True)
    s  = pd.to_datetime(num.where(num < 1e12), unit="s", errors="coerce", utc=True)
    shoppers_time = ms.combine_first(s)
else:
    # Same tz-aware dtype as the non-empty branch, so later concats keep a datetime column.
    shoppers_time = pd.Series([], dtype="datetime64[ns, UTC]")

# Ratings: prefer metrics.rating, then details.rating, else all-missing
if "metrics.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["metrics.rating"]
elif "details.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["details.rating"]
else:
    rating_series = pd.Series([None] * len(df_shoppers_raw))

byoma_shoppers = pd.DataFrame({
    "source": "powerreviews",
    "retailer": "shoppers_ca",
    "product_id": block.get("page_id"),
    "product_name": "BYOMA Moisturizing Rich Cream",
    "rating": pd.to_numeric(rating_series, errors="coerce"),
    "submission_time": shoppers_time,
    "user_nickname": df_shoppers_raw.get("details.nickname"),
    "review_text": df_shoppers_raw.get("details.comments"),
    "price": "21 CAD",
})

print("BYOMA shoppers reviews:", len(byoma_shoppers))


BYOMA shoppers reviews: 10


In [None]:
# -------------------
# 2) BYOMA Ulta (PowerReviews multipage)
# -------------------
# Follows the PowerReviews `next_page_url` chain for the Ulta listing (at most
# `max_pages` requests) and stacks every page into one frame that uses the
# shared review columns.

base = "https://display.powerreviews.com"
api_key = "daa0f241-c242-4483-afb7-4449942d1a2b"

url = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/pimprod2034791/reviews"
    f"?apikey={api_key}&_noconfig=true"
)

ulta_rows = []
ulta_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]
headers = {"User-Agent": "Mozilla/5.0"}
max_pages = 20

page_count = 0
while True:
    resp = requests.get(url, timeout=60, headers=headers)
    resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
    js = resp.json()
    results = js.get("results")
    # `results` is accepted either as a dict or as a list whose first item holds the page
    block = results if isinstance(results, dict) else (results[0] if results else {})
    reviews = block.get("reviews") or []

    if not reviews:
        break

    df_ulta_raw = pd.json_normalize(reviews, sep=".")
    n = len(df_ulta_raw)
    ul_blank = pd.Series([None] * n)  # placeholder for any column this page lacks

    # convert timestamps (values >= 1e12 are read as milliseconds, smaller ones as seconds)
    ul_num = pd.to_numeric(df_ulta_raw.get("details.created_date", ul_blank), errors="coerce")
    ul_ms = pd.to_datetime(ul_num.where(ul_num >= 1e12), unit="ms", errors="coerce", utc=True)
    ul_s  = pd.to_datetime(ul_num.where(ul_num < 1e12), unit="s", errors="coerce", utc=True)
    ul_time = ul_ms.combine_first(ul_s)

    # nickname: reviewer.display_name first, then details.nickname, then reviewer.name
    ul_disp = df_ulta_raw.get("details.nickname", ul_blank)
    if "reviewer.display_name" in df_ulta_raw:
        ul_disp = df_ulta_raw["reviewer.display_name"].combine_first(ul_disp)
    if "reviewer.name" in df_ulta_raw:
        ul_disp = ul_disp.combine_first(df_ulta_raw["reviewer.name"])

    # review text: details.comments, falling back to review_text
    ul_text = df_ulta_raw.get("details.comments", ul_blank).combine_first(
        df_ulta_raw.get("review_text", ul_blank)
    )

    # rating: prefer metrics.rating, then details.rating, else all-missing
    if "metrics.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["metrics.rating"]
    elif "details.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["details.rating"]
    else:
        ul_rating = ul_blank

    df_tmp = pd.DataFrame({
        "source": "powerreviews",
        "retailer": "ulta_us",
        "product_id": block.get("page_id"),
        "product_name": "BYOMA Moisturizing Rich Cream",
        "rating": pd.to_numeric(ul_rating, errors="coerce"),
        "submission_time": ul_time,
        "user_nickname": ul_disp,
        "review_text": ul_text,
        "price": "17 USD",
    })

    ulta_rows.append(df_tmp)
    page_count += 1

    if page_count >= max_pages:
        break

    # next page link
    paging = js.get("paging") or block.get("paging") or {}
    next_rel = paging.get("next_page_url")
    if not next_rel:
        break

    url = urljoin(base, next_rel)
    # Re-attach the query parameters if the paging link dropped them; start the
    # query string with "?" when the link has none.
    if "apikey=" not in url:
        url += ("&" if "?" in url else "?") + f"apikey={api_key}"
    if "_noconfig=true" not in url:
        url += ("&" if "?" in url else "?") + "_noconfig=true"

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
byoma_ulta = (
    pd.concat(ulta_rows, ignore_index=True)
    if ulta_rows
    else pd.DataFrame(columns=ulta_columns)
)
print("BYOMA Ulta reviews:", len(byoma_ulta))


BYOMA Ulta reviews: 100


In [None]:
# -------------------
# COMBINE BOTH
# -------------------
# Stack the Shoppers and Ulta frames into a single BYOMA table with a fresh
# 0..n-1 index, then preview the first rows.

byoma_all = pd.concat(
    objs=[byoma_shoppers, byoma_ulta],
    ignore_index=True,
)

print("BYOMA TOTAL REVIEWS:", len(byoma_all))
byoma_all.head()


BYOMA TOTAL REVIEWS: 110


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2025-10-15 15:36:44.376000+00:00,Taylor,Bought this moisturizer to add to my morning s...,21 CAD
1,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-11-14 01:20:55.816000+00:00,Saphias5,I've finally found my holy grail moisturizer!!...,21 CAD
2,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-10-05 19:28:58.249000+00:00,Harman,After a long search i finally found a moisturi...,21 CAD
3,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-09-22 11:57:49.326000+00:00,Sam,Its so soft on my skin and protect my barrier ...,21 CAD
4,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-05-12 05:07:37.348000+00:00,Carl0120,"Great product, great price. Can't beat it. Hyd...",21 CAD


# DRUNK ELEPHANT

In [None]:
import requests
import pandas as pd
from urllib.parse import urljoin

# ===============================================================
# 1) DRUNK ELEPHANT — Sephora (Bazaarvoice)
# ===============================================================
# Requests up to 100 reviews (Limit=100, Offset=0, sorted by SubmissionTime
# descending) from the Bazaarvoice reviews endpoint and maps each one onto the
# shared review columns. No further offsets are requested.

sephora_url = (
    "https://api.bazaarvoice.com/data/reviews.json?"
    "Filter=ProductId%3AP446938&Sort=SubmissionTime%3Adesc&Limit=100&Offset=0"
    "&Include=Products%2CComments&Stats=Reviews"
    "&passkey=calXm2DyQVjcCy9agq85vmTJv5ELuuBCF2sdg4BnJzJus"
    "&apiversion=5.4&Locale=en_CA"
)

sephora_resp = requests.get(sephora_url, timeout=60)
sephora_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
sephora_json = sephora_resp.json()
sephora_reviews = sephora_json.get("Results") or []
products_info = (sephora_json.get("Includes") or {}).get("Products", {})

# Explicit column list so an empty response still yields a frame with the full schema.
sephora_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]

lala_sephora = pd.DataFrame(
    [{
        "source": "bazaarvoice",
        "retailer": "sephora_ca",
        "product_id": r.get("ProductId"),
        "product_name": "Drunk Elephant Lala Retro Whipped Moisturizer",
        "rating": r.get("Rating"),
        "submission_time": pd.to_datetime(r.get("SubmissionTime"), errors="coerce", utc=True),
        "user_nickname": r.get("UserNickname"),
        "review_text": r.get("ReviewText"),
        "price": "91 CAD",
    } for r in sephora_reviews],
    columns=sephora_columns,
)

print("Drunk Elephant — Sephora reviews:", len(lala_sephora))


# ===============================================================
# 2) DRUNK ELEPHANT — Shoppers Drug Mart (PowerReviews)
# ===============================================================
# Fetches one PowerReviews response for the Shoppers listing and flattens its
# reviews into the shared review columns.
# NOTE(review): only this single response is read (no paging loop like the Ulta
# section has) — confirm that is intended.

shoppers_url = (
    "https://display.powerreviews.com/m/867678/l/all/product/812343030350/reviews"
    "?apikey=f40b12ae-7efa-423a-8f12-cb902debf1c0&_noconfig=true&page_locale=en_CA"
)

shoppers_resp = requests.get(shoppers_url, timeout=60)
shoppers_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
shoppers_json = shoppers_resp.json()
results = shoppers_json.get("results") or []
block = results[0] if isinstance(results, list) and len(results) > 0 else {}
shoppers_reviews = block.get("reviews") or []

df_shoppers_raw = pd.json_normalize(shoppers_reviews, sep=".")

# Handle timestamp (values >= 1e12 are read as milliseconds, smaller ones as seconds)
if not df_shoppers_raw.empty:
    # Fall back to an all-missing column so a payload without created_date yields NaT
    # instead of crashing on `.where`.
    created_raw = df_shoppers_raw.get(
        "details.created_date", pd.Series([None] * len(df_shoppers_raw))
    )
    num = pd.to_numeric(created_raw, errors="coerce")
    ms = pd.to_datetime(num.where(num >= 1e12), unit="ms", errors="coerce", utc=True)
    s  = pd.to_datetime(num.where(num < 1e12), unit="s", errors="coerce", utc=True)
    shoppers_time = ms.combine_first(s)
else:
    # Same tz-aware dtype as the non-empty branch, so later concats keep a datetime column.
    shoppers_time = pd.Series([], dtype="datetime64[ns, UTC]")

# Rating: prefer metrics.rating, then details.rating, else all-missing
if "metrics.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["metrics.rating"]
elif "details.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["details.rating"]
else:
    rating_series = pd.Series([None] * len(df_shoppers_raw))

lala_shoppers = pd.DataFrame({
    "source": "powerreviews",
    "retailer": "shoppers_ca",
    "product_id": block.get("page_id"),
    "product_name": "Drunk Elephant Lala Retro Whipped Moisturizer",
    "rating": pd.to_numeric(rating_series, errors="coerce"),
    "submission_time": shoppers_time,
    "user_nickname": df_shoppers_raw.get("details.nickname"),
    "review_text": df_shoppers_raw.get("details.comments"),
    "price": "88 CAD",
})

print("Drunk Elephant — Shoppers reviews:", len(lala_shoppers))


# ===============================================================
# 3) DRUNK ELEPHANT — Ulta (PowerReviews MULTI-PAGE)
# ===============================================================
# Follows the PowerReviews `next_page_url` chain for the Ulta listing (at most
# `max_pages` requests) and stacks every page into one frame that uses the
# shared review columns.

base = "https://display.powerreviews.com"
api_key = "daa0f241-c242-4483-afb7-4449942d1a2b"

url = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/pimprod2028065/reviews"
    f"?apikey={api_key}&_noconfig=true"
)

ulta_rows = []
ulta_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]
headers = {"User-Agent": "Mozilla/5.0"}
max_pages = 20

page_count = 0
while True:
    resp = requests.get(url, timeout=60, headers=headers)
    resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
    js = resp.json()
    results = js.get("results")
    # `results` is accepted either as a dict or as a list whose first item holds the page
    block = results if isinstance(results, dict) else (results[0] if results else {})
    reviews = block.get("reviews") or []

    if not reviews:
        break

    df_ulta_raw = pd.json_normalize(reviews, sep=".")
    n = len(df_ulta_raw)
    ul_blank = pd.Series([None] * n)  # placeholder for any column this page lacks

    # timestamps (values >= 1e12 are read as milliseconds, smaller ones as seconds)
    ul_num = pd.to_numeric(df_ulta_raw.get("details.created_date", ul_blank), errors="coerce")
    ul_ms = pd.to_datetime(ul_num.where(ul_num >= 1e12), unit="ms", errors="coerce", utc=True)
    ul_s  = pd.to_datetime(ul_num.where(ul_num < 1e12), unit="s", errors="coerce", utc=True)
    ul_time = ul_ms.combine_first(ul_s)

    # nickname: reviewer.display_name first, then details.nickname, then reviewer.name
    ul_disp = df_ulta_raw.get("details.nickname", ul_blank)
    if "reviewer.display_name" in df_ulta_raw:
        ul_disp = df_ulta_raw["reviewer.display_name"].combine_first(ul_disp)
    if "reviewer.name" in df_ulta_raw:
        ul_disp = ul_disp.combine_first(df_ulta_raw["reviewer.name"])

    # review text: details.comments, falling back to review_text
    ul_text = df_ulta_raw.get("details.comments", ul_blank).combine_first(
        df_ulta_raw.get("review_text", ul_blank)
    )

    # rating: prefer metrics.rating, then details.rating, else all-missing
    if "metrics.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["metrics.rating"]
    elif "details.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["details.rating"]
    else:
        ul_rating = ul_blank

    df_tmp = pd.DataFrame({
        "source": "powerreviews",
        "retailer": "ulta_us",
        "product_id": block.get("page_id"),
        "product_name": "Drunk Elephant Lala Retro Whipped Moisturizer",
        "rating": pd.to_numeric(ul_rating, errors="coerce"),
        "submission_time": ul_time,
        "user_nickname": ul_disp,
        "review_text": ul_text,
        "price": "62 USD",
    })

    ulta_rows.append(df_tmp)
    page_count += 1

    if page_count >= max_pages:
        break

    paging = js.get("paging") or block.get("paging") or {}
    next_rel = paging.get("next_page_url")
    if not next_rel:
        break

    url = urljoin(base, next_rel)
    # Re-attach the query parameters if the paging link dropped them; start the
    # query string with "?" when the link has none.
    if "apikey=" not in url:
        url += ("&" if "?" in url else "?") + f"apikey={api_key}"
    if "_noconfig=true" not in url:
        url += ("&" if "?" in url else "?") + "_noconfig=true"

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
lala_ulta = (
    pd.concat(ulta_rows, ignore_index=True)
    if ulta_rows
    else pd.DataFrame(columns=ulta_columns)
)
print("Drunk Elephant — Ulta reviews:", len(lala_ulta))


# ===============================================================
# COMBINE ALL (DRUNK ELEPHANT LALA RETRO)
# ===============================================================
# Stack the Sephora, Shoppers and Ulta frames into one table with a fresh
# 0..n-1 index, then preview the first rows.

lala_all = pd.concat(
    objs=[lala_sephora, lala_shoppers, lala_ulta],
    ignore_index=True,
)

print("Drunk Elephant TOTAL:", len(lala_all))
lala_all.head()


Drunk Elephant — Sephora reviews: 100
Drunk Elephant — Shoppers reviews: 10
Drunk Elephant — Ulta reviews: 100
Drunk Elephant TOTAL: 210


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,bazaarvoice,sephora_ca,P505783,Drunk Elephant Lala Retro Whipped Moisturizer,5,2025-11-14 19:57:38+00:00,,"I love how portable this mini is, it’s great f...",91 CAD
1,bazaarvoice,sephora_ca,P446938,Drunk Elephant Lala Retro Whipped Moisturizer,3,2025-11-05 21:24:29+00:00,houri,This may be better for a younger person and us...,91 CAD
2,bazaarvoice,sephora_ca,P446938,Drunk Elephant Lala Retro Whipped Moisturizer,2,2025-11-01 01:34:39+00:00,KMad80,Do NOT buy the refill! The moisturizer is grea...,91 CAD
3,bazaarvoice,sephora_ca,P446938,Drunk Elephant Lala Retro Whipped Moisturizer,1,2025-10-20 01:21:21+00:00,meowprincess123,I used this product consistently until empty a...,91 CAD
4,bazaarvoice,sephora_ca,P446938,Drunk Elephant Lala Retro Whipped Moisturizer,4,2025-10-15 14:04:26+00:00,hyperspace,I've had oily skin my entire life and haven't ...,91 CAD


# CLINIQUE

In [None]:
import requests
import pandas as pd
from urllib.parse import urljoin

# ===============================================================
# 1) CLINIQUE — Sephora (Bazaarvoice)
# ===============================================================
# Requests up to 100 reviews (Limit=100, Offset=0, sorted by SubmissionTime
# descending) from the Bazaarvoice reviews endpoint and maps each one onto the
# shared review columns. No further offsets are requested.

sephora_url = (
    "https://api.bazaarvoice.com/data/reviews.json?"
    "Filter=ProductId%3AP381030&Sort=SubmissionTime%3Adesc&Limit=100&Offset=0"
    "&Include=Products%2CComments&Stats=Reviews"
    "&passkey=calXm2DyQVjcCy9agq85vmTJv5ELuuBCF2sdg4BnJzJus"
    "&apiversion=5.4&Locale=en_CA"
)

sephora_resp = requests.get(sephora_url, timeout=60)
sephora_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
sephora_json = sephora_resp.json()
sephora_reviews = sephora_json.get("Results") or []
products_info = (sephora_json.get("Includes") or {}).get("Products", {})

# Explicit column list so an empty response still yields a frame with the full schema.
sephora_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]

clinique_sephora = pd.DataFrame(
    [{
        "source": "bazaarvoice",
        "retailer": "sephora_ca",
        "product_id": r.get("ProductId"),
        "product_name": "Clinique Dramatically Different Moisturizing Lotion+",
        "rating": r.get("Rating"),
        "submission_time": pd.to_datetime(r.get("SubmissionTime"), errors="coerce", utc=True),
        "user_nickname": r.get("UserNickname"),
        "review_text": r.get("ReviewText"),
        "price": "42 CAD",
    } for r in sephora_reviews],
    columns=sephora_columns,
)

print("Clinique — Sephora reviews:", len(clinique_sephora))


# ===============================================================
# 2) CLINIQUE — Shoppers Drug Mart (PowerReviews)
# ===============================================================
# Fetches one PowerReviews response for the Shoppers listing and flattens its
# reviews into the shared review columns.
# NOTE(review): only this single response is read (no paging loop like the Ulta
# section has) — confirm that is intended.

shoppers_url = (
    "https://display.powerreviews.com/m/867678/l/all/product/020714598907/reviews"
    "?apikey=f40b12ae-7efa-423a-8f12-cb902debf1c0&_noconfig=true&page_locale=en_CA"
)

shoppers_resp = requests.get(shoppers_url, timeout=60)
shoppers_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
shoppers_json = shoppers_resp.json()
results = shoppers_json.get("results") or []
block = results[0] if isinstance(results, list) and len(results) > 0 else {}
shoppers_reviews = block.get("reviews") or []

df_shoppers_raw = pd.json_normalize(shoppers_reviews, sep=".")

# Timestamp handling (values >= 1e12 are read as milliseconds, smaller ones as seconds)
if not df_shoppers_raw.empty:
    # Fall back to an all-missing column so a payload without created_date yields NaT
    # instead of crashing on `.where`.
    created_raw = df_shoppers_raw.get(
        "details.created_date", pd.Series([None] * len(df_shoppers_raw))
    )
    num = pd.to_numeric(created_raw, errors="coerce")
    ms = pd.to_datetime(num.where(num >= 1e12), unit="ms", errors="coerce", utc=True)
    s  = pd.to_datetime(num.where(num < 1e12), unit="s", errors="coerce", utc=True)
    shoppers_time = ms.combine_first(s)
else:
    # Same tz-aware dtype as the non-empty branch, so later concats keep a datetime column.
    shoppers_time = pd.Series([], dtype="datetime64[ns, UTC]")

# Rating: prefer metrics.rating, then details.rating, else all-missing
if "metrics.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["metrics.rating"]
elif "details.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["details.rating"]
else:
    rating_series = pd.Series([None] * len(df_shoppers_raw))

clinique_shoppers = pd.DataFrame({
    "source": "powerreviews",
    "retailer": "shoppers_ca",
    "product_id": block.get("page_id"),
    "product_name": "Clinique Dramatically Different Moisturizing Lotion+",
    "rating": pd.to_numeric(rating_series, errors="coerce"),
    "submission_time": shoppers_time,
    "user_nickname": df_shoppers_raw.get("details.nickname"),
    "review_text": df_shoppers_raw.get("details.comments"),
    "price": "42 CAD",
})

print("Clinique — Shoppers reviews:", len(clinique_shoppers))


# ===============================================================
# 3) CLINIQUE — Ulta (PowerReviews MULTI-PAGE)
# ===============================================================
# Follows the PowerReviews `next_page_url` chain for the Ulta listing (at most
# `max_pages` requests) and stacks every page into one frame that uses the
# shared review columns.

base = "https://display.powerreviews.com"
api_key = "daa0f241-c242-4483-afb7-4449942d1a2b"

url = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/xlsImpprod10791743/reviews"
    f"?apikey={api_key}&_noconfig=true"
)

ulta_rows = []
ulta_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]
headers = {"User-Agent": "Mozilla/5.0"}
max_pages = 20
page_count = 0

while True:
    resp = requests.get(url, timeout=60, headers=headers)
    resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
    js = resp.json()
    results = js.get("results")
    # `results` is accepted either as a dict or as a list whose first item holds the page
    block = results if isinstance(results, dict) else (results[0] if results else {})
    reviews = block.get("reviews") or []

    if not reviews:
        break

    df_ulta_raw = pd.json_normalize(reviews, sep=".")
    n = len(df_ulta_raw)
    ul_blank = pd.Series([None] * n)  # placeholder for any column this page lacks

    # timestamps (values >= 1e12 are read as milliseconds, smaller ones as seconds)
    ul_num = pd.to_numeric(df_ulta_raw.get("details.created_date", ul_blank), errors="coerce")
    ul_ms = pd.to_datetime(ul_num.where(ul_num >= 1e12), unit="ms", errors="coerce", utc=True)
    ul_s  = pd.to_datetime(ul_num.where(ul_num < 1e12), unit="s", errors="coerce", utc=True)
    ul_time = ul_ms.combine_first(ul_s)

    # nickname: reviewer.display_name first, then details.nickname, then reviewer.name
    ul_disp = df_ulta_raw.get("details.nickname", ul_blank)
    if "reviewer.display_name" in df_ulta_raw:
        ul_disp = df_ulta_raw["reviewer.display_name"].combine_first(ul_disp)
    if "reviewer.name" in df_ulta_raw:
        ul_disp = ul_disp.combine_first(df_ulta_raw["reviewer.name"])

    # review text: details.comments, falling back to review_text
    ul_text = df_ulta_raw.get("details.comments", ul_blank).combine_first(
        df_ulta_raw.get("review_text", ul_blank)
    )

    # rating: prefer metrics.rating, then details.rating, else all-missing
    if "metrics.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["metrics.rating"]
    elif "details.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["details.rating"]
    else:
        ul_rating = ul_blank

    df_tmp = pd.DataFrame({
        "source": "powerreviews",
        "retailer": "ulta_us",
        "product_id": block.get("page_id"),
        "product_name": "Clinique Dramatically Different Moisturizing Lotion+",
        "rating": pd.to_numeric(ul_rating, errors="coerce"),
        "submission_time": ul_time,
        "user_nickname": ul_disp,
        "review_text": ul_text,
        "price": "38 USD",
    })

    ulta_rows.append(df_tmp)
    page_count += 1

    if page_count >= max_pages:
        break

    paging = js.get("paging") or block.get("paging") or {}
    next_rel = paging.get("next_page_url")
    if not next_rel:
        break

    url = urljoin(base, next_rel)
    # Re-attach the query parameters if the paging link dropped them; start the
    # query string with "?" when the link has none.
    if "apikey=" not in url:
        url += ("&" if "?" in url else "?") + f"apikey={api_key}"
    if "_noconfig=true" not in url:
        url += ("&" if "?" in url else "?") + "_noconfig=true"

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
clinique_ulta = (
    pd.concat(ulta_rows, ignore_index=True)
    if ulta_rows
    else pd.DataFrame(columns=ulta_columns)
)
print("Clinique — Ulta reviews:", len(clinique_ulta))


# ===============================================================
# COMBINE ALL (CLINIQUE)
# ===============================================================
# Stack the Sephora, Shoppers and Ulta frames into one table with a fresh
# 0..n-1 index, then preview the first rows.

clinique_all = pd.concat(
    objs=[clinique_sephora, clinique_shoppers, clinique_ulta],
    ignore_index=True,
)

print("Clinique TOTAL:", len(clinique_all))
clinique_all.head()


Clinique — Sephora reviews: 100
Clinique — Shoppers reviews: 10
Clinique — Ulta reviews: 100
Clinique TOTAL: 210


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,bazaarvoice,sephora_ca,P381030,Clinique Dramatically Different Moisturizing L...,5,2025-11-28 03:42:52+00:00,Sharrrnn,Keeps coming back to this OG lotion. The only ...,42 CAD
1,bazaarvoice,sephora_ca,P425857,Clinique Dramatically Different Moisturizing L...,1,2025-11-17 16:39:19+00:00,dedebece,"It’s SO tiny, 10$ isn’t a lot of money but tha...",42 CAD
2,bazaarvoice,sephora_ca,P381030,Clinique Dramatically Different Moisturizing L...,5,2025-11-02 19:04:25+00:00,Keslerkin,I love this product!! I use this a night and w...,42 CAD
3,bazaarvoice,sephora_ca,P425857,Clinique Dramatically Different Moisturizing L...,4,2025-08-15 16:44:16+00:00,juberkins,Classic face cream good for daily use no stron...,42 CAD
4,bazaarvoice,sephora_ca,P381030,Clinique Dramatically Different Moisturizing L...,5,2025-08-13 21:39:32+00:00,NilooH,"I love this product, and the name ‘Dramaticall...",42 CAD


# ESTEE LAUDER

In [None]:
import requests
import pandas as pd
from urllib.parse import urljoin

# ===============================================================
# 1) ESTÉE LAUDER — Sephora (Bazaarvoice)
# ===============================================================
# Requests up to 100 reviews (Limit=100, Offset=0, sorted by SubmissionTime
# descending) from the Bazaarvoice reviews endpoint and maps each one onto the
# shared review columns. No further offsets are requested.

sephora_url = (
    "https://api.bazaarvoice.com/data/reviews.json?"
    "Filter=ProductId%3AP479839&Sort=SubmissionTime%3Adesc&Limit=100&Offset=0"
    "&Include=Products%2CComments&Stats=Reviews"
    "&passkey=calXm2DyQVjcCy9agq85vmTJv5ELuuBCF2sdg4BnJzJus"
    "&apiversion=5.4&Locale=en_CA"
)

sephora_resp = requests.get(sephora_url, timeout=60)
sephora_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
sephora_json = sephora_resp.json()
sephora_reviews = sephora_json.get("Results") or []
products_info = (sephora_json.get("Includes") or {}).get("Products", {})

# Explicit column list so an empty response still yields a frame with the full schema.
sephora_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]

estee_supreme_sephora = pd.DataFrame(
    [{
        "source": "bazaarvoice",
        "retailer": "sephora_ca",
        "product_id": r.get("ProductId"),
        "product_name": "Estée Lauder Revitalizing Supreme+ Global Anti-Aging Cream",
        "rating": r.get("Rating"),
        "submission_time": pd.to_datetime(r.get("SubmissionTime"), errors="coerce", utc=True),
        "user_nickname": r.get("UserNickname"),
        "review_text": r.get("ReviewText"),
        "price": "120 CAD",
    } for r in sephora_reviews],
    columns=sephora_columns,
)

print("Estée Lauder Supreme+ — Sephora reviews:", len(estee_supreme_sephora))


# ===============================================================
# 2) ESTÉE LAUDER — Shoppers Drug Mart (PowerReviews)
# ===============================================================
# Fetches one PowerReviews response for the Shoppers listing and flattens its
# reviews into the shared review columns.
# NOTE(review): only this single response is read (no paging loop like the Ulta
# section has) — confirm that is intended.

shoppers_url = (
    "https://display.powerreviews.com/m/867678/l/all/product/887167602076/reviews"
    "?apikey=f40b12ae-7efa-423a-8f12-cb902debf1c0&_noconfig=true&page_locale=en_CA"
)

shoppers_resp = requests.get(shoppers_url, timeout=60)
shoppers_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
shoppers_json = shoppers_resp.json()
results = shoppers_json.get("results") or []
block = results[0] if isinstance(results, list) and len(results) > 0 else {}
shoppers_reviews = block.get("reviews") or []

df_shoppers_raw = pd.json_normalize(shoppers_reviews, sep=".")

# Timestamp handling (values >= 1e12 are read as milliseconds, smaller ones as seconds)
if not df_shoppers_raw.empty:
    # Fall back to an all-missing column so a payload without created_date yields NaT
    # instead of crashing on `.where`.
    created_raw = df_shoppers_raw.get(
        "details.created_date", pd.Series([None] * len(df_shoppers_raw))
    )
    num = pd.to_numeric(created_raw, errors="coerce")
    ms = pd.to_datetime(num.where(num >= 1e12), unit="ms", errors="coerce", utc=True)
    s  = pd.to_datetime(num.where(num < 1e12), unit="s", errors="coerce", utc=True)
    shoppers_time = ms.combine_first(s)
else:
    # Same tz-aware dtype as the non-empty branch, so later concats keep a datetime column.
    shoppers_time = pd.Series([], dtype="datetime64[ns, UTC]")

# Rating: prefer metrics.rating, then details.rating, else all-missing
if "metrics.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["metrics.rating"]
elif "details.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["details.rating"]
else:
    rating_series = pd.Series([None] * len(df_shoppers_raw))

estee_supreme_shoppers = pd.DataFrame({
    "source": "powerreviews",
    "retailer": "shoppers_ca",
    "product_id": block.get("page_id"),
    "product_name": "Estée Lauder Revitalizing Supreme+ Global Anti-Aging Cream",
    "rating": pd.to_numeric(rating_series, errors="coerce"),
    "submission_time": shoppers_time,
    "user_nickname": df_shoppers_raw.get("details.nickname"),
    "review_text": df_shoppers_raw.get("details.comments"),
    "price": "140 CAD",
})

print("Estée Lauder Supreme+ — Shoppers reviews:", len(estee_supreme_shoppers))


# ===============================================================
# 3) ESTÉE LAUDER — Ulta (PowerReviews MULTI-PAGE)
# ===============================================================
# Follows the PowerReviews `next_page_url` chain for the Ulta listing (at most
# `max_pages` requests) and stacks every page into one frame that uses the
# shared review columns.

base = "https://display.powerreviews.com"
api_key = "daa0f241-c242-4483-afb7-4449942d1a2b"

url = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/pimprod2030772/reviews"
    f"?apikey={api_key}&_noconfig=true"
)

ulta_rows = []
ulta_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]
headers = {"User-Agent": "Mozilla/5.0"}
max_pages = 20
page_count = 0

while True:
    resp = requests.get(url, timeout=60, headers=headers)
    resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
    js = resp.json()
    results = js.get("results")
    # `results` is accepted either as a dict or as a list whose first item holds the page
    block = results if isinstance(results, dict) else (results[0] if results else {})
    reviews = block.get("reviews") or []

    if not reviews:
        break

    df_ulta_raw = pd.json_normalize(reviews, sep=".")
    n = len(df_ulta_raw)
    ul_blank = pd.Series([None] * n)  # placeholder for any column this page lacks

    # timestamps (values >= 1e12 are read as milliseconds, smaller ones as seconds)
    ul_num = pd.to_numeric(df_ulta_raw.get("details.created_date", ul_blank), errors="coerce")
    ul_ms = pd.to_datetime(ul_num.where(ul_num >= 1e12), unit="ms", errors="coerce", utc=True)
    ul_s  = pd.to_datetime(ul_num.where(ul_num < 1e12), unit="s", errors="coerce", utc=True)
    ul_time = ul_ms.combine_first(ul_s)

    # nickname: reviewer.display_name first, then details.nickname, then reviewer.name
    ul_disp = df_ulta_raw.get("details.nickname", ul_blank)
    if "reviewer.display_name" in df_ulta_raw:
        ul_disp = df_ulta_raw["reviewer.display_name"].combine_first(ul_disp)
    if "reviewer.name" in df_ulta_raw:
        # was `df_ultima_raw`, an undefined name that raised NameError on this branch
        ul_disp = ul_disp.combine_first(df_ulta_raw["reviewer.name"])

    # review text: details.comments, falling back to review_text
    ul_text = df_ulta_raw.get("details.comments", ul_blank).combine_first(
        df_ulta_raw.get("review_text", ul_blank)
    )

    # rating: prefer metrics.rating, then details.rating, else all-missing
    if "metrics.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["metrics.rating"]
    elif "details.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["details.rating"]
    else:
        ul_rating = ul_blank

    df_tmp = pd.DataFrame({
        "source": "powerreviews",
        "retailer": "ulta_us",
        "product_id": block.get("page_id"),
        "product_name": "Estée Lauder Revitalizing Supreme+ Global Anti-Aging Cream",
        "rating": pd.to_numeric(ul_rating, errors="coerce"),
        "submission_time": ul_time,
        "user_nickname": ul_disp,
        "review_text": ul_text,
        "price": "90 USD",
    })

    ulta_rows.append(df_tmp)
    page_count += 1

    if page_count >= max_pages:
        break

    paging = js.get("paging") or block.get("paging") or {}
    next_rel = paging.get("next_page_url")
    if not next_rel:
        break

    url = urljoin(base, next_rel)
    # Re-attach the query parameters if the paging link dropped them; start the
    # query string with "?" when the link has none.
    if "apikey=" not in url:
        url += ("&" if "?" in url else "?") + f"apikey={api_key}"
    if "_noconfig=true" not in url:
        url += ("&" if "?" in url else "?") + "_noconfig=true"

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
estee_supreme_ulta = (
    pd.concat(ulta_rows, ignore_index=True)
    if ulta_rows
    else pd.DataFrame(columns=ulta_columns)
)
print("Estée Lauder Supreme+ — Ulta reviews:", len(estee_supreme_ulta))


# ===============================================================
# COMBINE ALL (ESTÉE LAUDER SUPREME+)
# ===============================================================
# Stack the Sephora, Shoppers and Ulta frames into one table with a fresh
# 0..n-1 index, then preview the first rows.

estee_supreme_all = pd.concat(
    objs=[estee_supreme_sephora, estee_supreme_shoppers, estee_supreme_ulta],
    ignore_index=True,
)

print("Estée Lauder Supreme+ TOTAL:", len(estee_supreme_all))
estee_supreme_all.head()


Estée Lauder Supreme+ — Sephora reviews: 100
Estée Lauder Supreme+ — Shoppers reviews: 10
Estée Lauder Supreme+ — Ulta reviews: 100
Estée Lauder Supreme+ TOTAL: 210


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,bazaarvoice,sephora_ca,P481084,Estée Lauder Revitalizing Supreme+ Global Anti...,3,2025-11-06 10:56:33+00:00,katie728,This doesn’t absorb and makes me look really g...,120 CAD
1,bazaarvoice,sephora_ca,P479839,Estée Lauder Revitalizing Supreme+ Global Anti...,5,2025-09-03 02:08:05+00:00,TerraleeG,The most incredible moisturizer I’ve ever used...,120 CAD
2,bazaarvoice,sephora_ca,P479839,Estée Lauder Revitalizing Supreme+ Global Anti...,5,2025-08-30 01:26:59+00:00,Valerykl,I’ve been using the Estée Lauder Revitalizing ...,120 CAD
3,bazaarvoice,sephora_ca,P479839,Estée Lauder Revitalizing Supreme+ Global Anti...,5,2025-08-26 03:37:44+00:00,Avabby,Amazing!! It’s super hydrating it leaves my sk...,120 CAD
4,bazaarvoice,sephora_ca,P479839,Estée Lauder Revitalizing Supreme+ Global Anti...,1,2025-08-23 19:43:21+00:00,JoeInsider,"As a moisturizer it's fantastic, especially fo...",120 CAD


# LANCOME

In [None]:
import requests
import pandas as pd
from urllib.parse import urljoin

# ===============================================================
# 1) LANCÔME — Sephora (Bazaarvoice)
# ===============================================================
# Requests up to 100 reviews (Limit=100, Offset=0, sorted by SubmissionTime
# descending) from the Bazaarvoice reviews endpoint and maps each one onto the
# shared review columns. No further offsets are requested.

# FIXME: P479839 is the same ProductId the Estée Lauder section queries, so this
# request returns that product's reviews and labels them as Lancôme (the saved
# output below shows a review that names Estée Lauder). Replace it with the
# Sephora product id for the Lancôme cream.
sephora_product_id = "P479839"

sephora_url = (
    "https://api.bazaarvoice.com/data/reviews.json?"
    f"Filter=ProductId%3A{sephora_product_id}&Sort=SubmissionTime%3Adesc&Limit=100&Offset=0"
    "&Include=Products%2CComments&Stats=Reviews"
    "&passkey=calXm2DyQVjcCy9agq85vmTJv5ELuuBCF2sdg4BnJzJus"
    "&apiversion=5.4&Locale=en_CA"
)

sephora_resp = requests.get(sephora_url, timeout=60)
sephora_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
sephora_json = sephora_resp.json()
sephora_reviews = sephora_json.get("Results") or []
products_info = (sephora_json.get("Includes") or {}).get("Products", {})

# Soft sanity check so a mismatched ProductId is visible in the cell output.
# assumes product entries expose Brand.Name — TODO confirm against the payload;
# if they do not, no brand is collected and nothing is printed.
sephora_brands = set()
if isinstance(products_info, dict):
    for prod in products_info.values():
        brand = prod.get("Brand") if isinstance(prod, dict) else None
        if isinstance(brand, dict) and brand.get("Name"):
            sephora_brands.add(str(brand["Name"]))
if sephora_brands and not any("lanc" in b.lower() for b in sephora_brands):
    print(
        f"WARNING: ProductId {sephora_product_id} returned brand(s) "
        f"{sorted(sephora_brands)}, expected Lancôme"
    )

# Explicit column list so an empty response still yields a frame with the full schema.
sephora_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]

lancome_hpn_sephora = pd.DataFrame(
    [{
        "source": "bazaarvoice",
        "retailer": "sephora_ca",
        "product_id": r.get("ProductId"),
        "product_name": "Lancôme Rénergie H.P.N. 300-Peptide Cream",
        "rating": r.get("Rating"),
        "submission_time": pd.to_datetime(r.get("SubmissionTime"), errors="coerce", utc=True),
        "user_nickname": r.get("UserNickname"),
        "review_text": r.get("ReviewText"),
        "price": "165 CAD",
    } for r in sephora_reviews],
    columns=sephora_columns,
)

print("Lancôme HPN — Sephora reviews:", len(lancome_hpn_sephora))


# ===============================================================
# 2) LANCÔME — Shoppers Drug Mart (PowerReviews)
# ===============================================================
# Fetches one PowerReviews response for the Shoppers listing and flattens its
# reviews into the shared review columns.
# NOTE(review): only this single response is read (no paging loop like the Ulta
# section has) — confirm that is intended.

shoppers_url = (
    "https://display.powerreviews.com/m/867678/l/all/product/3614273924061/reviews"
    "?apikey=f40b12ae-7efa-423a-8f12-cb902debf1c0&_noconfig=true&page_locale=en_CA"
)

shoppers_resp = requests.get(shoppers_url, timeout=60)
shoppers_resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
shoppers_json = shoppers_resp.json()
results = shoppers_json.get("results") or []
block = results[0] if isinstance(results, list) and len(results) > 0 else {}
shoppers_reviews = block.get("reviews") or []

df_shoppers_raw = pd.json_normalize(shoppers_reviews, sep=".")

# Timestamp handling (values >= 1e12 are read as milliseconds, smaller ones as seconds)
if not df_shoppers_raw.empty:
    # Fall back to an all-missing column so a payload without created_date yields NaT
    # instead of crashing on `.where`.
    created_raw = df_shoppers_raw.get(
        "details.created_date", pd.Series([None] * len(df_shoppers_raw))
    )
    num = pd.to_numeric(created_raw, errors="coerce")
    ms = pd.to_datetime(num.where(num >= 1e12), unit="ms", errors="coerce", utc=True)
    s  = pd.to_datetime(num.where(num < 1e12), unit="s", errors="coerce", utc=True)
    shoppers_time = ms.combine_first(s)
else:
    # Same tz-aware dtype as the non-empty branch, so later concats keep a datetime column.
    shoppers_time = pd.Series([], dtype="datetime64[ns, UTC]")

# Rating: prefer metrics.rating, then details.rating, else all-missing
if "metrics.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["metrics.rating"]
elif "details.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["details.rating"]
else:
    rating_series = pd.Series([None] * len(df_shoppers_raw))

lancome_hpn_shoppers = pd.DataFrame({
    "source": "powerreviews",
    "retailer": "shoppers_ca",
    "product_id": block.get("page_id"),
    "product_name": "Lancôme Rénergie H.P.N. 300-Peptide Cream",
    "rating": pd.to_numeric(rating_series, errors="coerce"),
    "submission_time": shoppers_time,
    "user_nickname": df_shoppers_raw.get("details.nickname"),
    "review_text": df_shoppers_raw.get("details.comments"),
    "price": "175 CAD",
})

print("Lancôme HPN — Shoppers reviews:", len(lancome_hpn_shoppers))


# ===============================================================
# 3) LANCÔME — Ulta (PowerReviews MULTI-PAGE)
# ===============================================================
# Follows the PowerReviews `next_page_url` chain for the Ulta listing (at most
# `max_pages` requests) and stacks every page into one frame that uses the
# shared review columns.

base = "https://display.powerreviews.com"
api_key = "daa0f241-c242-4483-afb7-4449942d1a2b"

url = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/pimprod2040493/reviews"
    f"?apikey={api_key}&_noconfig=true"
)

ulta_rows = []
ulta_columns = [
    "source", "retailer", "product_id", "product_name", "rating",
    "submission_time", "user_nickname", "review_text", "price",
]
headers = {"User-Agent": "Mozilla/5.0"}
max_pages = 20
page_count = 0

while True:
    resp = requests.get(url, timeout=60, headers=headers)
    resp.raise_for_status()  # stop on HTTP errors instead of parsing an error body
    js = resp.json()
    results = js.get("results")
    # `results` is accepted either as a dict or as a list whose first item holds the page
    block = results if isinstance(results, dict) else (results[0] if results else {})
    reviews = block.get("reviews") or []

    if not reviews:
        break

    df_ulta_raw = pd.json_normalize(reviews, sep=".")
    n = len(df_ulta_raw)
    ul_blank = pd.Series([None] * n)  # placeholder for any column this page lacks

    # timestamps (values >= 1e12 are read as milliseconds, smaller ones as seconds)
    ul_num = pd.to_numeric(df_ulta_raw.get("details.created_date", ul_blank), errors="coerce")
    ul_ms = pd.to_datetime(ul_num.where(ul_num >= 1e12), unit="ms", errors="coerce", utc=True)
    ul_s  = pd.to_datetime(ul_num.where(ul_num < 1e12), unit="s", errors="coerce", utc=True)
    ul_time = ul_ms.combine_first(ul_s)

    # nickname: reviewer.display_name first, then details.nickname, then reviewer.name
    ul_disp = df_ulta_raw.get("details.nickname", ul_blank)
    if "reviewer.display_name" in df_ulta_raw:
        ul_disp = df_ulta_raw["reviewer.display_name"].combine_first(ul_disp)
    if "reviewer.name" in df_ulta_raw:
        ul_disp = ul_disp.combine_first(df_ulta_raw["reviewer.name"])

    # review text: details.comments, falling back to review_text
    ul_text = df_ulta_raw.get("details.comments", ul_blank).combine_first(
        df_ulta_raw.get("review_text", ul_blank)
    )

    # rating: prefer metrics.rating, then details.rating, else all-missing
    if "metrics.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["metrics.rating"]
    elif "details.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["details.rating"]
    else:
        ul_rating = ul_blank

    df_tmp = pd.DataFrame({
        "source": "powerreviews",
        "retailer": "ulta_us",
        "product_id": block.get("page_id"),
        "product_name": "Lancôme Rénergie H.P.N. 300-Peptide Cream",
        "rating": pd.to_numeric(ul_rating, errors="coerce"),
        "submission_time": ul_time,
        "user_nickname": ul_disp,
        "review_text": ul_text,
        "price": "120 USD",
    })

    ulta_rows.append(df_tmp)
    page_count += 1

    if page_count >= max_pages:
        break

    paging = js.get("paging") or block.get("paging") or {}
    next_rel = paging.get("next_page_url")
    if not next_rel:
        break

    url = urljoin(base, next_rel)
    # Re-attach the query parameters if the paging link dropped them; start the
    # query string with "?" when the link has none.
    if "apikey=" not in url:
        url += ("&" if "?" in url else "?") + f"apikey={api_key}"
    if "_noconfig=true" not in url:
        url += ("&" if "?" in url else "?") + "_noconfig=true"

# pd.concat raises on an empty list, so fall back to an empty frame with the same columns
lancome_hpn_ulta = (
    pd.concat(ulta_rows, ignore_index=True)
    if ulta_rows
    else pd.DataFrame(columns=ulta_columns)
)
print("Lancôme HPN — Ulta reviews:", len(lancome_hpn_ulta))


# ===============================================================
# COMBINE ALL (LANCÔME HPN)
# ===============================================================
# Stack the Sephora, Shoppers and Ulta frames into one table with a fresh
# 0..n-1 index, then preview the first rows.

lancome_hpn_all = pd.concat(
    objs=[lancome_hpn_sephora, lancome_hpn_shoppers, lancome_hpn_ulta],
    ignore_index=True,
)

print("Lancôme HPN TOTAL:", len(lancome_hpn_all))
lancome_hpn_all.head()


Lancôme HPN — Sephora reviews: 100
Lancôme HPN — Shoppers reviews: 10
Lancôme HPN — Ulta reviews: 100
Lancôme HPN TOTAL: 210


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,bazaarvoice,sephora_ca,P481084,Lancôme Rénergie H.P.N. 300-Peptide Cream,3,2025-11-06 10:56:33+00:00,katie728,This doesn’t absorb and makes me look really g...,165 CAD
1,bazaarvoice,sephora_ca,P479839,Lancôme Rénergie H.P.N. 300-Peptide Cream,5,2025-09-03 02:08:05+00:00,TerraleeG,The most incredible moisturizer I’ve ever used...,165 CAD
2,bazaarvoice,sephora_ca,P479839,Lancôme Rénergie H.P.N. 300-Peptide Cream,5,2025-08-30 01:26:59+00:00,Valerykl,I’ve been using the Estée Lauder Revitalizing ...,165 CAD
3,bazaarvoice,sephora_ca,P479839,Lancôme Rénergie H.P.N. 300-Peptide Cream,5,2025-08-26 03:37:44+00:00,Avabby,Amazing!! It’s super hydrating it leaves my sk...,165 CAD
4,bazaarvoice,sephora_ca,P479839,Lancôme Rénergie H.P.N. 300-Peptide Cream,1,2025-08-23 19:43:21+00:00,JoeInsider,"As a moisturizer it's fantastic, especially fo...",165 CAD


# KIEHLS

In [None]:
import requests
import pandas as pd
from urllib.parse import urljoin

# ===============================================================
# 1) KIEHL’S — Sephora (Bazaarvoice)
# ===============================================================

# FIXME(review): the ProductId filter below (P479839) looks copy-pasted from
# another product's cell. The saved preview of this cell is row-for-row
# identical to the Lancôme HPN preview (same product_ids, nicknames,
# timestamps and review texts), so these rows are most likely not Kiehl’s
# reviews. Replace the id with the Kiehl’s product id on Sephora and re-run.
sephora_url = (
    "https://api.bazaarvoice.com/data/reviews.json?"
    "Filter=ProductId%3AP479839&Sort=SubmissionTime%3Adesc&Limit=100&Offset=0"
    "&Include=Products%2CComments&Stats=Reviews"
    "&passkey=calXm2DyQVjcCy9agq85vmTJv5ELuuBCF2sdg4BnJzJus"
    "&apiversion=5.4&Locale=en_CA"
)

sephora_resp = requests.get(sephora_url, timeout=60)
# Fail loudly on an HTTP error instead of parsing an error body and silently
# ending up with zero reviews.
sephora_resp.raise_for_status()
sephora_json = sephora_resp.json()
# `or []` also covers a payload where "Results" is present but null.
sephora_reviews = sephora_json.get("Results") or []
products_info = (sephora_json.get("Includes") or {}).get("Products", {})

# One row per review. product_name and price are fixed labels for this
# product/retailer; everything else is read from the review record.
kiehls_sephora = pd.DataFrame(
    [{
        "source": "bazaarvoice",
        "retailer": "sephora_ca",
        "product_id": r.get("ProductId"),
        "product_name": "Kiehl’s Ultra Facial Moisturizing Cream with Squalane",
        "rating": r.get("Rating"),
        "submission_time": pd.to_datetime(r.get("SubmissionTime"), errors="coerce", utc=True),
        "user_nickname": r.get("UserNickname"),
        "review_text": r.get("ReviewText"),
        "price": "57 CAD",
    } for r in sephora_reviews],
    # Explicit columns keep the schema intact even when no reviews come back
    # (a bare pd.DataFrame([]) would have no columns at all).
    columns=[
        "source", "retailer", "product_id", "product_name", "rating",
        "submission_time", "user_nickname", "review_text", "price",
    ],
)

print("Kiehl’s — Sephora reviews:", len(kiehls_sephora))


# ===============================================================
# 2) KIEHL’S — Shoppers Drug Mart (PowerReviews)
# ===============================================================

shoppers_url = (
    "https://display.powerreviews.com/m/867678/l/all/product/3605970360757/reviews"
    "?apikey=f40b12ae-7efa-423a-8f12-cb902debf1c0&_noconfig=true&page_locale=en_CA"
)

shoppers_resp = requests.get(shoppers_url, timeout=60)
# Fail loudly on an HTTP error instead of parsing an error body and silently
# ending up with zero reviews.
shoppers_resp.raise_for_status()
shoppers_json = shoppers_resp.json()
results = shoppers_json.get("results", [])
# Only the first entry of "results" is used; it holds the reviews and page_id.
block = results[0] if isinstance(results, list) and len(results) > 0 else {}
# `or []` also covers a payload where "reviews" is present but null.
shoppers_reviews = block.get("reviews") or []

# Flatten nested review dicts into dotted columns (e.g. "details.comments").
df_shoppers_raw = pd.json_normalize(shoppers_reviews, sep=".")

# Timestamp handling: values >= 1e12 are read as epoch milliseconds, smaller
# ones as epoch seconds; the two partial results are then merged.
if not df_shoppers_raw.empty:
    created_raw = df_shoppers_raw.get("details.created_date")
    if created_raw is None:
        # Column absent: use an all-NaN series so every timestamp becomes NaT
        # instead of crashing on `None.where(...)`.
        created_raw = pd.Series(float("nan"), index=df_shoppers_raw.index)
    num = pd.to_numeric(created_raw, errors="coerce")
    ms = pd.to_datetime(num.where(num >= 1e12), unit="ms", errors="coerce", utc=True)
    s  = pd.to_datetime(num.where(num < 1e12), unit="s", errors="coerce", utc=True)
    shoppers_time = ms.combine_first(s)
else:
    # Keep the empty fallback UTC-aware like the branch above, so the column
    # dtype does not depend on whether any reviews were returned.
    shoppers_time = pd.Series([], dtype="datetime64[ns, UTC]")

# Rating: prefer "metrics.rating", then "details.rating", else all-missing.
if "metrics.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["metrics.rating"]
elif "details.rating" in df_shoppers_raw:
    rating_series = df_shoppers_raw["details.rating"]
else:
    rating_series = pd.Series([None] * len(df_shoppers_raw))

# Scalars (source, retailer, product_id, product_name, price) are broadcast
# to every row; the Series supply the per-review values.
kiehls_shoppers = pd.DataFrame({
    "source": "powerreviews",
    "retailer": "shoppers_ca",
    "product_id": block.get("page_id"),
    "product_name": "Kiehl’s Ultra Facial Moisturizing Cream with Squalane",
    "rating": pd.to_numeric(rating_series, errors="coerce"),
    "submission_time": shoppers_time,
    "user_nickname": df_shoppers_raw.get("details.nickname"),
    "review_text": df_shoppers_raw.get("details.comments"),
    "price": "60 CAD",
})

print("Kiehl’s — Shoppers reviews:", len(kiehls_shoppers))


# ===============================================================
# 3) KIEHL’S — Ulta (PowerReviews MULTI-PAGE)
# ===============================================================

base = "https://display.powerreviews.com"
api_key = "daa0f241-c242-4483-afb7-4449942d1a2b"

url = (
    "https://display.powerreviews.com/m/6406/l/en_US/product/pimprod2002804/reviews"
    f"?apikey={api_key}&_noconfig=true"
)

ulta_rows = []          # one DataFrame per fetched page
headers = {"User-Agent": "Mozilla/5.0"}
max_pages = 20          # hard cap on the number of pages requested
page_count = 0

while True:
    resp = requests.get(url, timeout=60, headers=headers)
    if not resp.ok:
        # Stop paginating on an HTTP failure but keep the pages already
        # collected, and say so instead of failing inside .json().
        print(f"Kiehl’s — Ulta: HTTP {resp.status_code} on page {page_count + 1}; stopping pagination")
        break
    js = resp.json()
    results = js.get("results")
    # "results" may be a dict or a list; in the list case use its first entry.
    block = results if isinstance(results, dict) else (results[0] if results else {})
    reviews = block.get("reviews") or []

    if not reviews:
        break

    df_ulta_raw = pd.json_normalize(reviews, sep=".")
    n = len(df_ulta_raw)

    # timestamps: values >= 1e12 are read as epoch milliseconds, smaller ones
    # as epoch seconds. Fall back to an all-NaN series when the column is
    # absent so the page yields NaT instead of crashing on `None.where(...)`.
    ul_created = df_ulta_raw.get("details.created_date")
    if ul_created is None:
        ul_created = pd.Series(float("nan"), index=df_ulta_raw.index)
    ul_num = pd.to_numeric(ul_created, errors="coerce")
    ul_ms = pd.to_datetime(ul_num.where(ul_num >= 1e12), unit="ms", errors="coerce", utc=True)
    ul_s  = pd.to_datetime(ul_num.where(ul_num < 1e12), unit="s", errors="coerce", utc=True)
    ul_time = ul_ms.combine_first(ul_s)

    # nickname: reviewer.display_name wins, then details.nickname, then
    # reviewer.name fills whatever is still missing.
    ul_disp = df_ulta_raw.get("details.nickname", pd.Series([None]*n))
    if "reviewer.display_name" in df_ulta_raw:
        ul_disp = df_ulta_raw["reviewer.display_name"].combine_first(ul_disp)
    if "reviewer.name" in df_ulta_raw:
        ul_disp = ul_disp.combine_first(df_ulta_raw["reviewer.name"])

    # review text: details.comments, with review_text as the fallback
    ul_text = df_ulta_raw.get("details.comments", pd.Series([None]*n)).combine_first(
        df_ulta_raw.get("review_text", pd.Series([None]*n))
    )

    # rating: prefer metrics.rating, then details.rating, else all-missing
    if "metrics.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["metrics.rating"]
    elif "details.rating" in df_ulta_raw:
        ul_rating = df_ulta_raw["details.rating"]
    else:
        ul_rating = pd.Series([None]*n)

    df_tmp = pd.DataFrame({
        "source": "powerreviews",
        "retailer": "ulta_us",
        "product_id": block.get("page_id"),
        "product_name": "Kiehl’s Ultra Facial Moisturizing Cream with Squalane",
        "rating": pd.to_numeric(ul_rating, errors="coerce"),
        "submission_time": ul_time,
        "user_nickname": ul_disp,
        "review_text": ul_text,
        "price": "38 USD",
    })

    ulta_rows.append(df_tmp)
    page_count += 1

    if page_count >= max_pages:
        break

    # The next-page link is looked up at the top level first, then inside
    # the results block.
    paging = js.get("paging") or block.get("paging") or {}
    next_rel = paging.get("next_page_url")
    if not next_rel:
        break

    url = urljoin(base, next_rel)
    # Re-add the query parameters if the next-page URL does not carry them,
    # using "?" when the URL has no query string yet and "&" otherwise.
    for marker, param in (("apikey=", f"apikey={api_key}"), ("_noconfig=true", "_noconfig=true")):
        if marker not in url:
            url += ("&" if "?" in url else "?") + param

# pd.concat raises "ValueError: No objects to concatenate" on an empty list,
# so fall back to an empty frame with the shared review schema when no page
# yielded reviews.
if ulta_rows:
    kiehls_ulta = pd.concat(ulta_rows, ignore_index=True)
else:
    kiehls_ulta = pd.DataFrame(columns=[
        "source", "retailer", "product_id", "product_name", "rating",
        "submission_time", "user_nickname", "review_text", "price",
    ])
print("Kiehl’s — Ulta reviews:", len(kiehls_ulta))


# ===============================================================
# COMBINE ALL (KIEHL’S ULTRA FACIAL)
# ===============================================================

# Sephora, Shoppers and Ulta frames stacked in that order; ignore_index=True
# discards the per-retailer row labels and renumbers the result 0..n-1.
kiehls_all = pd.concat((kiehls_sephora, kiehls_shoppers, kiehls_ulta), ignore_index=True)

print("Kiehl’s TOTAL:", kiehls_all.shape[0])
# Last expression of the cell: rendered as the preview table.
kiehls_all.head()


Kiehl’s — Sephora reviews: 100
Kiehl’s — Shoppers reviews: 10
Kiehl’s — Ulta reviews: 100
Kiehl’s TOTAL: 210


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,bazaarvoice,sephora_ca,P481084,Kiehl’s Ultra Facial Moisturizing Cream with S...,3,2025-11-06 10:56:33+00:00,katie728,This doesn’t absorb and makes me look really g...,57 CAD
1,bazaarvoice,sephora_ca,P479839,Kiehl’s Ultra Facial Moisturizing Cream with S...,5,2025-09-03 02:08:05+00:00,TerraleeG,The most incredible moisturizer I’ve ever used...,57 CAD
2,bazaarvoice,sephora_ca,P479839,Kiehl’s Ultra Facial Moisturizing Cream with S...,5,2025-08-30 01:26:59+00:00,Valerykl,I’ve been using the Estée Lauder Revitalizing ...,57 CAD
3,bazaarvoice,sephora_ca,P479839,Kiehl’s Ultra Facial Moisturizing Cream with S...,5,2025-08-26 03:37:44+00:00,Avabby,Amazing!! It’s super hydrating it leaves my sk...,57 CAD
4,bazaarvoice,sephora_ca,P479839,Kiehl’s Ultra Facial Moisturizing Cream with S...,1,2025-08-23 19:43:21+00:00,JoeInsider,"As a moisturizer it's fantastic, especially fo...",57 CAD


In [None]:
import pandas as pd

# Per-brand combined frames, in the order they should appear in the final table.
moisturizer_frames = (
    byoma_all,
    lala_all,
    clinique_all,
    estee_supreme_all,
    lancome_hpn_all,
    kiehls_all,
)

# Stack them into one moisturizer table with a fresh 0..n-1 index.
all_reviews_moisture = pd.concat(moisturizer_frames, ignore_index=True)

print("TOTAL ROWS (moisturizers):", all_reviews_moisture.shape[0])

# First rows of the stacked table, shown as the cell output.
all_reviews_moisture.head()


TOTAL ROWS (moisturizers): 1160


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2025-10-15 15:36:44.376000+00:00,Taylor,Bought this moisturizer to add to my morning s...,21 CAD
1,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-11-14 01:20:55.816000+00:00,Saphias5,I've finally found my holy grail moisturizer!!...,21 CAD
2,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-10-05 19:28:58.249000+00:00,Harman,After a long search i finally found a moisturi...,21 CAD
3,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-09-22 11:57:49.326000+00:00,Sam,Its so soft on my skin and protect my barrier ...,21 CAD
4,powerreviews,shoppers_ca,5060489793461,BYOMA Moisturizing Rich Cream,5,2024-05-12 05:07:37.348000+00:00,Carl0120,"Great product, great price. Can't beat it. Hyd...",21 CAD


In [None]:
import pandas as pd
# =============================
# MERGE FOUNDATIONS + MOISTURIZERS
# =============================

# Foundation reviews are loaded from a CSV in the working directory;
# moisturizer reviews come from the in-memory all_reviews_moisture frame.
# NOTE(review): read_csv is called without parse_dates, so submission_time
# from the CSV presumably arrives as text while the moisturizer frame holds
# datetimes — confirm before doing date arithmetic on the merged table.
all_reviews_foundation = pd.read_csv("all_reviews_foundation.csv")

# Foundations first, moisturizers second, renumbered 0..n-1.
all_reviews = pd.concat(
    (all_reviews_foundation, all_reviews_moisture),
    ignore_index=True,
)

print("TOTAL ROWS (foundations + moisturizers):", all_reviews.shape[0])

# First rows of the merged table, shown as the cell output.
all_reviews.head()


TOTAL ROWS (foundations + moisturizers): 2663


Unnamed: 0,source,retailer,product_id,product_name,rating,submission_time,user_nickname,review_text,price
0,bazaarvoice,sephora_ca,2558047,Dior Forever Skin Glow,4,2025-11-27 20:10:02+00:00,JulieBBeans,Nice hydrating foundation. A thin layer goes a...,77 CAD
1,bazaarvoice,sephora_ca,2558005,Dior Forever Skin Glow,5,2025-11-04 05:56:27+00:00,,"As someone who does not wear makeup often, thi...",77 CAD
2,bazaarvoice,sephora_ca,2558112,Dior Forever Skin Glow,5,2025-10-31 16:33:35+00:00,PatriceAhl,I love this foundation so much!! I struggle to...,77 CAD
3,bazaarvoice,sephora_ca,2558112,Dior Forever Skin Glow,3,2025-10-30 15:30:06+00:00,manpreet1,Really good Dior hydration foundation but it’s...,77 CAD
4,bazaarvoice,sephora_ca,2558112,Dior Forever Skin Glow,5,2025-10-29 12:52:07+00:00,jenn2080,Beat foundation ever pricey but worth it. Will...,77 CAD


In [None]:
# Write both tables to CSV in the working directory, without the row index.
for _frame, _csv_name in (
    (all_reviews_moisture, 'all_reviews_moisture.csv'),
    (all_reviews, 'all_reviews.csv'),
):
    _frame.to_csv(_csv_name, index=False)