In [None]:
# Libraries
import pandas as pd
import numpy as np
import re
import networkx as nx
from nltk.tokenize import sent_tokenize
import spacy
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import torch
from tqdm import tqdm
from nltk.tokenize import sent_tokenize
from nltk import pos_tag
from nltk.corpus import wordnet
import nltk
from nltk.corpus import stopwords
from collections import Counter
from nltk.stem import WordNetLemmatizer
from collections import defaultdict
from transformers import AutoTokenizer
from symspellpy import SymSpell, Verbosity
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer, util
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF

In [None]:
# Load the Costco Instagram export from the working directory into the
# DataFrame that every later cell reads and reassigns.
# NOTE(review): presumably one row per comment - confirm against the CSV.
costco_insta_corpus = pd.read_csv('cleaned_costco_data_initial.csv')

In [None]:
# Preview the first 10 rows of the freshly loaded dataset
costco_insta_corpus.head(10)

In [None]:
# Rename the columns (by position) to match the other dataset formats and tag
# the data source. The rename is guarded so the cell can be re-run safely:
# once 'source' exists the frame no longer has exactly len(rename_columns)
# columns and a positional re-assignment would raise a length mismatch.
rename_columns = ['url_code', 'post_title', 'post_likes', 'post_year', 'post_month', 'comments', 'comment_like', 'reply_count', 'comment_year', 'comment_month']
if 'source' not in costco_insta_corpus.columns:
    if len(costco_insta_corpus.columns) != len(rename_columns):
        raise ValueError(
            f"Expected {len(rename_columns)} columns to rename, "
            f"found {len(costco_insta_corpus.columns)}"
        )
    costco_insta_corpus.columns = rename_columns

    # Adding a tag for the source of data
    costco_insta_corpus['source'] = 'instagram'

# Show floats without decimals in every DataFrame rendered from here on
pd.options.display.float_format = '{:.0f}'.format

# Resetting the index so that it starts at 1 instead of 0
costco_insta_corpus.index = np.arange(1, len(costco_insta_corpus)+1)

In [None]:
# Display the DataFrame after renaming, tagging and re-indexing
costco_insta_corpus

In [None]:
# Summary of the corpus: column names, non-null counts and dtypes
costco_insta_corpus.info()

In [None]:
# Count the missing values in each column
costco_insta_corpus.isnull().sum()

In [None]:
# Start with 'post_title': isolate the rows where it is missing and collect
# the distinct posts (url_code values) those rows belong to.
missing_post_title_rows = costco_insta_corpus.loc[
    lambda frame: frame['post_title'].isna()
]
unique_url_codes = missing_post_title_rows['url_code'].unique()

# Show which posts are affected
print(unique_url_codes)

In [None]:
# Next, 'post_likes', which is extracted from the same dataset: isolate the
# rows where it is missing and collect the distinct url_code values.
# (The variable name is reused from the 'post_title' check.)
missing_post_title_rows = costco_insta_corpus.loc[
    lambda frame: frame['post_likes'].isna()
]
unique_url_codes = missing_post_title_rows['url_code'].unique()

# Show which posts are affected
print(unique_url_codes)

In [None]:
# Load the supplementary file whose values are used in the next cell to
# overwrite columns for matching url_code values.
# NOTE(review): presumably holds the post-level fields that were missing
# above - confirm the provenance of data.csv.
missing_data = pd.read_csv('data.csv')

# Display the supplementary table for inspection
missing_data

In [None]:
# Update columns from missing_data where url_code matches.
# A url_code -> value lookup replaces the original row-wise apply, which
# rescanned missing_data for every row of the corpus and every shared column.
# As before, the first occurrence of a url_code in missing_data wins, and a
# matching row is overwritten even when the replacement value is NaN.
_replacements = (
    missing_data
    .dropna(subset=['url_code'])
    .drop_duplicates(subset='url_code', keep='first')
    .set_index('url_code')
)
_has_replacement = costco_insta_corpus['url_code'].isin(_replacements.index)

for column in missing_data.columns:
    if column in costco_insta_corpus.columns and column != 'url_code':
        _mapped = costco_insta_corpus['url_code'].map(_replacements[column])
        # Keep the mapped value for matching rows, the existing value elsewhere
        costco_insta_corpus[column] = _mapped.where(_has_replacement, costco_insta_corpus[column])

In [None]:
# Check the mapping: show the rows for the url codes held in unique_url_codes
# (set by the most recently run missing-value check) after the back-fill.
costco_insta_corpus[costco_insta_corpus['url_code'].isin(unique_url_codes)]

In [None]:
# Re-count missing values per column now that the back-fill has run
costco_insta_corpus.isnull().sum()

In [None]:
# Next, 'comment_like': isolate the rows where it is missing and collect the
# distinct url_code values. (The variable name is reused from earlier checks.)
missing_post_title_rows = costco_insta_corpus.loc[
    lambda frame: frame['comment_like'].isna()
]
unique_url_codes = missing_post_title_rows['url_code'].unique()

# Show which posts are affected
print(unique_url_codes)

In [None]:
# Show every row of the posts that have a missing 'comment_like'
costco_insta_corpus[costco_insta_corpus['url_code'].isin(unique_url_codes)]

In [None]:
# Drop every row whose url_code is 'BGVTDDmQrJF'.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not operate on a slice of the previous DataFrame
# (which triggers SettingWithCopyWarning).
costco_insta_corpus = costco_insta_corpus[costco_insta_corpus['url_code'] != 'BGVTDDmQrJF'].copy()

In [None]:
# Re-count missing values per column after dropping the post above
costco_insta_corpus.isnull().sum()

In [None]:
# Lastly, 'comments': isolate the rows where it is missing and collect the
# distinct url_code values. (The variable name is reused from earlier checks.)
missing_post_title_rows = costco_insta_corpus.loc[
    lambda frame: frame['comments'].isna()
]
unique_url_codes = missing_post_title_rows['url_code'].unique()

# Show which posts are affected
print(unique_url_codes)

In [None]:
# Fill missing comments with a placeholder; these are comments posted as GIFs.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection: that form is chained assignment, which warns on recent pandas
# and does not modify the DataFrame under Copy-on-Write.
costco_insta_corpus['comments'] = costco_insta_corpus['comments'].fillna('[comment in gif format]')

# Check for remaining null values
costco_insta_corpus.isnull().sum()

In [None]:
# Column summary after missing-value handling
costco_insta_corpus.info()

In [None]:
# Renumber the rows 1..N again, since dropping rows left gaps in the index
row_count = len(costco_insta_corpus)
costco_insta_corpus.index = np.arange(row_count) + 1

In [None]:
# Preview the first 30 rows after missing-value handling
costco_insta_corpus.head(30)

## Phase 1: Lexical and Morphological Analysis

Lexical Analysis includes the following sub phases: 
   - 1. Lowercasing of the text (in our case the comments and post captions)
   - 2. Removal of special characters
   - 3. Tokenization
   - 4. Stopword removal
   - 5. Lemmatization
   - 6. Spelling Correction

Morphological analysis studies morphemes: the smallest meaning-bearing units of a word, which cannot be divided further without losing their meaning.
There are two types of morphemes:
   - 1. Free Morphemes: These can stand alone as complete words, such as "book" or "happy."
   - 2. Bound Morphemes: These cannot exist independently and must attach to a free morpheme, like prefixes (e.g., "un-" in "unlock") or suffixes (e.g., "-ing" in "running").

Understanding morphemes helps in analyzing word formation and linguistic structures.

In [None]:
# 1. Lowercasing
costco_insta_corpus['post_title'] = costco_insta_corpus['post_title'].str.lower()
costco_insta_corpus['comments'] = costco_insta_corpus['comments'].str.lower()

# 2. Remove @mentions except @costco.
# Handles may contain periods between word characters (e.g. @john.doe); the
# previous pattern stopped at the first period and left ".doe" behind.
# A period that merely ends a sentence ("@john.") is left in place.
# Comments that consisted only of mentions become empty or whitespace here and
# are dropped in step 3, so no separate full-match branch is needed.
_MENTION_RE = re.compile(r'@(?!costco\b)\w+(?:\.\w+)*')
costco_insta_corpus['comments'] = costco_insta_corpus['comments'].apply(
    lambda x: _MENTION_RE.sub('', x) if isinstance(x, str) else x
)

# 3. Remove nulls and empty strings in 'comments'.
# .copy() detaches the filtered frame so later column assignments do not act
# on a slice of the unfiltered DataFrame.
_has_comment_text = (
    costco_insta_corpus['comments'].notnull()
    & (costco_insta_corpus['comments'].str.strip() != "")
)
costco_insta_corpus = costco_insta_corpus[_has_comment_text].copy()

# 4. Reindex after cleanup
costco_insta_corpus = costco_insta_corpus.reset_index(drop=True)
costco_insta_corpus.index += 1  # Start indexing from 1

In [None]:
# Spot-check the 5th row (positional index 4) after lowercasing and mention removal
print(costco_insta_corpus.iloc[4]['post_title'], "\n")
print(costco_insta_corpus.iloc[4]['comments'])

In [None]:
# 2. Removal of special characters
# Every character NOT listed in the class below is replaced with a single
# space. The class lists ASCII letters and digits, common punctuation, a few
# symbols/emojis, and the U+1F000-U+1FFFF range.
pattern = r"[^a-zA-Z0-9.,!?$&@#'’%:;()\-\[\]{}\"✔✖❌❤️🔥💯😊👍💪\U0001F000-\U0001FFFF]"
_unwanted_chars = re.compile(pattern)


def _blank_out_unwanted(value):
    """Replace each disallowed character in a string with a space; pass non-strings through."""
    if isinstance(value, str):
        return _unwanted_chars.sub(' ', value)
    return value


# Clean both free-text columns the same way
for _text_col in ('post_title', 'comments'):
    costco_insta_corpus[_text_col] = costco_insta_corpus[_text_col].apply(_blank_out_unwanted)

In [None]:
# Spot-check the 5th row (positional index 4) after special-character removal
print(costco_insta_corpus.iloc[4]['post_title'])
print(costco_insta_corpus.iloc[4]['comments'])

In [None]:
# Load the pretrained "vinai/bertweet-base" tokenizer; it is used by the
# tokenization helpers defined below.
# NOTE(review): described as trained on social-media text with emoji support;
# use_fast=True only requests a fast implementation - confirm one exists for
# this checkpoint.
bert_tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=True)

### 🔹 **Function: Merging Subword Tokens**
def merge_subwords(tokens):
    """Merge subword pieces marked with a trailing '@@' into whole words.

    A token ending in '@@' is a fragment that continues into the next token.
    Fragments are accumulated and joined with the first following token that
    does not end in '@@'.

    Args:
        tokens: iterable of token strings.

    Returns:
        list[str]: tokens with '@@'-continued fragments joined together. A
        dangling fragment at the end of the input (no closing token) is kept
        as its own word instead of being discarded.
    """
    merged_tokens = []
    buffer = ""

    for token in tokens:
        if token.endswith("@@"):
            # Strip only the trailing continuation marker; any '@@' inside the
            # token is part of the text and must be preserved.
            buffer += token[:-2]
        else:
            # buffer is "" when no fragment is pending, so this covers both cases
            merged_tokens.append(buffer + token)
            buffer = ""

    if buffer:
        # Input ended on a continuation fragment: do not lose it
        merged_tokens.append(buffer)

    return merged_tokens

### 🔹 **Function: Tokenizing Text with BERT**
def tokenize_text(text):
    """Tokenize a string with the module-level tokenizer and re-join its subwords.

    Non-string input (e.g. NaN or None) yields an empty list.
    """
    if not isinstance(text, str):
        return []
    raw_pieces = bert_tokenizer.tokenize(text)
    return merge_subwords(raw_pieces)

### 🔹 **Function: Convert Tokens to BERT Numerical IDs**
def get_bert_numerical_tokens(tokens):
    """Encode a non-empty list of word tokens into tokenizer input IDs.

    Anything that is not a non-empty list yields an empty list.
    """
    if not isinstance(tokens, list) or len(tokens) == 0:
        return []

    encoded = bert_tokenizer(
        tokens,
        is_split_into_words=True,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    # Drop singleton dimensions from the returned tensor and convert it to plain Python values
    input_ids = encoded['input_ids']
    return input_ids.squeeze().tolist()

In [None]:
# Tokenize both text columns with the BERT-based tokenizer helper
for _text_col, _token_col in (
    ('post_title', 'post_title_tokens'),
    ('comments', 'comments_tokens'),
):
    costco_insta_corpus[_token_col] = costco_insta_corpus[_text_col].apply(tokenize_text)

# Encode each token list into numerical token IDs
for _token_col, _ids_col in (
    ('post_title_tokens', 'post_title_numerical_tokens'),
    ('comments_tokens', 'comments_numerical_tokens'),
):
    costco_insta_corpus[_ids_col] = costco_insta_corpus[_token_col].apply(get_bert_numerical_tokens)

In [None]:
# Spot-check the token lists of the 5th row (positional index 4)
print(costco_insta_corpus.iloc[4]['post_title_tokens'], '\n')
print(costco_insta_corpus.iloc[4]['comments_tokens'])

In [None]:
# Spot-check the numerical token IDs of the 5th row (positional index 4)
print(costco_insta_corpus.iloc[4]['post_title_numerical_tokens'], '\n')
print(costco_insta_corpus.iloc[4]['comments_numerical_tokens'])

In [None]:
# Load the spaCy "en_core_web_lg" pipeline.
# NOTE(review): no components are disabled in this call, so the full pipeline
# runs in nlp.pipe() below even though stopword filtering only inspects
# token text and the is_punct flag.
nlp = spacy.load("en_core_web_lg") 

# Stopword set: spaCy's default English stop words plus common contractions.
# NOTE(review): spaCy's tokenizer may split contractions such as "don't" into
# several tokens, in which case these whole-word entries never match a single
# token - confirm whether they are needed.
custom_stopwords = nlp.Defaults.stop_words.union({
    "i've", "you've", "we've", "they've",
    "i'm", "you're", "we're", "they're",
    "i'll", "you'll", "he'll", "she'll", "we'll", "they'll",
    "i'd", "you'd", "he'd", "she'd", "we'd", "they'd",
    "isn't", "aren't", "wasn't", "weren't", "hasn't", "haven't",
    "hadn't", "doesn't", "don't", "didn't", "won't", "wouldn't",
    "shan't", "shouldn't", "can't", "cannot", "couldn't", "mustn't",
    "let's", "that's", "who's", "what's", "here's", "there's", "where's",
    "when's", "why's", "how's"
})

# Function to filter stopwords using spaCy’s nlp.pipe() for speed
def remove_stopwords_batch(text_series):
    """Tokenize each text with spaCy and drop stopwords, punctuation and whitespace.

    Args:
        text_series: pandas Series of raw texts. Missing values are treated as
            empty text rather than being stringified to the token "nan".

    Returns:
        list[list[str]]: one list of surviving token strings per input text,
        in input order. Original casing is kept; the stopword comparison is
        case-insensitive.
    """
    texts = text_series.fillna("").astype(str)
    return [
        [
            token.text
            for token in doc
            # Runs of spaces left by the cleaning steps become whitespace-only
            # tokens in spaCy; they carry no content, so drop them with punctuation.
            if not (token.is_space or token.is_punct)
            and token.text.lower() not in custom_stopwords
        ]
        for doc in nlp.pipe(texts, batch_size=1000, n_process=-1)
    ]

# Run stopword removal on both text columns.
# Note: this re-tokenizes the cleaned text with spaCy and overwrites the
# '*_tokens' columns that an earlier cell filled with BERT tokens.
for _text_col, _token_col in (
    ('post_title', 'post_title_tokens'),
    ('comments', 'comments_tokens'),
):
    costco_insta_corpus[_token_col] = remove_stopwords_batch(costco_insta_corpus[_text_col])

In [None]:
# Spot-check the 4th row (positional index 3) after stopword removal
print(costco_insta_corpus.iloc[3]['post_title_tokens'], '\n')
print(costco_insta_corpus.iloc[3]['comments_tokens'])

In [None]:
# Reload the spaCy "en_core_web_lg" pipeline for lemmatization.
# NOTE(review): despite the original intent to load only the tagger and
# lemmatizer, this call disables nothing and reloads the model already
# loaded in an earlier cell.
nlp = spacy.load("en_core_web_lg") 

# Lemmatization via spaCy
def spacy_batch_lemmatize(texts):
    """Lemmatize a batch of texts with the module-level spaCy pipeline.

    Args:
        texts: iterable of strings.

    Returns:
        list[list[str]]: one list of lemmas per input text, in input order.
        Whitespace-only tokens and the "-PRON-" placeholder lemma are skipped.
    """
    return [
        [
            token.lemma_
            for token in doc
            # Whitespace tokens (e.g. from doubled spaces in the joined text)
            # would otherwise be emitted as blank "lemmas".
            if not token.is_space and token.lemma_ != "-PRON-"
        ]
        for doc in nlp.pipe(texts, batch_size=1000, n_process=-1)
    ]

# spaCy needs plain strings, so glue each token list back together with spaces
post_texts = costco_insta_corpus['post_title_tokens'].apply(" ".join)
comment_texts = costco_insta_corpus['comments_tokens'].apply(" ".join)

# Lemmatize the re-joined texts and store the lemma lists in the token columns
for _token_col, _joined_texts in (
    ('post_title_tokens', post_texts),
    ('comments_tokens', comment_texts),
):
    costco_insta_corpus[_token_col] = spacy_batch_lemmatize(_joined_texts.tolist())

In [None]:
# Spot-check the 4th row (positional index 3) after lemmatization
print(costco_insta_corpus.iloc[3]['post_title_tokens'], '\n')
print(costco_insta_corpus.iloc[3]['comments_tokens'])

In [None]:
# 6. Spelling correction

# Initialize SymSpell
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

# Load dictionary file (ensure the file is in your working directory).
# load_dictionary() reports a missing file through its return value rather
# than raising; without this check every lookup would return no suggestions
# and the spelling step would silently leave all tokens unchanged.
dictionary_path = "frequency_dictionary_en_82_765.txt"
if not sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1):
    raise FileNotFoundError(f"SymSpell dictionary could not be loaded: {dictionary_path}")

# Function for spelling correction using SymSpell
def correct_spelling(tokens):
    """Spell-correct the alphabetic tokens of a token list with SymSpell.

    Args:
        tokens: iterable of token strings.

    Returns:
        list[str]: tokens in their original order. Each purely alphabetic
        token is replaced by the first CLOSEST suggestion (edit distance
        <= 2) when one exists; every other token is returned unchanged.
    """
    corrected = []
    for word in tokens:
        if word.isalpha():
            # One lookup per word; the previous version ran the identical
            # lookup twice (once to test for a hit, once to read it).
            suggestions = sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
            if suggestions:
                word = suggestions[0].term
        corrected.append(word)
    return corrected

In [None]:
# Spell-correct the token lists of both text sources
for _token_col in ('post_title_tokens', 'comments_tokens'):
    costco_insta_corpus[_token_col] = costco_insta_corpus[_token_col].apply(correct_spelling)

In [None]:
# Spot-check the 5th row (positional index 4) after spelling correction
print(costco_insta_corpus.iloc[4]['post_title_tokens'], '\n')
print(costco_insta_corpus.iloc[4]['comments_tokens'])

In [None]:
# Display the corpus after the full lexical preprocessing pipeline
costco_insta_corpus

## Exploratory Data Analysis

### Word Frequency Analysis

In [None]:
# Function to keep only words (remove punctuation, emojis, Unicode symbols, numbers, spaces, and apostrophes)
def filter_words_only(tokens):
    """Return only the tokens made up entirely of ASCII letters.

    Tokens containing digits, punctuation, emojis, spaces or apostrophes are
    dropped. Surviving tokens are stripped of surrounding whitespace (the
    pattern tolerates a single trailing newline, which strip() removes).
    """
    alphabetic_only = re.compile(r'^[a-zA-Z]+$')
    kept = []
    for word in tokens:
        if alphabetic_only.match(word):
            kept.append(word.strip())
    return kept

# Collect the alphabetic words of every row for visualization
# (word cloud and frequency charts); the dataset itself is not modified.
filtered_post_words = []
for _row_tokens in costco_insta_corpus['post_title_tokens']:
    filtered_post_words.extend(filter_words_only(_row_tokens))

filtered_comment_words = []
for _row_tokens in costco_insta_corpus['comments_tokens']:
    filtered_comment_words.extend(filter_words_only(_row_tokens))

# Tally how often each word occurs in each source
filtered_post_word_freq = Counter(filtered_post_words)
filtered_comment_word_freq = Counter(filtered_comment_words)

In [None]:
# Build one word cloud per text source from the word-only frequency counters
_wordcloud_options = dict(width=800, height=400, background_color='black', colormap='coolwarm')
filtered_post_wordcloud = WordCloud(**_wordcloud_options).generate_from_frequencies(filtered_post_word_freq)
filtered_comment_wordcloud = WordCloud(**_wordcloud_options).generate_from_frequencies(filtered_comment_word_freq)

# Render the word cloud for post titles
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(filtered_post_wordcloud, interpolation='bilinear')
ax.axis("off")
ax.set_title("Frequently Used Terms in Instagram Post Description")
plt.show()

In [None]:
# Render the word cloud for comments
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(filtered_comment_wordcloud, interpolation='bilinear')
ax.axis("off")
ax.set_title("Frequently Used Terms in Instagram Discussions")
plt.show()

In [None]:
# Top 20 words in post titles, most frequent first
top_filtered_post_words = filtered_post_word_freq.most_common(20)
top_filtered_post_words_df = pd.DataFrame(top_filtered_post_words, columns=['Word', 'Frequency'])

# Horizontal bar chart; the y-axis is flipped so the most frequent word is on top
fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(top_filtered_post_words_df['Word'], top_filtered_post_words_df['Frequency'], color='blue')
ax.set_xlabel("Frequency")
ax.set_ylabel("Words")
ax.set_title("Top 20 Most Frequent Words in Post Titles (Without Punctuation)")
ax.invert_yaxis()
plt.show()

In [None]:
# Top 20 words in comments, most frequent first
top_filtered_comment_words = filtered_comment_word_freq.most_common(20)
top_filtered_comment_words_df = pd.DataFrame(top_filtered_comment_words, columns=['Word', 'Frequency'])

# Horizontal bar chart; the y-axis is flipped so the most frequent word is on top
fig, ax = plt.subplots(figsize=(12, 6))
ax.barh(top_filtered_comment_words_df['Word'], top_filtered_comment_words_df['Frequency'], color='red')
ax.set_xlabel("Frequency")
ax.set_ylabel("Words")
ax.set_title("Top 20 Most Frequent Words in Instagram Comments")
ax.invert_yaxis()
plt.show()

### Monthly & Yearly Trends in Comments and Posts

In [None]:
## Post trends: unique posts per year

# Years must be integers so they group and plot as discrete values
costco_insta_corpus['post_year'] = costco_insta_corpus['post_year'].astype(int)

# Number of distinct posts (url_code values) published in each year
yearly_posts = costco_insta_corpus.groupby('post_year')['url_code'].nunique()
print("Yearly Unique Post Counts:\n", yearly_posts)

# Sum of the per-year unique counts
total_posts = yearly_posts.sum()
print("\nTotal Number of Unique Posts in Dataset:", total_posts)

# Line chart of posts per year
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(yearly_posts.index, yearly_posts.values, marker='o', color='g', linestyle='-')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Unique Posts', fontsize=12)
ax.set_title('Yearly Post Trends on Instagram', fontsize=14)
ax.grid(True)
plt.xticks(yearly_posts.index, fontsize=10)  # one tick per year

# Label each point with its count, lifted by 5% of the tallest value
for year, count in zip(yearly_posts.index, yearly_posts.values):
    label_height = count + (0.05 * max(yearly_posts.values))
    ax.text(year, label_height, f'{count}',
            fontsize=10, ha='center', va='bottom', color='black')

plt.show()

In [None]:
## Comment Trends

# Cast 'comment_year' to int so the years group as discrete integer values
costco_insta_corpus['comment_year'] = costco_insta_corpus['comment_year'].astype(int)

# Number of rows per comment year
# NOTE(review): this equals the comment count only if each row is one comment - confirm
yearly_comments = costco_insta_corpus.groupby('comment_year').size()

# Display yearly comment counts
print("Yearly Comment Counts:")
print(yearly_comments)

In [None]:
# Keep the years in chronological order
yearly_comments = yearly_comments.sort_index()

# Line chart of comments per year
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(yearly_comments.index, yearly_comments.values, marker='o', color='b', linestyle='-')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Comments')
ax.set_title('Yearly Comment Trends on Instagram')
ax.grid(True)
ax.set_xticks(yearly_comments.index)  # one tick per year

# Label each point with its count, lifted by 5% of the tallest value
for year, count in zip(yearly_comments.index, yearly_comments.values):
    label_height = count + (0.05 * max(yearly_comments.values))
    ax.text(year, label_height, f'{count}',
            fontsize=10, ha='center', va='bottom', color='black')
plt.show()

In [None]:
## **Monthly Unique Post Trends**
# Ensure 'post_year' and 'post_month' columns are in integer format
costco_insta_corpus['post_year'] = costco_insta_corpus['post_year'].astype(int)
costco_insta_corpus['post_month'] = costco_insta_corpus['post_month'].astype(int)

# Count unique posts per (year, month)
monthly_posts = costco_insta_corpus.groupby(['post_year', 'post_month'])['url_code'].nunique()

# Month x year table. Reindexing to all 12 months guarantees one row per
# month, so the 12 hard-coded tick labels below always line up with the bars
# even when a month has no posts in any year.
# NOTE(review): assumes post_month holds calendar month numbers 1-12.
monthly_posts_df = (
    monthly_posts.reset_index()
    .pivot(index='post_month', columns='post_year', values='url_code')
    .reindex(pd.RangeIndex(1, 13, name='post_month'))
    .fillna(0)
)

# Display monthly post counts
print("Monthly Unique Post Counts:\n", monthly_posts_df)

# **Plot Monthly Post Trends**
# Draw on an explicit Axes: calling plt.figure() and then DataFrame.plot()
# without ax= created a second figure and left the first one empty.
fig, ax = plt.subplots(figsize=(12, 6))
monthly_posts_df.plot(kind='bar', ax=ax, colormap='summer')
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Number of Unique Posts', fontsize=12)
ax.set_title('Monthly Post Trends', fontsize=14, fontweight='bold')
ax.set_xticks(range(12))
ax.set_xticklabels(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], rotation=45)
ax.legend(title='Year', fontsize=9)
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
## **Monthly Comment Trends**
# Ensure 'comment_year' and 'comment_month' columns are in integer format
costco_insta_corpus['comment_year'] = costco_insta_corpus['comment_year'].astype(int)
costco_insta_corpus['comment_month'] = costco_insta_corpus['comment_month'].astype(int)

# Count rows (comments) per (year, month)
monthly_comments = costco_insta_corpus.groupby(['comment_year', 'comment_month']).size()

# Month x year table. reset_index() names the count column 0, hence values=0.
# Reindexing to all 12 months guarantees one row per month, so a chart that
# labels bars Jan..Dec by position cannot be mislabelled when a month has no
# comments in any year.
# NOTE(review): assumes comment_month holds calendar month numbers 1-12.
monthly_comments_df = (
    monthly_comments.reset_index()
    .pivot(index='comment_month', columns='comment_year', values=0)
    .reindex(pd.RangeIndex(1, 13, name='comment_month'))
    .fillna(0)
)

# Display monthly comment counts
monthly_comments_df

In [None]:
# Grouped bar chart of monthly comment counts, one bar colour per year.
# Draw on an explicit Axes: calling plt.figure() and then DataFrame.plot()
# without ax= created a second figure and left the first one empty.
fig, ax = plt.subplots(figsize=(12, 6))
monthly_comments_df.plot(kind='bar', ax=ax, colormap='viridis', width=0.8, edgecolor='black')

ax.set_xlabel('Month', fontsize=14)
ax.set_ylabel('Number of Comments', fontsize=14)
ax.set_title('Monthly Comment Trends on Instagram', fontsize=16)

# Derive the tick labels from the month numbers actually present in the index
# so every bar group carries its own month name even when months are missing.
# NOTE(review): assumes the index holds calendar month numbers 1-12.
_month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
ax.set_xticks(range(len(monthly_comments_df.index)))
ax.set_xticklabels([_month_names[int(month) - 1] for month in monthly_comments_df.index],
                   rotation=45, fontsize=12)

# Improve legend visibility: horizontal and placed below the plot
ax.legend(title='Year', fontsize=11, title_fontsize=12, loc='upper center',
          bbox_to_anchor=(0.5, -0.15), frameon=True, ncol=6)

# Light horizontal grid for readability
ax.grid(axis='y', linestyle='--', alpha=0.5)

plt.show()

In [None]:
# Non-null comments per post (url_code)
comment_distribution = costco_insta_corpus.groupby('url_code')['comments'].count()

# Central tendency of the distribution, drawn as reference lines below
mean_comments = comment_distribution.mean()
median_comments = comment_distribution.median()

# Histogram of comments per post
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(comment_distribution, bins=30, edgecolor='black', alpha=0.7, color='blue')

ax.set_xlabel("Number of Comments per Post", fontsize=12)
ax.set_ylabel("Number of Posts", fontsize=12)
ax.set_title("Distribution of Comments per Post", fontsize=14)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Dashed vertical markers; the legend shows the values truncated to integers
ax.axvline(mean_comments, color='red', linestyle='dashed', linewidth=2, label=f'Mean: {int(mean_comments)}')
ax.axvline(median_comments, color='yellow', linestyle='dashed', linewidth=2, label=f'Median: {int(median_comments)}')

ax.legend()
plt.show()

In [None]:
# Find the post with the highest number of comments.
# Count the comments per url_code and take the largest group; the previous
# code used str.len().idxmax(), which selects the row with the longest
# comment text rather than the most-commented post.
_comments_per_post = costco_insta_corpus.groupby('url_code')['comments'].count()
_most_commented_code = _comments_per_post.idxmax()

# Keep a representative row of that post so post-level fields stay accessible
most_commented_post = costco_insta_corpus.loc[
    costco_insta_corpus['url_code'] == _most_commented_code
].iloc[0]

# Build the post URL (assuming Instagram URLs follow the standard /p/<code> format)
base_url = "https://www.instagram.com/p/"
most_commented_url = base_url + most_commented_post['url_code']

print("URL of the post with the most comments:", most_commented_url)

In [None]:
# Preview the first 10 rows of the processed corpus
costco_insta_corpus.head(10)

## Topic Modelling

In [None]:
costco_categories = {
    "Appliances": [
        "Commercial & Restaurant Appliances",
        "Cooking Appliances",
        "Cooktops",
        "Range Hoods",
        "Ranges",
        "Wall Ovens",
        "Dishwashers",
        "Freezers & Ice Makers",
        "Heating, Cooling & Air Quality",
        "Air Conditioners",
        "Air Filters",
        "Air Purifiers",
        "Dehumidifiers",
        "Fans",
        "Humidifiers",
        "Space Heaters",
        "Kitchen Appliance Packages",
        "Electric Kitchen Appliance Packages",
        "Gas Kitchen Appliance Packages",
        "Microwaves",
        "Refrigerators",
        "Bottom Freezer Refrigerators",
        "French Door Refrigerators",
        "Mini Fridges & Compact Refrigerators",
        "Side-By-Side Refrigerators",
        "Top Freezer Refrigerators",
        "Small Kitchen Appliances",
        "Air Fryers",
        "Blenders & Juicers",
        "Coffee Makers & Accessories",
        "Espresso Machines",
        "Mixers & Food Processors",
        "Pressure Cookers & Slow Cookers",
        "Specialty Appliances",
        "Tea Kettles",
        "Toaster Ovens & Toasters",
        "Vacuum Sealers",
        "Water Coolers & Dispensers",
        "Small Space Appliances",
        "Vacuums & Floor Cleaning",
        "Canister Vacuums",
        "Carpet, Steam & Hard Floor Cleaners",
        "Robot Vacuums & Robot Mops",
        "Stick & Handheld Vacuums",
        "Upright Vacuum Cleaners",
        "Wet/Dry & Shop Vacuums",
        "Washers & Dryers",
        "Electric Dryers",
        "Gas Dryers",
        "Laundry Centers",
        "Washer Dryer Combos",
        "Washers & Electric Dryers",
        "Washers & Gas Dryers",
        "Washing Machines",
        "Wine Coolers & Beverage Refrigerators"
    ],
    "Baby": [
        "Baby Care & Safety",
        "Baby Lotions & Creams",
        "Baby Monitors",
        "Bath & Skin Care",
        "Baby Diapers, Wipes & Training Pants",
        "Baby Formula & Feeding",
        "Toddler Food",
        "Car & Booster Seats",
        "Nursery & Baby Furniture"
    ],
    "Beauty": [
        "Bath & Body",
        "Beauty Gift Sets",
        "Beauty Supplements",
        "Beauty Tools & Accessories",
        "Fragrances",
        "Hair Care",
        "Hair Growth Products",
        "Hair Treatments & Styling Products",
        "Shampoo & Conditioner",
        "Makeup",
        "Nail Care",
        "Skin Care",
        "Cleansers",
        "Moisturizers",
        "Serums & Skin Treatments",
        "Sunscreen & Sun Care"
    ],
    "Clothing, Luggage & Handbags": [
        "Clothing for Babies",
        "Clothing for Kids",
        "Activewear for Kids",
        "Dresses for Kids",
        "Outerwear for Kids",
        "Sets for Kids",
        "Sleepwear for Kids",
        "Socks, Underwear, Accessories for Kids",
        "Swim for Kids",
        "Tops for Kids",
        "Clothing for Men",
        "Accessories for Men",
        "Activewear for Men",
        "Outerwear & Jackets for Men",
        "Pajamas & Loungewear for Men",
        "Pants & Jeans for Men",
        "Shirts for Men",
        "Shoes & Boots for Men",
        "Shorts for Men",
        "Socks & Underwear for Men",
        "Suits for Men",
        "Sweaters & Sweatshirts for Men",
        "Swimwear for Men",
        "Clothing for Women",
        "Accessories for Women",
        "Activewear for Women",
        "Coats & Jackets for Women",
        "Dresses & Skirts for Women",
        "Pajamas & Loungewear for Women",
        "Pants & Jeans for Women",
        "Shirts & Tops for Women",
        "Shoes & Boots for Women",
        "Shorts for Women",
        "Socks & Underwear for Women",
        "Sweaters & Cardigans for Women",
        "Swimwear for Women",
        "Clothing for Youth",
        "Activewear for Youth",
        "Bottoms for Youth",
        "Dresses for Youth",
        "Outerwear for Youth",
        "Sets for Youth",
        "Sleepwear for Youth",
        "Swim for Youth",
        "Tops for Youth",
        "Dresswear",
        "Fan Gear",
        "Handbags & Wallets",
        "Handbags",
        "Holiday Clothing",
        "Luggage",
        "Backpacks & Bags",
        "Carry-Ons",
        "Duffel Bags",
        "Luggage Sets",
        "Umbrellas",
        "Shoes",
        "Shoes & Boots for Men",
        "Shoes & Boots for Women",
        "Workwear"
    ],
    "Computers": [
        "Computer Accessories",
        "Computer Keyboards & Mouse",
        "Surge Protectors & Power Strips",
        "Computer Software",
        "Antivirus & Security Software",
        "Microsoft Office Suite",
        "Small Business & Accounting Software",
        "Tax Preparation Software",
        "Desktop Computers & PCs",
        "Computer Towers",
        "Gaming PCs & Computers",
        "Mac",
        "Hard Drives, SSD & Storage",
        "Portable Storage Drives",
        "Solid State Drives",
        "iPad & Tablets",
        "iPad",
        "iPad & Tablet Accessories",
        "Samsung Galaxy Tablets",
        "Laptops",
        "Chromebooks",
        "MacBook Pro & Air",
        "PC Laptops",
        "Windows Copilot+ PCs",
        "Monitors",
        "PC Gaming",
        "Printer Ink & Toner Cartridges",
        "Printers",
        "Inkjet Printers",
        "Laser Printers",
        "Routers, Wifi Extenders & Modems"
    ],
    "Costco Direct": [
        "Appliances",
        "Fitness",
        "Furniture",
        "Home Upgrades",
        "Mattresses",
        "Patio & Garden",
        "Toys",
        "TVs"
    ],
    "Electronics": [
        "Allstate Protection Plans",
        "Audio/Video",
        "Bluetooth Speakers",
        "Headphones & Earbuds",
        "Home Audio",
        "Projectors",
        "Smart Speakers & Voice Assistants",
        "Soundbars",
        "Streaming Devices",
        "Batteries",
        "Cameras & Drones",
        "Action Cameras & Camcorders",
        "Cell Phones & Accessories",
        "Cell Phone Accessories & Chargers",
        "Shop AT&T",
        "Shop T-Mobile",
        "Gaming Gift Cards",
        "Musical Instruments",
        "Drum Sets",
        "Guitars & Amplifiers",
        "Karaoke Machines",
        "Pianos & Keyboards",
        "Phones & Two-way Radios",
        "Cordless Telephones",
        "VoIP Telephones",
        "Security Cameras & Home Security Systems",
        "Doorbell Cameras",
        "Home Security Systems & Motion Sensors",
        "Security Camera Systems",
        "Security Cameras",
        "Security System Accessories",
        "Smart Home & Home Automation",
        "Automatic Gate & Garage Door Openers",
        "Doorbell Cameras",
        "Home Security Systems & Motion Sensors",
        "Routers, Wifi Extenders & Modems",
        "Security Camera Systems",
        "Security Cameras",
        "Smart Lighting",
        "Smart Speakers & Voice Assistants",
        "Smart Thermostats & Home Energy Monitors",
        "Video Games & Consoles",
        "Arcade & Retro Gaming",
        "Gaming Accessories",
        "Nintendo",
        "Playstation",
        "Virtual Reality",
        "Xbox",
        "Wearable Technology",
        "Fitness Trackers",
        "Smart Watches"
    ],
    "TVs": [
        "32 inch & Below TVs",
        "40 inch - 43 inch TVs",
        "48 inch - 50 inch TVs",
        "55 inch TVs",
        "58 inch - 60 inch TVs",
        "65 inch TVs",
        "70 inch TVs",
        "75 inch - 83 inch TVs",
        "85 inch or larger TVs",
        "TV Accessories"
    ],
    "Floral & Gift Baskets": [
        "Flowers",
        "Bouquets",
        "Bulk Flowers",
        "Floral Centerpieces",
        "Next Business Day Delivery",
        "Roses",
        "Vase Arrangements",
        "Wedding & Event Flowers",
        "Gift Baskets",
        "Deli & Cheese Collections",
        "Fruit & Nut Gift Baskets",
        "Gift Baskets & Treats",
        "Holiday Gift Baskets"
    ],
    "Furniture": [
        "Bedroom Furniture",
        "Bedroom Collections",
        "Bedroom Sets",
        "Beds & Bed Frames",
        "Daybeds",
        "Dressers & Chest of Drawers",
        "Makeup Vanities & Tables",
        "Murphy Beds & Wall Beds",
        "Nightstands & Bedside Tables",
        "Entryway Furniture",
        "Artificial Plants, Flowers & Trees",
        "Closet Organizers",
        "Coat Racks & Hall Trees",
        "Console & Entryway Tables",
        "Fireplaces and Stoves",
        "Kids' & Baby Furniture",
        "Bunk & Loft Beds",
        "Kids Bedroom Collections",
        "Kids Beds",
        "Kids' Bedroom Sets",
        "Nursery & Baby Furniture",
        "Kitchen & Dining Room Furniture",
        "Bar & Wine Cabinets",
        "Bar Stools & Counter Stools",
        "Kitchen & Dining Chairs",
        "Kitchen & Dining Room Sets",
        "Kitchen & Dining Room Tables",
        "Kitchen Islands & Carts",
        "Living Room Furniture",
        "Accent & Coffee Tables",
        "Accent & Living Room Chairs",
        "Accent Cabinets & Chests",
        "Coat Racks & Hall Trees",
        "Console & Entryway Tables",
        "Futons & Sleeper Sofas",
        "Living Room Collections",
        "Living Room Sets",
        "Loveseats",
        "Ottomans & Benches",
        "Recliners",
        "Sectional Sofas",
        "Sofas & Couches",
        "TV Stands & Entertainment Centers",
        "Nursery & Baby Furniture",
        "Office & Home Office Furniture",
        "Bookcases & Bookshelves",
        "Chair Mats",
        "Desks & Workstations",
        "Filing & Office Storage Cabinets",
        "Folding Tables & Chairs",
        "Office & Desk Chairs",
        "Office Furniture Sets"
    ],
    "Gift Cards & Tickets": [
        "Costco Shop Cards",
        "Entertainment & Lifestyle Gift Cards",
        "Gaming Gift Cards",
        "Health & Wellness Gift Cards",
        "Movie Tickets",
        "Restaurant Gift Cards",
        "Sports Tickets",
        "Theater & Live Events",
        "Theme Parks & Attractions",
        "Travel Gift Cards & Passes"
    ],
    "Grocery & Household Essentials": [
        "2-Day Delivery",
        "Health & Beauty",
        "Household",
        "Bakery & Desserts",
        "Beverages & Water",
        "Juice",
        "Milk & Milk Substitutes",
        "Powdered Drink Mix",
        "Soda, Pop & Soft Drinks",
        "Sports & Energy Drinks",
        "Tea",
        "Water",
        "Breakfast",
        "Cereal, Oatmeal, Granola & Oats",
        "Candy",
        "Chocolates",
        "Gum & Mints",
        "Hard & Gummy Candy",
        "Cleaning Supplies",
        "Cleaning Tools",
        "Dish Soap & Dishwasher Detergent",
        "Floor, Bathroom & All-Purpose Cleaners",
        "Laundry Detergent & Supplies",
        "Trash Bags",
        "Coffee",
        "Coffee Creamers",
        "Ground Coffee",
        "Instant Coffee",
        "K-Cups, Coffee Pods & Capsules",
        "Whole Bean Coffee",
        "Dairy",
        "Deli",
        "Caviar",
        "Packaged & Prepared Food",
        "Prosciutto, Smoked & Cured Meats",
        "Emergency Food Supplies & Kits",
        "Gift Baskets",
        "Deli & Cheese Collections",
        "Fruit & Nut Gift Baskets",
        "Gift Baskets & Treats",
        "Holiday Gift Baskets",
        "Wine, Champagne & Sparkling",
        "Kirkland Signature Grocery",
        "Meat & Seafood",
        "Beef",
        "Lamb",
        "Pork",
        "Poultry",
        "Seafood",
        "Organic",
        "Pantry & Dry Goods",
        "Canned Goods",
        "Flour & Baking Supplies",
        "Honey",
        "Nut Butters, Jelly & Jam",
        "Pasta, Rice & Grains",
        "Sauces, Condiments & Marinades",
        "Soup, Bouillon & Broth",
        "Spices, Seasonings & Dried Herbs",
        "Sugar, Syrup & Sweeteners",
        "Vinegar & Cooking Oil",
        "Paper & Plastic Products",
        "Facial Tissue",
        "Food Bags",
        "Paper & Disposable Bowls",
        "Paper Towels & Napkins",
        "Paper, Plastic & Disposable Plates",
        "Parchment Paper, Plastic Wrap & Aluminum Foil",
        "Plastic & Disposable Utensils",
        "Plastic, Paper & Disposable Cups",
        "Toilet Paper",
        "Same-Day Delivery",
        "Snacks",
        "Chips & Pretzels",
        "Cookies",
        "Crackers",
        "Dried Fruit",
        "Fruit Snacks & Applesauce",
        "Jerky & Dried Meats",
        "Nuts & Seeds",
        "Pastries & Muffins",
        "Popcorn",
        "Protein, Breakfast & Snack Bars",
        "Snack & Trail Mix",
        "Wine, Champagne & Sparkling"
    ],
    "Health & Personal Care": [
        "Costco Optical",
        "Health & Medicines",
        "Allergy & Sinus",
        "Antacids & Heartburn Medicine",
        "Cough, Cold & Flu",
        "Eye Care",
        "Fiber & Laxatives",
        "Pain & Fever",
        "Sleep Aids",
        "Smoking Cessation",
        "Topical Remedies",
        "Hearing Care Products",
        "Assisted Hearing Aids",
        "Costco Hearing Aid Center",
        "Home Health Care",
        "Blood Pressure & Health Monitors",
        "Braces & Supports",
        "Contraceptives & Pregnancy Tests",
        "Electrical Muscle Stimulation",
        "First Aid",
        "Home Health Tests",
        "Hot & Cold Therapy",
        "Light Therapy",
        "Massage Equipment",
        "Massage Accessories",
        "Massage Chairs",
        "Massage Tables",
        "Nutrition",
        "Healthy Snacks & Mixes",
        "Protein",
        "Sports Nutrition Supplements",
        "Personal Care",
        "Cleansing Wipes & Cloths",
        "Deodorant",
        "Feminine Hygiene Products",
        "Incontinence",
        "Men's Grooming",
        "Moisturizers",
        "Oral Care",
        "Shaving & Hair Removal",
        "Soap & Bodywash",
        "Reading Glasses",
        "Spa Gifts & Aromatherapy",
        "Travel Immunizations",
        "Vitamins, Herbals & Dietary Supplements",
        "Adult Multi & Letter Vitamins",
        "All Vitamins & Supplements",
        "Calcium & Minerals",
        "Children's Vitamins",
        "CoQ 10 Enzyme",
        "Dietary Supplements",
        "Energy Drinks",
        "Fish Oil & Omega-3",
        "Glucosamine & Joint Supplements",
        "Herbal Supplements",
        "Immune Support",
        "Probiotics",
        "Weight Loss Supplements",
        "Wheelchairs, Walkers & Medical Alert Devices",
        "Medical Alert Systems",
        "Walkers & Wheelchairs"
    ],
    "Holiday & Seasonal": [
        "Easter",
        "Holiday Cards,Wrapping Paper & Ribbons",
        "Holiday Gift Baskets"
    ],
    "Home & Installation Services": [
        "Bath Remodeling",
        "Cabinet Refacing",
        "Carpet & Flooring Installation",
        "Countertop Installation",
        "Custom Blinds, Shades & Shutters",
        "Garage Door & Opener Installation",
        "Generator Installation",
        "Home Organization Services",
        "HVAC Installation",
        "Water Treatment Services",
        "Windows & Doors"
    ],
    "Home & Kitchen": [
        "Bath",
        "Bath Rugs & Mats",
        "Bath Towels & Sheets",
        "Bathroom Accessories & Scales",
        "Bathroom Hardware",
        "Bathroom Mirrors & Medicine Cabinets",
        "Bathroom Remodel & Renovations",
        "Bathroom Sink Faucets",
        "Bathroom Vanities",
        "Bathtubs & Bathtub Faucets",
        "Showers",
        "Toilets & Bidets",
        "Bedding",
        "Bed Sheets",
        "Blankets & Throws",
        "Comforters",
        "Duvet Covers & Quilts",
        "Mattress Toppers, Pads & Protectors",
        "Pillows",
        "Cookware & Bakeware",
        "Baking Sheets & Bakeware Sets",
        "Cookware Sets",
        "Dutch Ovens & Stock Pots",
        "Grill & Griddle Pans",
        "Skillets, Frying & Saute Pans",
        "Cutlery & Kitchen Knives",
        "Kitchen Knife Sets",
        "Knife Block Sets",
        "Glassware & Drinkware",
        "Drinking Glasses",
        "Tumblers & Coffee Mugs",
        "Water Bottles",
        "Wine & Bar Glasses",
        "Home Decor",
        "Artificial Plants, Flowers & Trees",
        "Candles & Candle Holders",
        "Curtains & Drapes",
        "Digital Picture Frames",
        "Home & Decorative Accents",
        "Lamps",
        "Mirrors",
        "Picture Frames",
        "Slipcovers",
        "Throw & Decorative Pillows",
        "Wall Art",
        "Wall Decor",
        "Kitchen Linens & Towels",
        "Kitchen Storage & Organization",
        "Food Storage Containers",
        "Kitchen Cabinet & Pantry Organizers",
        "Kitchen Islands & Carts",
        "Trash Cans",
        "Kitchen Utensils & Gadgets",
        "Bar & Wine Tools",
        "Cutting Boards",
        "Kitchen Tools",
        "Rugs",
        "Area Rugs",
        "Kids Rugs",
        "Kitchen & Door Mats",
        "Outdoor Rugs",
        "Rug Pads",
        "Shag & Sheepskin Rugs",
        "Sewing & Garment Care",
        "Irons & Clothes Steamers",
        "Small Kitchen Appliances",
        "Air Fryers",
        "Blenders & Juicers",
        "Coffee Makers & Accessories",
        "Espresso Machines",
        "Mixers & Food Processors",
        "Pressure Cookers & Slow Cookers",
        "Specialty Appliances",
        "Tea Kettles",
        "Toaster Ovens & Toasters",
        "Vacuum Sealers",
        "Water Coolers & Dispensers",
        "Tableware",
        "Dinnerware Sets",
        "Flatware Sets",
        "Serving Trays, Bowls & Dishes",
        "Vacuums & Floor Cleaning",
        "Canister Vacuums",
        "Carpet, Steam & Hard Floor Cleaners",
        "Robot Vacuums & Robot Mops",
        "Stick & Handheld Vacuums",
        "Upright Vacuum Cleaners",
        "Wet/Dry & Shop Vacuums"
    ],
    "Home Improvement": [
        "Bathroom Remodel & Renovations",
        "Bathroom Hardware",
        "Bathroom Mirrors & Medicine Cabinets",
        "Bathroom Sink Faucets",
        "Bathroom Vanities",
        "Bathtubs & Bathtub Faucets",
        "Showers",
        "Toilets & Bidets",
        "Building Supplies",
        "Door Locks, Handles & Hardware",
        "Gutter Guards",
        "Interior Doors",
        "Fire Safety",
        "Fire Extinguishers",
        "Smoke & Carbon Monoxide Detectors",
        "Flooring",
        "Flooring Accessories",
        "Gym & Multi-Purpose Flooring",
        "Hybrid Resilient Flooring",
        "Installed Flooring",
        "Laminate Flooring",
        "Vinyl Flooring",
        "Wall & Floor Tile",
        "Garage",
        "Garage Storage & Organization",
        "Generators",
        "Kitchen Remodel & Renovations",
        "Garbage Disposals",
        "Kitchen Cabinets",
        "Kitchen Faucets",
        "Kitchen Sinks",
        "Water Filters & Home Filtration Systems",
        "Lighting & Ceiling Light Fixtures",
        "Ceiling Fans",
        "Chandeliers",
        "Flush & Semi-Flush Mount Lighting",
        "Light Bulbs",
        "Outdoor Lighting",
        "Pendant Lighting",
        "Vanity & Bathroom Lighting",
        "Safes",
        "Gun Safes",
        "Home & Office Safes",
        "Storage & Organization",
        "Closet Organizers",
        "Laundry Baskets & Room Storage",
        "Moving Boxes & Supplies",
        "Storage Bins & Containers",
        "Tools & Hardware",
        "Air Tools & Compressors",
        "Electrical Cords & Power Strips",
        "Flashlights & Headlamps",
        "Ladders",
        "Painting Supplies",
        "Power & Hand Tools",
        "Tool Boxes, Chests & Workbenches",
        "Work Gloves & Safety Gear"
    ],
    "Jewelry, Watches & Sunglasses": [
        "Bracelets",
        "Diamond Bracelets",
        "Gemstone Bracelets",
        "Gold Bracelets",
        "Pearl Bracelets",
        "Earrings",
        "Diamond Earrings",
        "Gemstone Earrings",
        "Gold Earrings",
        "One-of-a-Kind Earrings",
        "Pearl Earrings",
        "Necklaces",
        "Diamond Necklaces",
        "Gemstone Necklaces",
        "Gold Necklaces",
        "One-of-a-Kind Necklaces",
        "Pearl Necklaces",
        "Precious Metals",
        "Rings",
        "Bridal & Wedding Ring Sets",
        "Engagement Rings",
        "Gemstone Rings",
        "One-of-a-Kind Rings",
        "Pearl Rings",
        "Wedding Rings & Bands",
        "Sunglasses",
        "Sunglasses for Men",
        "Sunglasses for Women",
        "Unisex Sunglasses",
        "Watches",
        "Luxury Watches",
        "Smart Watches"
    ],
    "Mattresses": [
        "Adjustable Beds",
        "California King Size Mattresses",
        "Full Size Mattresses",
        "King Size Mattresses",
        "Queen Size Mattresses",
        "Twin Size Mattresses",
        "Twin XL Size Mattresses"
    ],
    "Office Products": [
        "Basic Office Supplies",
        "Scissors, Paper Cutters & Trimmers",
        "Staplers & Staples",
        "Tapes & Adhesives",
        "Filing, Binders & Storage",
        "Storage Bins & Containers",
        "Janitorial & Breakroom Supplies",
        "Breakroom Serving Supplies",
        "Bulk Paper Towels",
        "Bulk Toilet Paper & Facial Tissue",
        "Commercial Cleaning Supplies",
        "Commercial Trash Cans & Bags",
        "Floor Mats",
        "Mailing, Packing & Shipping Supplies",
        "Envelopes & Mailers",
        "Moving Boxes & Supplies",
        "Packing Tape & Supplies",
        "Office Electronics",
        "Document Scanners",
        "Paper Shredders",
        "Printers",
        "Paper",
        "Copy & Printer Paper",
        "Self-Stick Notes & Flags",
        "Writing Supplies",
        "Markers & Highlighters",
        "Pencils & Sharpeners",
        "Pens"
    ],
    "Patio, Lawn & Garden": [
        "Backyard Playground Sets",
        "Outdoor Playhouses",
        "Outdoor Playsets & Swing Sets",
        "Greenhouses",
        "Grills & Outdoor Cooking",
        "BBQs & Grills",
        "Grill Tools, Accessories & Fuel",
        "Outdoor Kitchens & BBQ Islands",
        "Outdoor Ovens & Smokers",
        "Hot Tubs, Spas & Pools",
        "Hot Tubs & Spas",
        "Swimming Pools & Pool Chemicals",
        "Landscaping",
        "Artificial Grass & Turf",
        "Fertilizers, Mulch & Soil",
        "Landscape Fabric",
        "Outdoor Fountains & Accessories",
        "Lawn Care & Gardening Supplies",
        "Compost Bins & Composters",
        "Garden Hoses, Sprinklers & Accessories",
        "Garden Tools",
        "Lawn & Plant Care",
        "Outdoor & Garden Decor",
        "Pest & Animal Control",
        "Planters & Plant Pots",
        "Raised Garden Beds",
        "Outdoor Heating & Cooling",
        "Fire Pits & Fire Pit Tables",
        "Patio Heaters",
        "Outdoor Power Equipment",
        "Chainsaws",
        "Hedge & String Trimmers",
        "Lawn Mowers",
        "Leaf & Snow Blowers",
        "Mulchers & Wood Chippers",
        "Pressure Washers",
        "Solar Panels & Generators",
        "Outdoor Storage Sheds",
        "Patio & Outdoor Furniture",
        "Adirondacks, Patio Chairs, & Garden Benches",
        "Commercial Outdoor Patio Furniture",
        "Deck Boxes & Outdoor Storage Boxes",
        "Outdoor Bar Stools & Small Space Sets",
        "Outdoor Patio Chaise Lounges & Daybeds",
        "Outdoor Patio Conversation Sets",
        "Outdoor Patio Cushions",
        "Outdoor Patio Dining Sets",
        "Outdoor Patio Fire Pit Sets",
        "Outdoor Patio Furniture Covers",
        "Outdoor Patio Umbrellas & Sun Shade Sails",
        "Patio Covers & Shade Structures",
        "Canopies & Pop-Up Tents",
        "Gazebos",
        "Pergolas",
        "Plants, Bulbs & Seeds",
        "Cacti & Succulents",
        "Plant Bulbs & Seeds",
        "Plants & Trees",
        "Window Treatments, Awnings & Sun Shades"
    ],
    "Pet Supplies": [
        "Cat Food",
        "Cat Trees",
        "Dewormer, Flea & Tick Treatment",
        "Dog Beds",
        "Dog Food",
        "Pet Clothing & Accessories",
        "Pet Enclosures",
        "Chicken Coops & Accessories",
        "Pet Grooming & Waste Management",
        "Pet Supplements",
        "Pet Technology",
        "Pet Toys & Treats"
    ],
    "Sports & Fitness": [
        "Beach",
        "Beach & Camping Chairs",
        "Beach Games & Toys",
        "Beach Towels",
        "Beach Umbrellas, Bags & Accessories",
        "Sunscreen & Sun Care",
        "Swim for Kids",
        "Swim for Youth",
        "Swimwear for Men",
        "Swimwear for Women",
        "Water Sports",
        "Bikes, Scooters & Boards",
        "Bicycles",
        "Electric Bikes",
        "Helmets",
        "Scooters",
        "Camping Gear",
        "Air Mattresses & Sleeping Pads",
        "Beach & Camping Chairs",
        "Binoculars & Telescopes",
        "Camp Kitchen & Cooking",
        "Camping Accessories",
        "Camping Cots",
        "Camping Lanterns",
        "Coolers",
        "Hammocks",
        "Sleeping Bags",
        "Tents & Shelters",
        "Fan Gear",
        "Fishing & Hunting",
        "Binoculars & Spotting Scopes",
        "Hunting Gear",
        "Outdoor Electronics & Navigation",
        "Fitness & Exercise Equipment",
        "Dumbbells, Weight Sets & Lifting Accessories",
        "Ellipticals",
        "Exercise & Stationary Bikes",
        "Home Gym Systems",
        "Inversion Tables",
        "Pilates Machines & Reformers",
        "Rowing Machines",
        "Treadmills",
        "Workout Recovery",
        "Game Room",
        "Air Hockey & Foosball Tables",
        "Arcade Games & Dartboards",
        "Pool Tables",
        "Table Tennis",
        "Golf",
        "Golf Accessories",
        "Golf Bags & Carts",
        "Golf Balls",
        "Golf Clubs",
        "Golf Gloves",
        "Golf Simulators and Training Aids",
        "Outdoor Games & Sports Equipment",
        "Outdoor Games",
        "Sports Equipment",
        "Saunas",
        "Sports Memorabilia & Collectibles",
        "Tailgating Essentials",
        "Sports Fan Shop",
        "Tailgating Chairs, Tables & Wagons",
        "Water Sports",
        "Floats & Water Toys",
        "Life Jackets & Vests",
        "Paddle Boards, Surfboards & Kayaks",
        "Swim Goggles & Snorkels",
        "Towables & Boat Tubes"
    ],
    "Tires & Auto": [
        "ATV, Golf & Trailer Tires",
        "ATV & UTV Tires",
        "Golf Tires",
        "Trailer Tires",
        "Auto & Truck Accessories",
        "Cargo Storage & Racks",
        "Emergency Kits & Power Inverters",
        "Seat Covers & Floor Mats",
        "Trailers & Towing",
        "Winter Accessories",
        "Wiper Blades & Auto Accessories",
        "Car Electronics",
        "Garage & Shop Accessories",
        "Lifts & Jacks",
        "Tools & Equipment",
        "Utility Carts",
        "Garage Flooring",
        "Interstate Car & Truck Batteries",
        "Motor Oil",
        "Tires",
        "Wash & Wax",
        "Wheels"
    ],
    "Toys": [
        "Arts, Crafts & Hobby",
        "Building Blocks & Sets",
        "Dolls & Action Figures",
        "Dress Up & Pretend Play",
        "Outdoor & Water Toys",
        "Puzzles, Cards & Board Games",
        "Ride-On Toys",
        "Stuffed Animals & Plush Toys"
    ],
    "Costco Brand Promotion" : [
        "Posts about Brand Promotion"
    ]
}

In [None]:
# Sentence-embedding model used to compare post titles against subcategory names
model = SentenceTransformer('all-MiniLM-L6-v2')

# Flatten every subcategory name into one mapping (name -> text to embed).
# A name that appears under several main categories is stored once and keeps
# the position of its first occurrence.
subcategory_sentences = {
    name: name
    for names in costco_categories.values()
    for name in names
}

# Encode each distinct subcategory once, in the mapping's insertion order
subcategory_list = [*subcategory_sentences]
subcategory_embeddings = model.encode(subcategory_list, convert_to_tensor=True)

# Function to map post to category
def map_post_to_category(post_text, threshold=0.35, short_word_limit=5):
    """Map a post title to its closest Costco (subcategory, main category) pair.

    The title is embedded with the global `model` and compared (cosine
    similarity) against the pre-computed `subcategory_embeddings`.

    Parameters
    ----------
    post_text : str or any
        Post title. Non-string or blank values are treated as brand promotion.
    threshold : float, default 0.35
        Minimum cosine similarity of the best-matching subcategory; a lower
        best score is treated as brand promotion.
    short_word_limit : int, default 5
        Posts with at most this many whitespace-separated words are treated
        as brand promotion.

    Returns
    -------
    tuple of (str, str)
        `(subcategory, main_category)`. The fallback pair is
        `("Posts about Brand Promotion", "Costco Brand Promotion")`.
    """
    brand_promo = ("Posts about Brand Promotion", "Costco Brand Promotion")

    if not isinstance(post_text, str) or post_text.strip() == "":
        return brand_promo

    # Short posts are brand promotion regardless of similarity, so decide that
    # before paying for the embedding and the similarity computation.
    if len(post_text.split()) <= short_word_limit:
        return brand_promo

    neutral_context = f"This is a product review post: {post_text}"
    post_embedding = model.encode(neutral_context, convert_to_tensor=True)
    similarities = util.cos_sim(post_embedding, subcategory_embeddings)[0]

    best_match_idx = similarities.argmax().item()
    best_score = similarities[best_match_idx].item()
    if best_score < threshold:
        return brand_promo

    best_subcat = subcategory_list[best_match_idx]
    # A subcategory may be listed under several main categories; the first
    # main category (in dict order) that contains it wins.
    main_cat = next(main for main, subs in costco_categories.items() if best_subcat in subs)
    return best_subcat, main_cat

# Apply to your DataFrame.
# Collect the (subcategory, main_category) tuples in a plain list and assign each
# column positionally; this avoids constructing a pandas Series for every row
# inside `apply` and also works when the frame has no rows.
category_pairs = [map_post_to_category(title) for title in costco_insta_corpus['post_title']]
costco_insta_corpus['subcategory'] = [subcat for subcat, _ in category_pairs]
costco_insta_corpus['main_category'] = [main for _, main in category_pairs]

In [None]:
# Quick peek: first 10 rows, now including the 'subcategory' and 'main_category' columns
costco_insta_corpus.head(10)

## Extractive Summarization

In [None]:
# Sentence encoder used to embed comment sentences for the TextRank summariser
EMBED_ID = "paraphrase-mpnet-base-v2"
embedder = SentenceTransformer(EMBED_ID)

def textrank_summary(text: str, top_n: int = 2, sim_threshold: float = 0.30) -> str:
    """Return an extractive summary made of the `top_n` highest-ranked sentences.

    Sentences are embedded with the global `embedder`, linked in an undirected
    graph wherever their cosine similarity exceeds `sim_threshold`, and ranked
    with weighted PageRank. The selected sentences are returned in their
    original order, joined by single spaces.

    Parameters
    ----------
    text : str
        Text to summarise. Non-string values and texts with fewer than 10
        whitespace-separated words are returned unchanged.
    top_n : int, default 2
        Number of sentences to keep. Texts with at most `top_n` usable
        sentences are returned unchanged.
    sim_threshold : float, default 0.30
        Minimum cosine similarity for two sentences to share an edge.

    Returns
    -------
    str
        The summary, or `text` itself when it is too short to summarise.
    """
    # leave very short texts untouched
    if not isinstance(text, str) or len(text.split()) < 10:
        return text

    # 1) sentence split & filter trivial sentences (2 words or fewer)
    sentences = [s.strip() for s in sent_tokenize(text) if len(s.split()) > 2]
    N = len(sentences)
    if N <= top_n:
        return text

    # 2) embed all sentences
    sent_embs = embedder.encode(sentences, convert_to_tensor=True, normalize_embeddings=True)

    # 3) build similarity graph. Only pairs with i < j are read, so the
    #    diagonal (self-similarity) never contributes an edge.
    sims = util.cos_sim(sent_embs, sent_embs).cpu().numpy()
    G = nx.Graph()
    # Register every sentence as a node up front: without this, a text whose
    # sentences are all dissimilar yields an empty graph, PageRank returns {}
    # and the summary collapses to an empty string.
    G.add_nodes_from(range(N))
    for i in range(N):
        for j in range(i + 1, N):
            if sims[i, j] > sim_threshold:    # prune weak edges
                G.add_edge(i, j, weight=float(sims[i, j]))

    # 4) PageRank; ties keep sentence order because sorted() is stable
    ranks = nx.pagerank(G, weight='weight')
    top = sorted(ranks, key=ranks.get, reverse=True)[:top_n]
    top.sort()                                # keep original order
    return " ".join(sentences[i] for i in top)

# ----------------------------------------------------------------
# Summarise every comment and store the result on the dataframe
# ----------------------------------------------------------------
tqdm.pandas(desc="TextRank summarising")  # registers `progress_apply` on pandas objects
comment_summaries = costco_insta_corpus["comments"].progress_apply(textrank_summary)
costco_insta_corpus["extractive_summary"] = comment_summaries

# Compare the first few comments with their summaries
costco_insta_corpus[["comments", "extractive_summary"]].head(3)

In [None]:
# Inspect the first 20 rows, including the 'extractive_summary' column added above
costco_insta_corpus.head(20)

## Sentiment Analysis Model

In [None]:
# Load tokenizer and model
# NOTE(review): this rebinds the global `model`, which an earlier cell bound to the
# SentenceTransformer that map_post_to_category calls via `model.encode`. Re-running
# that function after this cell would see this classifier instead of the embedding
# model — consider giving the two models distinct names.
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Labels mapping from model card
# get_sentiment_details indexes this list with the argmax of the model scores,
# so position i must correspond to output score i.
labels = ['negative', 'neutral', 'positive']

def get_sentiment_details(text):
    """Classify one text with the global sentiment `model` and `tokenizer`.

    Parameters
    ----------
    text : str or any
        Text to classify. Non-string values (e.g. NaN) yield the fallback row.

    Returns
    -------
    pandas.Series
        Four values: `[label, negative_pct, neutral_pct, positive_pct]`, where
        `label` comes from the global `labels` list and the percentages are the
        softmax scores multiplied by 100. If the input is not a string or
        inference fails, the fallback `["", 0, 0, 0]` is returned.
    """
    fallback = ["", 0, 0, 0]

    # The tokenizer only accepts strings; short-circuit instead of relying on
    # the exception handler below.
    if not isinstance(text, str):
        return pd.Series(fallback)

    try:
        # Explicit max_length so truncation does not depend on the tokenizer's
        # configured maximum (512 is assumed to fit this RoBERTa checkpoint's
        # position embeddings — TODO confirm against the model config).
        encoded_input = tokenizer(
            text, return_tensors='pt', truncation=True, max_length=512, padding=True
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = model(**encoded_input)
        scores = softmax(output[0][0].detach().numpy())

        label = labels[np.argmax(scores)]

        # Sentiment percentages, in the same order as `labels`
        return pd.Series([label, scores[0] * 100, scores[1] * 100, scores[2] * 100])

    except Exception as exc:
        # Best-effort: keep the batch running, but surface that a row failed
        # instead of silently producing an empty label.
        import warnings
        warnings.warn(
            f"get_sentiment_details fell back to empty sentiment ({type(exc).__name__})",
            RuntimeWarning,
        )
        return pd.Series(fallback)

In [None]:
# Score every Instagram comment and attach the four sentiment columns
tqdm.pandas()  # registers `progress_apply` on pandas objects
sentiment_columns = ['sentiment', 'negative_pct', 'neutral_pct', 'positive_pct']
sentiment_scores = costco_insta_corpus['comments'].progress_apply(get_sentiment_details)
costco_insta_corpus[sentiment_columns] = sentiment_scores

# How many comments fall under each predicted label
print(costco_insta_corpus['sentiment'].value_counts())

In [None]:
# Display the corpus with the sentiment columns added above.
# NOTE(review): this renders the whole frame (pandas abbreviates long frames under
# its default display options); a .head() / .sample() view may be easier to read.
costco_insta_corpus

In [None]:
# Optional export: uncomment to save the enriched corpus (categories, summaries, sentiment) to CSV
#costco_insta_corpus.to_csv('costco_instagram_data.csv', index=False)