# In [None]:  (Jupyter notebook cell marker left over from export)
import pandas as pd
from groq import Groq
import json
import time
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score
# --- Configuration ---
# Prefer the GROQ_API_KEY environment variable so a real key never has to be
# written into this file; the literal is only a placeholder fallback.
client = Groq(api_key=os.environ.get("GROQ_API_KEY", "My Key"))
# --- Load Labeled Data (Original Training File for Splitting) ---
full_labeled_df = pd.read_excel(r"C:\Users\shrey\Downloads\bodywash-train.xlsx")
# .copy() makes the three-column subset an independent frame, so the .loc
# writes below (and later column assignments) act on it directly rather than
# on a possible view of the frame returned by read_excel.
full_labeled_df = full_labeled_df[['Core Item', 'Level 1 (PARENT)', 'Level 2 (CHILD)']].copy()
# Hard-coded Level 2 overrides for two rows, addressed by index label.
full_labeled_df.loc[7336, 'Level 2 (CHILD)'] = 'Exfoliating Ingredient'
full_labeled_df.loc[7380, 'Level 2 (CHILD)'] = 'Special Occassions'
def preprocess_text(text):
    """Normalize a text cell: lowercase it and collapse all whitespace.

    Newlines, tabs and runs of spaces become a single space, and leading and
    trailing whitespace is removed.

    Args:
        text: The cell value. Missing values (``None`` / NaN) are returned
            unchanged so downstream ``dropna()`` / ``isnull()`` checks still
            see them; any other non-string value is converted with ``str()``.

    Returns:
        The normalized string, or the original missing value.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            return text
        text = str(text)
    # str.split() with no arguments splits on any whitespace run and drops
    # leading/trailing whitespace, so this needs no regex (and no `re` import).
    return ' '.join(text.lower().split())
# Normalize the review text and both label columns of the labeled data.
full_labeled_df['Core Item'] = full_labeled_df['Core Item'].apply(preprocess_text)
full_labeled_df['Level 1 (PARENT)'] = full_labeled_df['Level 1 (PARENT)'].apply(preprocess_text)
full_labeled_df['Level 2 (CHILD)'] = full_labeled_df['Level 2 (CHILD)'].apply(preprocess_text)
# --- Load Unlabeled Test Data (The file for final prediction) ---
final_unlabeled_test_df = pd.read_excel(r"C:\Users\shrey\Downloads\bodywash-test.xlsx")
final_unlabeled_test_df['Core Item'] = final_unlabeled_test_df['Core Item'].apply(preprocess_text)
# --- Split Labeled Data into Training and Validation Sets (80:20) ---
# Stratify by the 'Level 1 (PARENT)' column (not the whole frame) when it has
# a reasonable number of distinct values and no missing entries.
# NOTE(review): the split is row-level, so rows sharing a 'Core Item' can land
# on both sides of the split — confirm this is acceptable for validation.
stratify_labels = full_labeled_df['Level 1 (PARENT)']
if stratify_labels.nunique() > 500 or stratify_labels.isnull().any():
    stratify_labels = None
try:
    train_df_raw, validation_df_raw = train_test_split(
        full_labeled_df,  # Using the FULL labeled dataframe
        test_size=0.2,
        random_state=42,
        # Stratification raises ValueError if a class has only 1 sample;
        # that case is handled by the fallback below.
        stratify=stratify_labels,
    )
except ValueError as e:
    print(f"Warning: Stratification failed: {e}. Splitting without stratification.")
    train_df_raw, validation_df_raw = train_test_split(
        full_labeled_df,  # Using the FULL labeled dataframe
        test_size=0.2,
        random_state=42,
    )
print(f"\nRaw data split complete: ")
print(f"  Raw Training set size: {len(train_df_raw)} rows (80%)")
print(f"  Raw Validation set size: {len(validation_df_raw)} rows (20%)")
# --- Apply Grouping to Training and Validation DataFrames ---
def _parse_label_cell(cell):
    """Split one comma-separated label cell into clean, lower-cased labels."""
    if isinstance(cell, (list, tuple, set)):
        parts = cell
    elif pd.isna(cell):
        # A missing cell contributes no labels (instead of the string 'nan').
        return []
    else:
        parts = str(cell).split(',')
    return [str(part).strip().lower() for part in parts if str(part).strip()]


def group_dataframe_labels(df):
    """Collapse a row-per-label frame into one row per 'Core Item'.

    Each 'Level 1 (PARENT)' / 'Level 2 (CHILD)' cell is parsed as a
    comma-separated list of labels (stripped and lower-cased). Rows sharing
    the same 'Core Item' are merged, keeping first-seen item order.

    Args:
        df: DataFrame with 'Core Item', 'Level 1 (PARENT)' and
            'Level 2 (CHILD)' columns. It is not modified.

    Returns:
        A new DataFrame with one row per 'Core Item'; each label column holds
        a sorted list of that item's unique labels.
    """
    label_columns = ['Level 1 (PARENT)', 'Level 2 (CHILD)']

    # Work on a copy so the caller's (possibly sliced) frame is left untouched.
    parsed = df[['Core Item'] + label_columns].copy()
    for column in label_columns:
        parsed[column] = parsed[column].apply(_parse_label_cell)

    def merge_unique(label_lists):
        # Flatten the per-row lists; sort so the result is deterministic
        # (plain set ordering of strings varies between interpreter runs).
        return sorted({label for labels in label_lists for label in labels})

    grouped_df = (
        parsed.groupby('Core Item', sort=False)
        .agg({column: merge_unique for column in label_columns})
        .reset_index()
    )
    return grouped_df
# Collapse each raw split so that every distinct 'Core Item' becomes one row
# (training split first, then validation).
train_df, validation_df = (
    group_dataframe_labels(raw_split)
    for raw_split in (train_df_raw, validation_df_raw)
)

# Report how many unique items each grouped split contains.
print("\nData grouped by 'Core Item':")
for split_name, grouped_split in (("Training", train_df), ("Validation", validation_df)):
    print(f"  Grouped {split_name} set size: {len(grouped_split)} unique items")
print("\nSample of grouped training data (first 5 unique items):")
print(train_df.head())
# --- Prepare Few-Shot Examples: Using the 50 examples provided ---
FEW_SHOT_PROMPT_PART = """
Example 1:
Core Item: "#1 choice for the man of the house-I buy this for my guy and it smells soooooo good! Getting it through Amazon is more expensive than going to the store but you pay extra for convenience. He loves it as well and doesn't use anything else."
Output:
{
 "Level 1 (PARENT)": ["brand value", "price", "accessibility", "fragrance", "companion approval"],
 "Level 2 (CHILD)": ["brand affinity / loyalty", "discounts / promotions", "online availability", "personal likability (fragrance)", "spouse / partner"]
}

Example 2:
Core Item: "A manly man man scent that's manly-Once, long ago, I left the rural lands teaming with forestry, animals, and endless uninventive sports bars for big city life. Soon after I found myself bereft of the things that smelled of home. While I don't mind diesel soot and overflowing garbage at times, I do long for those old familiar scents. I'd also began to miss the rural activities. I had no lawn to cut, no garage to tinker in, no more camouflage. Fast forward a decade or two and I stumble upon this bottle of manly goodness. Upon the cracking of the ergonomically designed pop/top lid I quickly discovered that I no longer had to smell of Chad or Brock with Axe, Roy or Bill with Dial, Grayson or Skylar with Dove. No, I could now smell pleasantly obnoxious. Manly obnoxious. My glaring nostrils over-inhaled scents of 2 cycle exhaust, WD-40, and the fresh mailing of a pine through a wood chipper. When washing, I feel like angelic duct tape has wrapped my slightly major finger injury in warmth and comfort of lesser gods. After two weeks my chest hair has returned, I've rediscovered the Army surplus store, and currently pricing rural tracts of land contains abandoned vehicles and reeking of deer urine. My briefcase now a rucksack and my Kia has become a Ranchero. Thank Cremo, you've helped me become the man I once was. Godspeed."
Output:
{
 "Level 1 (PARENT)": ["packaging", "self esteem", "fragrance"],
 "Level 2 (CHILD)": ["dispensing mechanism", "masculine", "masculine fragrance", "premium fragrance"]
}

Example 3:
Core Item: "A must purchase-Clean scent for a man. The price is worth the size of the bottles. Leave the skin very moisturized. I am sold."
Output:
{
 "Level 1 (PARENT)": ["fragrance", "skin care", "price"],
 "Level 2 (CHILD)": ["fresh fragrance", "hydrating / moisturizing", "value justification"]
}

Example 4:
Core Item: "A Wonderful Product-AXE products are the only body wash products that I use and trust to give me a deep clean and fresh feeling. The 16 oz Night brand lasts close to a month and keeps a nice fragrance with me for hours after my shower. I buy all of the different AXE brands, but this is my favorite."
Output:
{
 "Level 1 (PARENT)": ["brand value", "cleansing", "feel / finish", "fragrance", "price"],
 "Level 2 (CHILD)": ["brand affinity / loyalty", "deep cleaning", "fresh / clean feeling", "long lasting fragrance", "value justification"]
}

Example 5:
Core Item: "A wonderful, subtle, relaxing minty smell-A wonderful, subtle, relaxing minty smell. Doesn't sting at all, for those who are worried about the mint aspect. It's probably my favorite body wash right now."
Output:
{
 "Level 1 (PARENT)": ["fragrance", "brand value", "product safety"],
 "Level 2 (CHILD)": ["aromatherapy", "brand affinity / loyalty", "fragrance strength", "fragrance type", "side effects"]
}

Example 6:
Core Item: "Acrylates Copolymer aka plastic-Banned in Illinois, one of its ingredients is Acrylates Copolymer. Click on Additional Information. If someone from Illinois bought this I wonder if Amazon or the consumer would be breaking the law."
Output:
{
 "Level 1 (PARENT)": ["brand accountability"],
 "Level 2 (CHILD)": ["eco-friendly"]
}

Example 7:
Core Item: "All around great product!-I recently started using Cremo body wash and it is the best body wash I've ever used. It smells amazing. I started with the Forest Blend then got the Blue Cedar &Cypress... the a Blue Cedar&Cypress smells incredible!! The scent lasts almost all day but isn't overpowering. I highly recommend using this body wash!"
Output:
{
 "Level 1 (PARENT)": ["brand value", "fragrance"],
 "Level 2 (CHILD)": ["best-in-class", "fragrance strength", "long lasting fragrance", "personal experience"]
}

Example 8:
Core Item: "All over greatness-I love this smell and use it all over daily. The smell also helps me wake up. I also use it to remove my mustache wax when it's time to take it off which is great because I don't have to damage my mustache when I'm trying to clean it."
Output:
{
 "Level 1 (PARENT)": ["fragrance", "convenience"],
 "Level 2 (CHILD)": ["fresh fragrance", "multi-purpose"]
}

Example 9:
Core Item: "All the honeys love me before and after this body wash-So good"
Output:
{
 "Level 1 (PARENT)": ["social recognition"],
 "Level 2 (CHILD)": ["sex appeal"]
}

Example 10:
Core Item: "Almost no sandalwood fragrance-I'm a big fan of Sandalwood. This has almost no sandalwood fragrance. I'd rather buy Mysore Sandalwood soaps that are much stronger in fragrance."
Output:
{
 "Level 1 (PARENT)": ["fragrance"],
 "Level 2 (CHILD)": ["fragrance strength", "fragrance type"]
}

Example 11:
Core Item: "alot of waste-I never had body wash that came out like water."
Output:
{
 "Level 1 (PARENT)": ["product texture", "convenience"],
 "Level 2 (CHILD)": ["consistency", "product application"]
}

Example 12:
Core Item: "Alright for the price.-Like the man scent it leaves on me. Just wish it were more concentrated so I can apply less to my wash cloth. Over all I'll continue to use just hope they don't make less concentrated than it is."
Output:
{
 "Level 1 (PARENT)": ["product texture", "fragrance", "price"],
 "Level 2 (CHILD)": ["consistency", "masculine fragrance", "value justification"]
}

Example 13:
Core Item: "Always great to check reviews-Absolutely love this scent old spice with shea butter. I opened it up yesterday got it for my child's father it is such a great scent will buy more once it runs out and it is 4 16oz bottles which it will last a good amount of time."
Output:
{
 "Level 1 (PARENT)": ["peer recommendation", "packaging", "companion approval"],
 "Level 2 (CHILD)": ["online reviews", "sku", "spouse / partner"]
}

Example 14:
Core Item: "Always happy with axe products-Always happy with all axe products. Great value"
Output:
{
 "Level 1 (PARENT)": ["brand value", "price"],
 "Level 2 (CHILD)": ["personal experience", "value justification"]
}

Example 15:
Core Item: "Amanda de Beaulieu-Husband loves this and it smells dreamy clean!! His skin is not as dry since using."
Output:
{
 "Level 1 (PARENT)": ["skin care", "fragrance", "companion approval"],
 "Level 2 (CHILD)": ["dryness control", "fresh fragrance", "spouse / partner"]
}

Example 16:
Core Item: "Amazing and best one out there!-Awesome!"
Output:
{
 "Level 1 (PARENT)": ["brand value"],
 "Level 2 (CHILD)": ["best-in-class"]
}

Example 17:
Core Item: "Amazing Body Scrub-I love this stuff. It is by far the most effective and best body scrub I have used. I was extremely happy to find it here on Amazon, because a local store stopped selling it. This scrub really gets the dead, dry skin off and it smells good. Highly recommend it!"
Output:
{
 "Level 1 (PARENT)": ["brand value", "cleansing", "accessibility", "fragrance"],
 "Level 2 (CHILD)": ["best-in-class", "exfoliation", "offline availability", "online availability", "personal likability (fragrance)"]
}

Example 18:
Core Item: "Amazing body wash-I love this body wash. I don't like alot of the scents because they are usually very harsh, but this citrus is amazing. Glad I picked up a 2 pack"
Output:
{
 "Level 1 (PARENT)": ["fragrance"],
 "Level 2 (CHILD)": ["fragrance strength", "fragrance type"]
}

Example 19:
Core Item: "Amazing Body Wash-The scent is fantastic, love having several to stash under the sink for future use. Hubby and son love the scent (as do I) and the great clean and moisturized feeling."
Output:
{
 "Level 1 (PARENT)": ["feel / finish", "skin care", "companion approval", "fragrance", "cleansing"],
 "Level 2 (CHILD)": ["fresh / clean feeling", "hydrating / moisturizing", "parent", "personal likability (fragrance)", "regular cleansing", "spouse / partner"]
}

Example 20:
Core Item: "Amazing fresh fragrance-I used item for shower gel and it's amazing just a little bit goes a very long way"
Output:
{
 "Level 1 (PARENT)": ["fragrance", "price"],
 "Level 2 (CHILD)": ["fresh fragrance", "value justification"]
}

Example 21:
Core Item: "Amazing light smell-I have a sensitive sense of smell, and do not enjoy overly fragrant anything. My husband uses this and smells delicious; still a fresh & clean manly scent, and not overpowering at all."
Output:
{
 "Level 1 (PARENT)": ["fragrance", "companion approval"],
 "Level 2 (CHILD)": ["fragrance strength", "fresh fragrance", "masculine fragrance", "spouse / partner"]
}

Example 22:
Core Item: "Amazing stuff-Okay I have to say I am very very impressed with this cremo body wash not only does it smell great it has an amazing leather and washes off nicely leaving your skin feeling great"
Output:
{
 "Level 1 (PARENT)": ["product texture", "fragrance", "cleansing"],
 "Level 2 (CHILD)": ["lather", "personal likability (fragrance)", "regular cleansing"]
}

Example 23:
Core Item: "Amazing!!!!!!Gets all dirt off your hands/arms/nails. Unreal. Best I've ever used. Only other way to get the dirt off like this body wash is doing dishes... hands submerged in hot soapy water. Highly recommend"
Output:
{
 "Level 1 (PARENT)": ["brand value", "cleansing"],
 "Level 2 (CHILD)": ["best-in-class", "dirt removal"]
}

Example 24:
Core Item: "Amazing-I work underground construction ( outside and muddy year round) so usually soap is soap. I picked this up at random at a...[source](https://www.amazon.ca/Every-Man-Jack-Shower-Citrus/dp/B00SYD5KTG)
Output:
{
 "Level 1 (PARENT)": ["cleansing", "fragrance", "accessibility", "packaging"],
 "Level 2 (CHILD)": ["exfoliation", "fragrance type", "offline availability", "online availability", "sku"]
}

Example 25:
Core Item: "Amazing-Smells amazing! Thicker than other Axe products. They're hard to find in store so we ordered it here on amazon. The price is more than the $4 bottles in the store but this particular kind is worth it."
Output:
{
 "Level 1 (PARENT)": ["product texture", "accessibility", "fragrance"],
 "Level 2 (CHILD)": ["consistency", "offline availability", "online availability", "personal likability (fragrance)"]
}

Example 26:
Core Item: "Best Body Wash-I love this body wash, it takes just a bit of it to really lather up and leave you smelling great. Just love it. A true bargain in my opinion."
Output:
{
 "Level 1 (PARENT)": ["brand value", "product texture", "fragrance", "price"],
 "Level 2 (CHILD)": ["best-in-class", "lather", "personal likability (fragrance)", "value justification"]
}

Example 27:
Core Item: "Best body wash-This body wash is fantastic. I didn't order from Amazon, but everytime I shower I enjoy it so much the scent is strong and crystals feel nice. Plus an added bonus is it was never tested on animals. I'll definitely be getting this again but probably try another scent next time even citrus is great."
Output:
{
 "Level 1 (PARENT)": ["brand accountability", "brand value", "fragrance", "product texture"],
 "Level 2 (CHILD)": ["animal testing", "best-in-class", "fragrance strength", "granules"]
}

Example 28:
Core Item: "Best cruelty free product. Even beats name brand body washes-I've used quite a bit of standard body washes and also body washes that are cruelty free. I have to say, this body wash has the best longevity and clean scent over any of the standard old spice, Nivea, etc... It has a great lather and stays on the skin hours later after a wash. I'm on my second container of this size and have used countless standard size containers. Definitely give this a try. This price you can't beat and this will last you a few months of use"
Output:
{
 "Level 1 (PARENT)": ["brand accountability", "fragrance", "product texture", "price"],
 "Level 2 (CHILD)": ["animal testing", "fresh fragrance", "lather", "long lasting fragrance", "value justification"]
}

Example 29:
Core Item: "Best for the $-For the price this has to be the best mens bodywash. Lathers extremely well. I've used every scent. Cleans well & doesn't dry skin."
Output:
{
 "Level 1 (PARENT)": ["product texture", "cleansing", "product safety", "price"],
 "Level 2 (CHILD)": ["lather", "regular cleansing", "side effects", "value justification"]
}

Example 30:
Core Item: "Best fragrance ever-Not every store carries Black Chill which is a shame as it is one of the hen finest fragrances I have ever bought. Sophisticated, clean, and masculine. I wish there was a Black Chill cologne."
Output:
{
 "Level 1 (PARENT)": ["fragrance", "accessibility"],
 "Level 2 (CHILD)": ["personal likability (fragrance)", "offline availability"]
}

Example 31:
Core Item: "BODY WASH ( SHISO )-A nice refreshing scent, but does not last long. Needing a moisturizing, leaves the body dry, the skin should feel hidratante intense. Long-lasting moisturization with no greasy feel."
Output:
{
 "Level 1 (PARENT)": ["efficacy", "feel / finish", "fragrance", "skin care"],
 "Level 2 (CHILD)": ["long lasting results", "not greasy / oily", "fresh fragrance", "long lasting fragrance", "dryness control", "hydrating / moisturizing"]
}

Example 32:
Core Item: "Body Wash-I purchased all three fragrances and have begun using the orange (citrusy) fragrance. I love it because though there is a good hint of the orange fragrance it's not overpowering and it cleans and moisturizes."
Output:
{
 "Level 1 (PARENT)": ["cleansing", "fragrance", "skin care"],
 "Level 2 (CHILD)": ["regular cleansing", "fragrance strength", "fragrance type", "hydrating / moisturizing"]
}

Example 33:
Core Item: "Consistent quality.-I like the blades consistent quality. I am now using Harry's body wash. My wife likes the scent. I am now finding Harry's products in the stores."
Output:
{
 "Level 1 (PARENT)": ["accessibility", "companion approval"],
 "Level 2 (CHILD)": ["offline availability", "spouse / partner"]
}

Example 34:
Core Item: "Easy to Use, Easy To Order-I like Harrys because it is also offered at our local Target. Meaning I can try the shaving cream, face wash, body wash, etc. before I decide to include it with my order. This, and the quality of the product, is why I chose Harrys over their competitors."
Output:
{
 "Level 1 (PARENT)": ["accessibility", "brand value"],
 "Level 2 (CHILD)": ["offline availability", "product quality"]
}

Example 35:
Core Item: "Excellent products-You get an excellent clean shave with these blades. So much better than store bought blades. Also their other products are very good too. I have used the body wash, face scrub, and face cream and they are some of the best I have ever tried. And best of all the prices are very reasonable."
Output:
{
 "Level 1 (PARENT)": ["brand value", "price"],
 "Level 2 (CHILD)": ["best-in-class", "personal experience", "value justification"]
}

Example 36:
Core Item: "Except great service and Harry's Goods.-Outstanding products my first time using the body wash...my wife love the men fragrance. Men body wash. Great fragrance."
Output:
{
 "Level 1 (PARENT)": ["companion approval", "fragrance"],
 "Level 2 (CHILD)": ["spouse / partner", "masculine fragrance"]
}

Example 37:
Core Item: "Good products, easy to order and receive.-Razor, shave cream and aftershave balm are excellent. Body wash excellent, shampoo/conditioner not as much."
Output:
{
 "Level 1 (PARENT)": ["brand value", "customer service"],
 "Level 2 (CHILD)": ["personal experience", "purchase / delivery"]
}

Example 38:
Core Item: "Great customer service and great product-Harry's is a very responsive company with a great product. They alert me before the next scheduled shipment in case I want to add anything else and my shaving cream and blades come right when they tell me they will. Their body wash is exceptionally good and washes off better than any other I have tried."
Output:
{
 "Level 1 (PARENT)": ["cleansing"],
 "Level 2 (CHILD)": ["regular cleansing"]
}

Example 39:
Core Item: "Great Product. Great Price.-I ordered an extra Truman and two of their new body washes. The body wash bottles are bigger than the ones I usually buy and are only 7 dollars. I was extremely pleased with that and obviously, I was pleased with the quality. I definitely recommend the body washes if you want a good deal. I hope Harry's keeps expanding their product line so that I can have more options as a consumer."
Output:
{
 "Level 1 (PARENT)": ["brand value", "price"],
 "Level 2 (CHILD)": ["product quality", "value justification"]
}

Example 40:
Core Item: "Great Products!-Since I started ordering my shaving supplies from Harry's, I've enjoyed many smooth shaves at a lower cost than when I used some of the other products in the industry! I appreciate how easy it is to order online and the timely reminders regarding my next shipment. This allows me to accept the shipment or delay it depending upon my busy work schedule. Recently, I tried the body wash in the Fig flavor which was at once satisfying and cleansing. Overall, Harry's continues to be my number one supplier of male bathroom hygiene products!"
Output:
{
 "Level 1 (PARENT)": ["brand value", "cleansing", "efficacy"],
 "Level 2 (CHILD)": ["personal experience", "specialist", "regular cleansing", "quick results"]
}

Example 41:
Core Item: "great products-I have never been as satisfied as I am now with shaving products. All the products I buy have been excellent. From the shaving cream, face wash, razors and now body wash I am very satisfied."
Output:
{
 "Level 1 (PARENT)": ["brand value"],
 "Level 2 (CHILD)": ["personal experience"]
}

Example 42:
Core Item: "Great Products-I really like the feel of the razor handle, nice sharp blades. The shave gel and balm are both comfortable on my skin. Just purchased the Stone body wash and enjoy the scent. Convenient shipping and to make changes to order before it ships"
Output:
{
 "Level 1 (PARENT)": ["fragrance"],
 "Level 2 (CHILD)": ["personal likability (fragrance)"]
}

Example 43:
Core Item: "Great quality and service-I've had my Harry's razor since 2014, and love it. Great blades, shave gel, after shave, etc. I've also ordered the travel sizes and some other products (body wash, face wash) that are great too. Highly recommend."
Output:
{
 "Level 1 (PARENT)": ["brand value"],
 "Level 2 (CHILD)": ["personal experience"]
}

Example 44:
Core Item: "Great shave Great deal-Tried Harry's razor. As good of better than the expensive brands. Then I tried body wash, cleans great, all the varieties smell great. All the Harry's products are awesome."
Output:
{
 "Level 1 (PARENT)": ["brand value", "cleansing", "fragrance"],
 "Level 2 (CHILD)": ["personal experience", "regular cleansing", "personal likability (fragrance)"]
}

Example 45:
Core Item: "Great Shave, eta.-Love Harry's blades as they give me a nice close shave and for a lot less than Schick and Gillette products. Also their body washes are wonderful too. They leave my skin clean and moist. The fragrances are good and not over powering. I hope they come out with some products for the hair: shampoos, conditioners and gels."
Output:
{
 "Level 1 (PARENT)": ["cleansing", "fragrance", "skin care"],
 "Level 2 (CHILD)": ["regular cleansing", "fragrance strength", "hydrating / moisturizing"]
}

Example 46:
Core Item: "Great stuff; I have been sold for a while.-Same as above, but my daughter is going to try your Flamingo brand. I like your body wash well enough to start using it regularly."
Output:
{
 "Level 1 (PARENT)": ["brand value"],
 "Level 2 (CHILD)": ["personal experience"]
}

Example 47:
Core Item: "Great Stuff-Shaving stuff is great but what I get the most comments on is the Stone body wash. Wife loves it and I've had compliments from other ladies. MMMM You smell good! True - the ladies go for it."
Output:
{
 "Level 1 (PARENT)": ["companion approval", "social recognition"],
 "Level 2 (CHILD)": ["spouse / partner", "sex appeal"]
}

Example 48:
Core Item: "Grrat product great price. Love the products-Love the products especially the body wash!! Great quick service. Great price. Razor last for many shaves well worth the $$ Big Bang for small bucks"
Output:
{
 "Level 1 (PARENT)": ["brand value", "price"],
 "Level 2 (CHILD)": ["brand affinity / loyalty", "value justification"]
}

Example 49:
Core Item: "Happy with Harry's-Just got my second order and I added body wash and face cream. Harry's is 5 for 5 Your razor, shaving cream, face wash and the 2 new products have all been great. Can't wait to see what's next"
Output:
{
 "Level 1 (PARENT)": ["brand value"],
 "Level 2 (CHILD)": ["personal experience", "specialist"]
}

Example 50:
Core Item: "Harry's always...What else is there???-Been associated with now for about 2 years and never went back to department store blades. I'm also loving the new products, the body wash and after shaving mist."
Output:
{
 "Level 1 (PARENT)": ["brand value"],
 "Level 2 (CHILD)": ["personal experience"]
}
"""
# --- Define LLM Interaction Function ---
def classify_item_with_llm(core_item_description, item_index):
    """Classify one review into Level 1 / Level 2 labels via the Groq chat API.

    The allowed label vocabulary is derived from the module-level
    ``full_labeled_df`` and embedded in the system prompt; the few-shot
    examples from ``FEW_SHOT_PROMPT_PART`` are sent in the user prompt.

    Args:
        core_item_description: Review text to classify.
        item_index: Caller-side index of the item. Not used by the function;
            kept so existing callers keep working.

    Returns:
        A ``(level1, level2)`` tuple of lists of stripped, lower-cased label
        strings. Both lists are empty if the request or the parsing fails
        (the error is printed, not raised).
    """
    # Sorted so the prompt text is identical from call to call.
    sorted_l1_labels = _label_vocabulary(full_labeled_df['Level 1 (PARENT)'])
    sorted_l2_labels = _label_vocabulary(full_labeled_df['Level 2 (CHILD)'])

    system_prompt = f"""You are a product categorization expert of bodywash product. Your task is to analyze product reviews ('Core Item') and identify *ALL RELEVANT AND DIRECTLY SUPPORTED* Level 1 (PARENT) and Level 2 (CHILD) factors. Level 2 (CHILD) factors are more granular and nested under Level 1 (PARENT). An Core Item can have multiple Level 1 (PARENT) and Level 2 (CHILD) factors.

    YOU MUST ONLY CHOOSE LABELS FROM THE PROVIDED LISTS. DO NOT invent new labels or use synonyms not explicitly listed, DO NOT over-predict, and EXACTLY match.
    *CRITICAL:* If a concept is mentioned but does not align with one of the exact labels below, *DO NOT generate a label for it; omit it.* Prioritize *precision and accuracy*. Only provide labels that are clearly and directly supported by the text and *exactly match* a label from the provided lists. *Do not over-predict.*

   Level 1 (PARENT) Labels MUST ONLY be chosen from this list: {sorted_l1_labels}
   Level 2 (CHILD) Labels MUST ONLY be chosen from this list: {sorted_l2_labels}
   If no label from the list is applicable, provide an empty list for that level.

   Provide the output ONLY as a JSON object with two keys: "Level 1 (PARENT)" and "Level 2 (CHILD)". Each key should contain a list of strings. If no factors are applicable, provide an empty list. DO NOT include any other text, explanations, or formatting outside the JSON.
"""

    user_prompt = f"""
{FEW_SHOT_PROMPT_PART}

---
Now, classify the following item:

Core Item: "{core_item_description}"
Output:
"""
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": user_prompt,
                }
            ],
            # NOTE: this id names the 8B Llama 3 model, not the 70B one.
            model="llama3-8b-8192",
            response_format={"type": "json_object"},
            temperature=0.0,
        )
        # `choices` is a list of completions; the reply is its first element.
        response_content = chat_completion.choices[0].message.content

        parsed_output = json.loads(response_content)

        # Read the keys the prompt actually asks for; the short forms are
        # accepted as a fallback in case the model abbreviates them.
        l1 = _normalize_predicted_labels(parsed_output, "Level 1 (PARENT)", "Level 1")
        l2 = _normalize_predicted_labels(parsed_output, "Level 2 (CHILD)", "Level 2")
        return l1, l2
    except IndexError:
        print(f"IndexError: Could not get response for item '{core_item_description}'")
        return [], []  # Return empty lists on error
    except json.JSONDecodeError as e:
        print(f"JSON Parsing Error for item '{core_item_description}': {e}")
        print(f"Problematic content: '{response_content}'")
        return [], []  # Return empty lists on error
    except Exception as e:
        # Deliberate best-effort: one failed item must not abort a long run.
        print(f"General Error during LLM interaction for item '{core_item_description}': {e}")
        return [], []  # Return empty lists on error


def _label_vocabulary(label_series):
    """Return the sorted unique labels found in a column of comma-separated cells."""
    vocabulary = set()
    for cell in label_series.dropna():
        vocabulary.update(
            part.strip().lower() for part in str(cell).split(',') if part.strip()
        )
    return sorted(vocabulary)


def _normalize_predicted_labels(parsed_output, *keys):
    """Return the cleaned, de-duplicated label list stored under the first key present."""
    raw_labels = []
    for key in keys:
        if key in parsed_output:
            raw_labels = parsed_output[key]
            break
    if isinstance(raw_labels, str):
        # A bare string would otherwise be iterated character by character.
        raw_labels = [raw_labels]
    elif not isinstance(raw_labels, (list, tuple)):
        raw_labels = []
    cleaned = [
        str(label).strip().lower()
        for label in raw_labels
        if label is not None and str(label).strip()
    ]
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(cleaned))

# --- Helper function to run predictions ---
def run_predictions(df_to_predict, description="Full Dataset Prediction", delay_seconds=2.0):
    """Classify every row of a DataFrame and attach the predictions.

    Args:
        df_to_predict: DataFrame with a 'Core Item' column; every row is
            sent to ``classify_item_with_llm``.
        description: Label used in the progress messages.
        delay_seconds: Pause after each request. The 2.0 s default is meant
            to respect Groq's 30 requests-per-minute limit.

    Returns:
        A copy of ``df_to_predict`` with two added columns,
        "Predicted Level 1 (PARENT)" and "Predicted Level 2", each holding
        the predicted labels joined by ", " (empty string if none).
    """
    predicted_level1 = []
    predicted_level2 = []
    items_to_process = df_to_predict  # Process the entire DataFrame passed
    total_items = len(items_to_process)

    print(f"\n--- Starting prediction for {description}: {total_items} items ---")
    start_prediction_time = time.time()

    # Count processed rows by position: the DataFrame index may be
    # non-contiguous or non-integer (e.g. after a split), so it cannot be
    # used for progress reporting.
    for position, (index, row) in enumerate(items_to_process.iterrows(), start=1):
        core_item = str(row['Core Item'])
        # The row's index label is passed through for the callee's diagnostics.
        l1, l2 = classify_item_with_llm(core_item, index)
        predicted_level1.append(', '.join(l1))
        predicted_level2.append(', '.join(l2))

        if position % 100 == 0:  # Print progress every 100 items for full runs
            print(f"  Processed {position}/{total_items} items. Time elapsed: {(time.time() - start_prediction_time):.2f}s")
        time.sleep(delay_seconds)

    elapsed_prediction_time = time.time() - start_prediction_time
    print(f"--- Completed prediction for {description} in {elapsed_prediction_time:.2f} seconds ---")
    # Copy the original DataFrame and attach predictions (one entry per row,
    # in iteration order, so the lists line up with the copied frame).
    results_df = items_to_process.copy()
    results_df["Predicted Level 1 (PARENT)"] = predicted_level1
    results_df["Predicted Level 2"] = predicted_level2
    return results_df

# --- Function to evaluate multi-label classification ---
def evaluate_multi_label(y_true_col, y_pred_col, all_possible_labels_source_series, label_name):
    """Print and return micro-averaged multi-label metrics for one label level.

    Each cell of the three inputs may be either a list of labels or a single
    comma-separated string (e.g. "a, b"); missing/non-string cells count as
    "no labels". All labels are stripped and lowercased before comparison so
    that true labels, predictions and the reference vocabulary are matched on
    the same normalized form.

    Args:
        y_true_col: Series of ground-truth labels, one cell per item.
        y_pred_col: Series of predicted labels, aligned row-for-row with
            ``y_true_col``.
        all_possible_labels_source_series: Series whose cells contribute extra
            labels to the binarizer vocabulary.
        label_name: Human-readable name printed in the report header.

    Returns:
        Dict with keys "precision", "recall", "f1" and "jaccard" holding the
        micro-averaged scores (``zero_division=0``).
    """
    # Normalize up front: passing a raw string to set.update() or to
    # MultiLabelBinarizer.transform() would treat every character as a label.
    y_true_lists = [_as_label_list(cell) for cell in y_true_col.tolist()]
    y_pred_lists = [_as_label_list(cell) for cell in y_pred_col.tolist()]

    # Vocabulary = every label seen in the true set, the predicted set and the
    # reference series, so transform() never meets an unknown class.
    all_unique_labels_union = set()
    for label_list in y_true_lists:
        all_unique_labels_union.update(label_list)
    for label_list in y_pred_lists:
        all_unique_labels_union.update(label_list)
    for labels_item in all_possible_labels_source_series.tolist():
        all_unique_labels_union.update(_as_label_list(labels_item))

    mlb = MultiLabelBinarizer()
    mlb.fit([sorted(all_unique_labels_union)])  # A single "sample" carrying the whole vocabulary
    y_true_bin = mlb.transform(y_true_lists)
    y_pred_bin = mlb.transform(y_pred_lists)

    precision = precision_score(y_true_bin, y_pred_bin, average='micro', zero_division=0)
    recall = recall_score(y_true_bin, y_pred_bin, average='micro', zero_division=0)
    f1 = f1_score(y_true_bin, y_pred_bin, average='micro', zero_division=0)
    jaccard = jaccard_score(y_true_bin, y_pred_bin, average='micro', zero_division=0)

    print(f"\n--- Evaluation Metrics for {label_name} ---")
    print(f"  Micro Precision: {precision:.4f}")
    print(f"  Micro Recall:    {recall:.4f}")
    print(f"  Micro F1-Score:  {f1:.4f}")
    print(f"  Micro Jaccard Index: {jaccard:.4f}")
    print("---------------------------------------")

    return {"precision": precision, "recall": recall, "f1": f1, "jaccard": jaccard}


def _as_label_list(value):
    """Turn one cell (label list or comma-separated string) into clean, lowercased labels."""
    if isinstance(value, str):
        candidates = value.split(',')
    elif isinstance(value, (list, tuple, set)):
        candidates = value
    else:
        # NaN/None or any other non-label value: treat as "no labels".
        return []
    return [
        label.strip().lower()
        for label in candidates
        if isinstance(label, str) and label.strip()
    ]


# ==============================================================================
# --- MAIN FULL WORKFLOW EXECUTION ---
# ==============================================================================

print("\n===== STARTING FULL WORKFLOW EXECUTION =====")

# PHASE 1: Predict and Evaluate on the FULL Validation Set
print("\n===== PHASE 1: Predicting and Evaluating on Validation Set =====")

# Wait for explicit confirmation: the full run issues one LLM request per row.
input("\nPress Enter to proceed with FULL PREDICTION and EVALUATION on the entire Validation Set (approx. 1134 unique items)...")

full_validation_results_df = run_predictions(validation_df, description="FULL PREDICTION (entire Validation Set)")

# Compare the labelled 'Level 1 (PARENT)' / 'Level 2 (CHILD)' columns with the
# prediction columns that run_predictions attaches
# ('Predicted Level 1 (PARENT)' / 'Predicted Level 2').
# Keyword names follow evaluate_multi_label's signature, and the vocabulary
# source is the matching column of the full labelled data (a Series, since the
# function calls .tolist() on it).
print("\n===== Evaluation on Validation Set =====")
evaluate_multi_label(
    y_true_col=full_validation_results_df["Level 1 (PARENT)"],
    y_pred_col=full_validation_results_df["Predicted Level 1 (PARENT)"],
    all_possible_labels_source_series=full_labeled_df["Level 1 (PARENT)"],
    label_name="Level 1 (PARENT) Factors (Validation Set)",
)
evaluate_multi_label(
    y_true_col=full_validation_results_df["Level 2 (CHILD)"],
    y_pred_col=full_validation_results_df["Predicted Level 2"],
    all_possible_labels_source_series=full_labeled_df["Level 2 (CHILD)"],
    label_name="Level 2 (CHILD) Factors (Validation Set)",
)
print("\n--- Sample of Full Validation Set Predictions ---")
print(full_validation_results_df.head())
print(full_validation_results_df.columns)
# Saving to CSV
output_validation_filename = "bodywash_validation_predictions_with_accuracy.csv"
full_validation_results_df.to_csv(output_validation_filename, index=False)
print(f"Full validation set predictions saved to {output_validation_filename}")


# PHASE 2: Run the classifier over the original unlabeled test data and save the result.
final_output_filename = "bodywash_final_unlabeled_predictions.csv"

print("\n\n===== PHASE 2: Predicting on Original Unlabeled Test File =====")

# Block until the operator confirms the final, unseen-data run.
input("\nPress Enter to proceed with prediction on the ORIGINAL UNLABELED TEST FILE (bodywash-test.xlsx - Sheet1.csv - approx. 216 items)...")

final_unlabeled_predictions_df = run_predictions(
    final_unlabeled_test_df,
    description="FINAL UNLABELED TEST FILE PREDICTION",
)

# Persist the predictions (without the DataFrame index) before reporting.
final_unlabeled_predictions_df.to_csv(final_output_filename, index=False)
print(f"\nFinal predictions for unlabeled test file saved to {final_output_filename}")

# Short on-screen summary: a preview of the first rows plus the row count.
print("\n--- Sample of Final Unlabeled Predictions (first 5 rows) ---")
print(final_unlabeled_predictions_df.head())
print(f"\nTotal items processed in final unlabeled run: {len(final_unlabeled_predictions_df)}")

print("\n\nProject completed! You have split your data, evaluated model performance, and predicted on the final test set.")
