In [8]:
import pandas as pd

# Example data
data = {
    'PID': [0, 1, 2, 3, 4],
    'Formulation': ['i1,i5,i4', 'i1,i3', 'i1,i4', 'i4,i9', 'i1,i5,i3'],
    'Category': ['eye', 'lip', 'eye', 'eye', 'lip']
}

# Load data into a DataFrame
df = pd.DataFrame(data)

In [9]:
df

Unnamed: 0,PID,Formulation,Category
0,0,"i1,i5,i4",eye
1,1,"i1,i3",lip
2,2,"i1,i4",eye
3,3,"i4,i9",eye
4,4,"i1,i5,i3",lip


In [10]:
from collections import Counter

# Split the formulation strings into lists of ingredients
df['Ingredients'] = df['Formulation'].apply(lambda x: x.split(','))

# Flatten the list of all ingredients
all_ingredients = [ingredient for sublist in df['Ingredients'] for ingredient in sublist]

# Count the occurrences of each ingredient
ingredient_counts = Counter(all_ingredients)

# Convert to DataFrame for better readability
popular_ingredients_df = pd.DataFrame(ingredient_counts.items(), columns=['Ingredient', 'Count'])
popular_ingredients_df = popular_ingredients_df.sort_values(by='Count', ascending=False)

print(popular_ingredients_df)

  Ingredient  Count
0         i1      4
2         i4      3
1         i5      2
3         i3      2
4         i9      1


In [11]:
from itertools import combinations

# Find all possible pairs of ingredients in each product
def get_combinations(ingredients):
    return list(combinations(ingredients, 2))

# Apply the function to get all combinations for each product
df['IngredientCombinations'] = df['Ingredients'].apply(get_combinations)

# Flatten the list of all ingredient combinations
all_combinations = [combo for sublist in df['IngredientCombinations'] for combo in sublist]

# Count the occurrences of each combination
combination_counts = Counter(all_combinations)

# Convert to DataFrame for better readability
popular_combinations_df = pd.DataFrame(combination_counts.items(), columns=['Combination', 'Count'])
popular_combinations_df = popular_combinations_df.sort_values(by='Count', ascending=False)

print(popular_combinations_df)

  Combination  Count
0    (i1, i5)      2
1    (i1, i4)      2
3    (i1, i3)      2
2    (i5, i4)      1
4    (i4, i9)      1
5    (i5, i3)      1


In [12]:
# Count the occurrences of each ingredient in each sub-category
category_ingredient_counts = df.explode('Ingredients').groupby(['Category', 'Ingredients']).size().reset_index(name='Count')

# Sort by category and count
category_ingredient_counts = category_ingredient_counts.sort_values(by=['Category', 'Count'], ascending=[True, False])

print(category_ingredient_counts)

  Category Ingredients  Count
1      eye          i4      3
0      eye          i1      2
2      eye          i5      1
3      eye          i9      1
4      lip          i1      2
5      lip          i3      2
6      lip          i5      1


In [13]:
# Count the occurrences of each ingredient combination in each sub-category
category_combination_counts = df.explode('IngredientCombinations').groupby(['Category', 'IngredientCombinations']).size().reset_index(name='Count')

# Sort by category and count
category_combination_counts = category_combination_counts.sort_values(by=['Category', 'Count'], ascending=[True, False])

print(category_combination_counts)

  Category IngredientCombinations  Count
0      eye               (i1, i4)      2
1      eye               (i1, i5)      1
2      eye               (i4, i9)      1
3      eye               (i5, i4)      1
4      lip               (i1, i3)      2
5      lip               (i1, i5)      1
6      lip               (i5, i3)      1


In [24]:
import pandas as pd

# Example data
# data = {
#     'PID': [0, 1, 2, 3],
#     'Formulation': ['i1,i2,i5,i10', 'i1,i6,i9', 'i2,i3,i5', 'i1,i2,i3,i5'],
#     'Category': ['lip care', 'eye care', 'face cleanser', 'face/neck care']
# }

data = {
    'PID': [0, 1, 2, 3, 4],
    'Formulation': ['i1,i5,i4', 'i1,i3', 'i1,i4', 'i4,i9', 'i1,i5,i3'],
    'Category': ['eye', 'lip', 'eye', 'eye', 'lip']
}


# Load data into a DataFrame
df = pd.DataFrame(data)

# Split the formulation strings into lists of ingredients
df['Ingredients'] = df['Formulation'].apply(lambda x: x.split(','))

In [25]:
from collections import Counter

# Flatten the list of all ingredients
all_ingredients = [ingredient for sublist in df['Ingredients'] for ingredient in sublist]

# Count the occurrences of each ingredient
ingredient_counts = Counter(all_ingredients)

# Convert to DataFrame for better readability
popular_ingredients_df = pd.DataFrame(ingredient_counts.items(), columns=['Ingredient', 'Count'])
popular_ingredients_df = popular_ingredients_df.sort_values(by='Count', ascending=False)

print(popular_ingredients_df)

  Ingredient  Count
0         i1      4
2         i4      3
1         i5      2
3         i3      2
4         i9      1


In [26]:
from itertools import combinations

# Find all possible pairs of ingredients in each product
def get_combinations(ingredients):
    return list(combinations(ingredients, 2))

# Apply the function to get all combinations for each product
df['IngredientCombinations'] = df['Ingredients'].apply(get_combinations)

# Flatten the list of all ingredient combinations
all_combinations = [combo for sublist in df['IngredientCombinations'] for combo in sublist]

# Count the occurrences of each combination
combination_counts = Counter(all_combinations)

# Convert to DataFrame for better readability
popular_combinations_df = pd.DataFrame(combination_counts.items(), columns=['Combination', 'Count'])
popular_combinations_df = popular_combinations_df.sort_values(by='Count', ascending=False)

print(popular_combinations_df)

  Combination  Count
0    (i1, i5)      2
1    (i1, i4)      2
3    (i1, i3)      2
2    (i5, i4)      1
4    (i4, i9)      1
5    (i5, i3)      1


In [27]:
# Define a function to count ingredients by category
def count_ingredients_by_category(category_df):
    all_ingredients = [ingredient for sublist in category_df['Ingredients'] for ingredient in sublist]
    ingredient_counts = Counter(all_ingredients)
    return pd.DataFrame(ingredient_counts.items(), columns=['Ingredient', 'Count']).sort_values(by='Count', ascending=False)

# Get the unique categories
categories = df['Category'].unique()

# Create a dictionary to store the results for each category
category_ingredient_counts = {}

for category in categories:
    category_df = df[df['Category'] == category]
    category_ingredient_counts[category] = count_ingredients_by_category(category_df)

# Print the results for each category
for category, counts_df in category_ingredient_counts.items():
    print(f"Popular ingredients in {category}:")
    print(counts_df)
    print()

Popular ingredients in eye:
  Ingredient  Count
2         i4      3
0         i1      2
1         i5      1
3         i9      1

Popular ingredients in lip:
  Ingredient  Count
0         i1      2
1         i3      2
2         i5      1



In [28]:
# Define a function to count ingredient combinations by category
def count_combinations_by_category(category_df):
    all_combinations = [combo for sublist in category_df['IngredientCombinations'] for combo in sublist]
    combination_counts = Counter(all_combinations)
    return pd.DataFrame(combination_counts.items(), columns=['Combination', 'Count']).sort_values(by='Count', ascending=False)

# Create a dictionary to store the results for each category
category_combination_counts = {}

for category in categories:
    category_df = df[df['Category'] == category]
    category_combination_counts[category] = count_combinations_by_category(category_df)

# Print the results for each category
for category, counts_df in category_combination_counts.items():
    print(f"Popular ingredient combinations in {category}:")
    print(counts_df)
    print()

Popular ingredient combinations in eye:
  Combination  Count
1    (i1, i4)      2
0    (i1, i5)      1
2    (i5, i4)      1
3    (i4, i9)      1

Popular ingredient combinations in lip:
  Combination  Count
0    (i1, i3)      2
1    (i1, i5)      1
2    (i5, i3)      1

