# Step 1: Define the Data Structures
We’ll need:

	•	A list of recipes, where each recipe contains a list of ingredients.
	•	A dictionary mapping each ingredient to its set of flavor compounds.
	•	The number of recipes in the cuisine (Nc).


# Step 2: Compute Mean Number of Shared Compounds

In [None]:
import numpy as np
from collections import defaultdict

# Sample data structures
# Each recipe is a dict with a 'name' and the list of its ingredient names.
recipes = [
    dict(name='recipe1', ingredients=['chicken', 'broth', 'mustard', 'cream']),
    dict(name='recipe2', ingredients=['apple', 'pork', 'cheddar']),
    # Add more recipes as needed
]

# Maps each ingredient name to the set of its flavor-compound identifiers.
flavor_compounds = dict(
    chicken={'C1', 'C2'},
    broth={'C3', 'C4'},
    mustard={'C5', 'C6'},
    cream={'C7', 'C8'},
    apple={'C9', 'C10'},
    pork={'C11', 'C12'},
    cheddar={'C13', 'C14'},
    # Add more ingredients and their flavor compounds as needed
)

# Number of recipes in the cuisine.
Nc = len(recipes)

# Function to compute Ns(R)
def mean_shared_compounds(recipe, flavor_compounds):
    """Return the mean number of flavor compounds shared per ingredient pair.

    Args:
        recipe: Mapping with an 'ingredients' list of ingredient names.
        flavor_compounds: Mapping from ingredient name to its set of compounds.

    Returns:
        0 when the recipe has fewer than two ingredients; otherwise the summed
        size of all pairwise compound intersections multiplied by
        2 / (n * (n - 1)), where n is the number of ingredients.

    Raises:
        KeyError: If an ingredient has no entry in ``flavor_compounds``.
    """
    names = recipe['ingredients']
    count = len(names)
    if count <= 1:
        # No ingredient pair exists, so there is nothing to average.
        return 0

    compound_sets = [flavor_compounds[name] for name in names]
    # Visit each unordered pair once: every set against the ones after it.
    overlap_total = sum(
        len(first.intersection(second))
        for position, first in enumerate(compound_sets)
        for second in compound_sets[position + 1:]
    )
    return (2 / (count * (count - 1))) * overlap_total

# Average the per-recipe shared-compound score over the whole cuisine
Ns_real = np.asarray(
    [mean_shared_compounds(entry, flavor_compounds) for entry in recipes]
).mean()

# The null model needs a randomized reference dataset, which is not built
# here, so Ns_rand is a zero placeholder for now.
Ns_rand = 0

# ΔNs is the real cuisine's score minus the random-reference score
ΔNs = Ns_real - Ns_rand

print(f"Mean number of shared compounds (Ns): {Ns_real}")
print(f"ΔNs: {ΔNs}")

# Step 3: Compute Contribution of Each Ingredient

In [None]:
# Tally how often each ingredient occurs across all recipes of the cuisine
ingredient_counts = defaultdict(int)
for ingredient in (name for recipe in recipes for name in recipe['ingredients']):
    ingredient_counts[ingredient] += 1

# Function to compute contribution χi
def ingredient_contribution(ingredient, recipes, flavor_compounds, Nc, ingredient_counts):
    """Return the contribution (chi_i) of one ingredient to the cuisine's Ns.

    The first term sums, over every recipe R that contains the ingredient,
    the number of compounds it shares with each other ingredient of R,
    normalized by that recipe's own pair factor 2 / (n_R * (n_R - 1)), and
    divides the total by Nc. The second term is the random-reference
    expectation; its shared-compound sum is still a zero placeholder, so the
    term currently evaluates to 0.

    Args:
        ingredient: Name of the ingredient to score.
        recipes: Sequence of mappings, each with an 'ingredients' list.
        flavor_compounds: Mapping from ingredient name to its set of compounds.
        Nc: Number of recipes in the cuisine.
        ingredient_counts: Mapping from ingredient name to its occurrence count.

    Returns:
        The contribution as a float. 0.0 when Nc is 0 or when the ingredient
        appears in no recipe that has at least two ingredients.

    Raises:
        KeyError: If the ingredient, or an ingredient it is paired with, has
            no entry in ``flavor_compounds``.
    """
    if not Nc:
        # An empty cuisine has nothing to contribute to (and nothing to divide by).
        return 0.0

    fi = ingredient_counts.get(ingredient, 0)
    sum_shared_rand = 0  # Placeholder: should be computed using the random dataset

    normalized_shared = 0.0
    for recipe in recipes:
        members = recipe['ingredients']
        nR = len(members)
        # Recipes without the ingredient, or without any pair, add nothing.
        if nR < 2 or ingredient not in members:
            continue
        Ci = flavor_compounds[ingredient]
        shared = sum(
            len(Ci.intersection(flavor_compounds[other]))
            for other in members
            if other != ingredient
        )
        # Normalize with THIS recipe's size; recipes differ in length.
        normalized_shared += (2 / (nR * (nR - 1))) * shared

    term1 = normalized_shared / Nc

    # The null-model term is scaled by the mean recipe size of the cuisine
    # rather than by the size of whichever recipe was visited last.
    mean_nR = sum(len(r['ingredients']) for r in recipes) / len(recipes) if recipes else 0
    term2 = (2 * fi / (Nc * mean_nR)) * sum_shared_rand if mean_nR else 0.0

    return term1 - term2

# Evaluate the contribution of every ingredient that has a flavor-compound entry
contributions = {}
for ingredient in flavor_compounds:
    contributions[ingredient] = ingredient_contribution(
        ingredient, recipes, flavor_compounds, Nc, ingredient_counts
    )

print("Contributions of each ingredient:")
for ingredient, contribution in contributions.items():
    print(f"{ingredient}: {contribution}")

# Step 4: Compute Prevalence Measures

In [None]:
# Function to compute prevalence Pc_i
def ingredient_prevalence(ingredient, recipes, Nc):
    """Return the prevalence of an ingredient: its occurrence count over Nc.

    The count is read from the module-level ``ingredient_counts`` mapping;
    the ``recipes`` argument is accepted but not used.

    Args:
        ingredient: Name of the ingredient.
        recipes: Unused.
        Nc: Number of recipes in the cuisine (the divisor).

    Returns:
        ``ingredient_counts[ingredient] / Nc``.
    """
    return ingredient_counts[ingredient] / Nc

# Compute relative prevalence Pc'_i
def relative_prevalence(ingredient, recipes, Nc, total_recipes):
    """Return the ingredient's prevalence in this cuisine relative to the rest.

    Args:
        ingredient: Name of the ingredient.
        recipes: Recipes of the cuisine, forwarded to ``ingredient_prevalence``.
        Nc: Number of recipes in the cuisine.
        total_recipes: Number of recipes across all cuisines, this one included.

    Returns:
        ``Pc_i - Pc_i_other``, where ``Pc_i`` is the cuisine prevalence and
        ``Pc_i_other = Pc_i * total_recipes / (total_recipes - Nc)``. When no
        recipes exist outside the cuisine (``total_recipes <= Nc``) there is
        nothing to compare against, so ``Pc_i`` is returned unchanged.
    """
    Pc_i = ingredient_prevalence(ingredient, recipes, Nc)

    other_recipes = total_recipes - Nc
    if other_recipes <= 0:
        # Avoids dividing by zero when this cuisine is the entire dataset.
        return Pc_i

    # NOTE(review): this estimate rescales this cuisine's own prevalence and
    # never looks at occurrence counts from other cuisines -- confirm it
    # matches the intended definition of the out-of-cuisine prevalence.
    Pc_i_other = Pc_i * total_recipes / other_recipes
    return Pc_i - Pc_i_other

# Prevalence of every ingredient that has a flavor-compound entry
prevalences = {}
for ingredient in flavor_compounds:
    prevalences[ingredient] = ingredient_prevalence(ingredient, recipes, Nc)

# Relative prevalence of the same ingredients.
# NOTE(review): total_recipes is passed as len(recipes), the same value as Nc;
# relative_prevalence subtracts Nc from it, so supply the all-cuisine recipe
# count here once real data is available.
relative_prevalences = {}
for ingredient in flavor_compounds:
    relative_prevalences[ingredient] = relative_prevalence(
        ingredient, recipes, Nc, total_recipes=len(recipes)
    )

print("Prevalence of each ingredient:")
for ingredient, prevalence in prevalences.items():
    print(f"{ingredient}: {prevalence}")

print("Relative Prevalence of each ingredient:")
for ingredient, rel_prevalence in relative_prevalences.items():
    print(f"{ingredient}: {rel_prevalence}")

# Notes:

	•	The code snippets provided here take a simplified approach and use placeholder values for the random-reference calculations. In a real implementation, you would need to generate a random dataset and compute the corresponding values for Ns_rand and sum_shared_rand. Until then both are fixed at 0, so ΔNs simply equals Ns_real and the second term of each ingredient's contribution vanishes.
	•	Make sure to replace placeholders with appropriate calculations based on your actual dataset and the random reference model.
	•	The data structures and sample data provided here are for demonstration purposes. Your actual data will need to be loaded and processed accordingly.