In [None]:
from fpdf import FPDF
import pandas as pd

# Function to calculate ingredients with value 1 for each region
def calculate_ingredients_with_value_1(df):
    """Collect, per region, the ingredients flagged with 1 in at least one row.

    Args:
        df: DataFrame with a 'region' column; every other column except
            'country' is treated as an ingredient indicator.

    Returns:
        Dict mapping each region (in order of first appearance in ``df``) to
        a dict ``{ingredient: 1}`` whose keys are in ascending sorted order.
        A region whose rows flag no ingredient maps to an empty dict.
    """
    non_ingredient_columns = ('region', 'country')
    ingredients_by_region = {}

    for _, row in df.iterrows():
        # setdefault registers the region even if this row flags nothing,
        # so every region seen in df gets an entry.
        present = ingredients_by_region.setdefault(row['region'], set())
        present.update(
            col for col, value in row.items()
            if value == 1 and col not in non_ingredient_columns
        )

    # Sort once after all rows of a region are merged. Sorting each row and
    # then merging with dict.update() would append ingredients first seen in
    # later rows at the end, leaving the combined listing unsorted.
    return {
        region: dict.fromkeys(sorted(names), 1)
        for region, names in ingredients_by_region.items()
    }

# Function to calculate the percentage prevalence of an ingredient in a region
def percentage_prevalence(ingredient_column, Nc):
    """Return the sum of ``ingredient_column`` divided by ``Nc``.

    The value is returned as a fraction; it is not multiplied by 100.

    Args:
        ingredient_column: Object exposing ``.sum()`` (e.g. a pandas Series
            of 0/1 indicators).
        Nc: Divisor; the caller passes the number of rows in the region.
    """
    occurrences = ingredient_column.sum()
    return occurrences / Nc

# Function to calculate prevalences for each region
def calculate_prevalences_by_region(df, top_n=10):
    """Compute the most prevalent ingredients for every region.

    Args:
        df: DataFrame with a 'region' column; every other column except
            'country' is treated as an ingredient indicator.
        top_n: Number of highest-prevalence ingredients to keep per region.
            Defaults to 10.

    Returns:
        Dict mapping each region to a dict ``{ingredient: prevalence}``
        ordered from highest to lowest prevalence, holding at most ``top_n``
        entries. Prevalence is the column sum over the region's rows divided
        by the number of rows in that region.
    """
    # Select ingredient columns by name rather than by position so the
    # result does not depend on 'region'/'country' being the first two
    # columns of the frame.
    ingredient_columns = [
        col for col in df.columns if col not in ('region', 'country')
    ]
    region_prevalences = {}

    for region in df['region'].unique():
        region_df = df[df['region'] == region]

        # Nc: number of rows belonging to the current region
        Nc_region = len(region_df)

        prevalences = {
            ingredient: percentage_prevalence(region_df[ingredient], Nc_region)
            for ingredient in ingredient_columns
        }

        # sorted() is stable, so ingredients with equal prevalence keep
        # their column order.
        ranked = sorted(
            prevalences.items(), key=lambda item: item[1], reverse=True
        )
        region_prevalences[region] = dict(ranked[:top_n])

    return region_prevalences

# Example dataframe (ingredients_df_with_names) assumed to be defined elsewhere

# Calculate ingredients with value 1 for each region
ingredients_with_1 = calculate_ingredients_with_value_1(ingredients_df_with_names)

# Calculate prevalences for each region
region_prevalences = calculate_prevalences_by_region(ingredients_df_with_names)

# Build the ingredient -> compounds lookup once, instead of re-scanning
# ingr_small_copy for every ingredient of every region. setdefault keeps the
# first row for a repeated ingredient name, i.e. a first-match lookup.
# NOTE(review): assumes ingredient names are hashable (strings) - confirm.
compounds_by_ingredient = {}
for name, compounds in zip(ingr_small_copy['ingredient_name'],
                           ingr_small_copy['compounds_present']):
    compounds_by_ingredient.setdefault(name, compounds)

# Create a separate PDF for each region
for region, ingredients in ingredients_with_1.items():
    # Initialize PDF for the current region
    pdf = FPDF()
    pdf.add_page()

    # Set font
    pdf.set_font("Arial", size=12)

    # A cell width of 0 makes the cell extend to the right margin. A fixed
    # width of 200 is wider than the printable area of the default page, so
    # wrapped text would run past the right margin.
    pdf.cell(0, 10, f"Cuisine: {region}", ln=True)

    # Single comma-separated string of the region's ingredients, in the
    # order they are stored in the dict
    ingredients_list = ', '.join(ingredients)

    # multi_cell wraps the text automatically
    pdf.multi_cell(0, 10, f"Ingredients: {ingredients_list}")

    pdf.cell(0, 10, "", ln=True)  # Empty space

    # Add prevalences information
    pdf.multi_cell(0, 10, "Top 10 Ingredient Prevalences:")

    for ingredient, prevalence in region_prevalences[region].items():
        pdf.multi_cell(0, 10, f"{ingredient}: {prevalence:.2%}")

    pdf.cell(0, 10, "", ln=True)  # Empty space

    # Add the compounds for every ingredient that has an entry in
    # ingr_small_copy; ingredients without an entry are skipped.
    for ingredient in ingredients:
        if ingredient not in compounds_by_ingredient:
            continue
        # NOTE(review): assumes 'compounds_present' holds an iterable of
        # compounds (e.g. a list); a plain string would be joined
        # character by character - confirm.
        compounds_list = ', '.join(map(str, compounds_by_ingredient[ingredient]))
        pdf.multi_cell(0, 10, f"Ingredient: {ingredient}")
        pdf.multi_cell(0, 10, f"Compounds Present: {compounds_list}")

    pdf.cell(0, 10, "", ln=True)  # Empty space

    # Save the PDF for the current region with cuisine name
    pdf_file = f"{region}_ingredients_by_cuisine_compounds.pdf"
    pdf.output(pdf_file)

    print(f"PDF created successfully: {pdf_file}")