# In [1]:
import pandas as pd

# In [3]:
def calculate_average_pollen_per_visit(df):
    """
    Calculate the average pollen load per visit for each bee/flower species pair.

    Parameters:
    df (pd.DataFrame): DataFrame with at least the columns 'bee_species',
        'flower_species', and 'pollen_load' (one row per visit).

    Returns:
    pd.DataFrame: One row per (bee_species, flower_species) pair with the
        columns 'bee_species', 'flower_species', and 'avg_pollen_per_visit'.
    """
    # Group by the (bee, flower) pair, not just the bee, so each combination
    # gets its own mean pollen load.
    avg_pollen = (
        df.groupby(['bee_species', 'flower_species'])['pollen_load']
        .mean()
        .reset_index()
        # The output column must be named 'avg_pollen_per_visit': that is the
        # name documented above and the one find_top_flower_for_each_bee reads.
        .rename(columns={'pollen_load': 'avg_pollen_per_visit'})
    )
    return avg_pollen

# In [5]:
def find_top_flower_for_each_bee(df):
    """
    Identify, for each bee species, the flower species with the highest average pollen load per visit.

    Relies on calculate_average_pollen_per_visit returning a DataFrame that
    has 'bee_species' and 'avg_pollen_per_visit' columns.

    Parameters:
    df (pd.DataFrame): The merged DataFrame containing pollen load and visitation information.

    Returns:
    pd.DataFrame: One row per bee species (the row of its top flower), with a
        fresh 0..n-1 index. Bee species whose averages are all missing (NaN)
        are omitted, since no top flower can be determined for them.
    """
    # Average pollen load per bee-flower pair.
    avg_pollen = calculate_average_pollen_per_visit(df)

    # A NaN average can never be the maximum, and a bee whose averages are all
    # NaN would make idxmax produce a missing label that .loc cannot look up.
    ranked = avg_pollen.dropna(subset=['avg_pollen_per_visit'])

    # observed=True only matters for categorical bee_species: it skips
    # categories that have no remaining rows instead of creating empty groups.
    top_row_labels = (
        ranked.groupby('bee_species', observed=True)['avg_pollen_per_visit']
        .idxmax()
    )

    # Select the winning row for each bee and discard the old row labels.
    top_flower = ranked.loc[top_row_labels].reset_index(drop=True)
    return top_flower

def summarize_visitation_patterns(df):
    """
    Build a per-flower summary of visitation across all bees.

    Parameters:
    df (pd.DataFrame): DataFrame with at least the columns 'flower_species',
        'visit_id', and 'pollen_load'.

    Returns:
    pd.DataFrame: One row per flower species with the columns
        'flower_species', 'total_visits' (count of non-null 'visit_id'
        values), and 'mean_pollen_load' (mean of 'pollen_load').
    """
    # Output column name -> (source column, aggregation function).
    aggregations = {
        'total_visits': ('visit_id', 'count'),
        'mean_pollen_load': ('pollen_load', 'mean'),
    }
    per_flower = df.groupby('flower_species').agg(**aggregations)

    # groupby leaves flower_species as the index; turn it back into a column.
    return per_flower.reset_index()