In [29]:
import pandas as pd
import os

In [30]:
def calculate_average_pollen_per_visit(df):
    """
    Calculate the average pollen load per visit for each bee species and flower species pair

    Parameters:
    df (pd.DataFrame): The merged DataFrame containing pollen load and visitation information.                          

    Returns:
    pd.DataFrame: DataFrame with 'bee_species', 'flower_species', and 'avg_pollen_per_visit' columns.
    """
    # Group by bee species, then compute average pollen load
    avg_pollen = (                          
        df.groupby(['bee_species', 'flower_species'])['pollen_load']
        .mean()
        .reset_index()
        .rename(coulmns={'pollen_load': 'average_pollen_per_visit'})
    )
    return avg_pollen 

def find_top_flower_for_each_bee(df):
    """
    Identifies, for each bee species, the flower species with the highest average pollen load per visit. 

    Parameters:
    df (pd.DataFrame): The merged DataFrame containing pollen load and visitation information.                   

    Returns:
    pd.DataFrame: DataFrame with 'bee_species', 'flower_species', and 'avg_pollen_per_visit' columns for top flowers.
    """
    # Use the previous function to get average pollen load per bee-flower pair          
    avg_pollen = calculate_average_pollen_per_visit(df)
    
    # For each bee, find the flower with the maximum average pollen load
    top_flower = (
        
avg_pollen.loc[avg_pollen.groupby('bee_species')['avg_pollen_per_visit'].idxmax()]
        .reset_index(drop=True)
    )
    return top_flower

def summarize_visitation_patterns(df):
    """
    Summarizes total visits and mean pollen load for each flower species across all bees.
    
        Parameters:
    df (pd.DataFrame): The merged DataFrame containing pollen load and visitation information.
    
    Returns:
    pd.DataFrame: DataFrame with 'flower_species', 'total_visits', and 'mean_pollen_load' columns.
    """
    # Aggregate total visits and mean pollen load by flower species
    summary = (
        df.groupby('flower_species')
        .agg(total_visits=('visit_id', 'count'), mean_pollen_load=('pollen_load', 'mean'))
        .reset_index()
    )
    return summary

In [31]:
def summarize_visitation_patterns(df):
    """
    Summarizes total visits and mean pollen load for each flower species across all bees.
    
    Parameters:
    df (pd.DataFrame): The merged DataFrame containing pollen load and visitation information.
    
    Returns:
    pd.DataFrame: DataFrame with 'flower_species', 'total_visits', and 'mean_pollen_load' columns.
    """
    # Aggregate total visits and mean pollen load by flower species
    summary = (
        df.groupby('flower_species')
        .agg(total_visits=('visit_id', 'count'), mean_pollen_load=('pollen_load', 'mean'))
        .reset_index()
    )
    return summary

In [32]:
def safe_load_csv(file_paths):
    """Try loading CSV from multiple possible locations"""
    
    for path in file_paths:
        if os.path.exists(path):
            try:
                df = pd.read_csv(path)
                print(f"✅ Successfully loaded: {path}")
                print(f"   Shape: {df.shape}")
                return df
            except Exception as e:
                print(f"❌ Error loading {path}: {e}")
                continue
        else:
            print(f"❌ File not found: {path}")
    
    print("❌ Could not load file from any location")
    return None

# Try multiple possible paths
possible_paths = [
    '2013_2015_flower_visitation_data.csv',           # current directory
    '../data/2013_2015_flower_visitation_data.csv',   # parent/data folder
    './data/2013_2015_flower_visitation_data.csv',    # local data folder
    'data/2013_2015_flower_visitation_data.csv'       # relative data folder
]

flower_visitation_data = safe_load_csv(possible_paths)

if flower_visitation_data is not None:
    print("Data loaded successfully!")
    print(flower_visitation_data.head())

❌ File not found: 2013_2015_flower_visitation_data.csv
✅ Successfully loaded: ../data/2013_2015_flower_visitation_data.csv
   Shape: (2467, 11)
Data loaded successfully!
    Farm Type Round       Date             Species Number   Caste  \
0  Farm1  ELS  Four  11/8/2013    Andrena minutula      1  Female   
1  Farm1  ELS  Four  11/8/2013    Andrena minutula      3  Female   
2  Farm1  ELS  Four   5/8/2014    Andrena minutula      1  Female   
3  Farm1  ELS  Four   5/8/2014  Halictus tumulorum      1    Male   
4  Farm1  ELS  Four   5/8/2014  Halictus tumulorum      3    Male   

                   Visiting Status Purpose      Family  
0     Heracleum sphondylium   Wild  Pollen    Apiaceae  
1     Heracleum sphondylium   Wild  Pollen    Apiaceae  
2     Heracleum sphondylium   Wild  Pollen    Apiaceae  
3  Helminthotheca echioides   Wild  Nectar  Asteraceae  
4     Pulicaria dysenterica   Wild  Nectar  Asteraceae  


In [33]:
# Returns number of unique species visited per farm
unique_species_per_farm = flower_visitation_data.groupby('Farm')['Species'].nunique().reset_index()
unique_species_per_farm.columns = ['Farm', 'UniqueSpeciesCount']
print(unique_species_per_farm)


                                                Farm  UniqueSpeciesCount
1   >>>>>>> 4190a2576b8c680e0fe22605b816e30a71cc96e6                   0
2                                               Farm                   1
3                                              Farm1                  18
4                                             Farm10                  30
5                                             Farm11                  33
6                                             Farm12                  33
7                                             Farm13                  30
8                                             Farm14                  28
9                                             Farm15                  34
10                                            Farm16                  14
11                                            Farm17                   6
12                                            Farm18                   8
13                                            Farm1

In [34]:
# Returns average number of visitors per Species
avg_visitors_per_species = flower_visitation_data.groupby('Species')['Number'].apply(lambda x: pd.to_numeric(x, errors='coerce').mean()).reset_index()
avg_visitors_per_species.columns = ['Species', 'AvgVisitors']
print(avg_visitors_per_species)



                   Species  AvgVisitors
0       Andrena alfkenella     3.272727
1          Andrena bicolor     1.375000
2       Andrena bimaculata     1.000000
3        Andrena bucephala     1.000000
4      Andrena carantonica     1.562500
..                     ...          ...
82     Sphecodes ephippius     1.000000
83   Sphecodes geoffrellus     1.400000
84  Sphecodes monilicornis     1.000000
85    Sphecodes puncticeps     1.000000
86    Sphecodes spinulosus     1.000000

[87 rows x 2 columns]


In [35]:
# Returns farms with more than N visits (parameterized as 3 here)
visit_count_per_farm = flower_visitation_data.groupby('Farm').size().reset_index(name='VisitCount')
farms_more_than_3_visits = visit_count_per_farm[visit_count_per_farm['VisitCount'] > 3]
print(farms_more_than_3_visits)

      Farm  VisitCount
3    Farm1          96
4   Farm10         200
5   Farm11         186
6   Farm12         188
7   Farm13         224
8   Farm14         154
9   Farm15         290
10  Farm16          36
11  Farm17          24
12  Farm18          24
13  Farm19          14
14   Farm2         194
15   Farm3         114
16   Farm4         262
17   Farm5         100
18   Farm6         170
19   Farm7          10
20   Farm8          78
21   Farm9         100
