In [55]:
import pandas as pd
import rpy2
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf


In [77]:
# LOAD BOMBUS VISIT TIMES, EXTRACT SPECIES ABUNDANCE DISTRIBUTION
# The two visit timestamp columns are parsed as datetimes on load.
visit_durations_clover = pd.read_csv(
    'csvs/visit_durations_clover.csv',
    parse_dates=['visit_start', 'visit_end'],
)

# One row per species with its number of recorded visits; rows labelled
# 'unk' are dropped so they do not enter the abundance distribution.
# The original row labels are kept (no re-indexing after the filter).
species_count = (
    visit_durations_clover
    .value_counts('species')
    .reset_index()
    .loc[lambda counts: counts['species'] != 'unk']
)
print(species_count)

        species  count
0  vosnesenskii    173
1      fervidus     70
3  griseocollis     13
4    nevadensis      2
5     appositus      1
6        mixtus      1


In [57]:
# Load the visit-count table used by the count models below; the 'date'
# column is parsed as datetimes.
cnt_visits_per_hr = pd.read_csv(
    'csvs/cnt_visits_per_hour.csv',
    parse_dates=['date'],
)

In [58]:
# FIT BASELINE POISSON MODEL
# Intercept-only Poisson GLM for visit_count, with log(hours) passed as the
# offset term.

# Adds a 'log_hours' column to the frame; re-running this cell recomputes
# the same values.
cnt_visits_per_hr['log_hours'] = np.log(cnt_visits_per_hr['hours'])

model_pois = smf.glm(
    "visit_count ~ 1",
    cnt_visits_per_hr,
    offset=cnt_visits_per_hr['log_hours'],
    family=sm.families.Poisson(),
).fit()

# CHECK FOR OVERDISPERSION (>1.5?)
# Dispersion statistic = sum of squared Pearson residuals / residual d.o.f.
pearson_chi2 = sum(model_pois.resid_pearson**2)
dispersion = pearson_chi2 / model_pois.df_resid
print("Dispersion =", dispersion)
if dispersion > 1.5:
    print("OVERDISPERSED USE NEG BIONOMIAL")

Dispersion = 3.316910978226268
OVERDISPERSED USE NEG BIONOMIAL


In [69]:
# FIT NEGATIVE BINOMIAL MODEL DUE TO OVERDISPERSION
# NOTE: the GLM NegativeBinomial family does not estimate the dispersion
# parameter alpha; it is held fixed at the value passed to the family.
# alpha=1.0 is the library default this cell previously relied on implicitly;
# it is now passed explicitly so the assumption is visible.
model_nb = smf.glm(
    formula="visit_count ~ 1",
    data=cnt_visits_per_hr,
    family=sm.families.NegativeBinomial(alpha=1.0),
    offset=cnt_visits_per_hr['log_hours']
).fit()

print(model_nb.summary())

# Intercept is on the log scale (log link), so exponentiate to get the rate.
rate_per_hour = np.exp(model_nb.params['Intercept'])

print("\nEstimated visit rate per hour:", rate_per_hour)
print()

# CHECK FOR ZERO-INFLATION
# Compare the observed share of zero counts with the share of zeros the
# fitted NB model predicts: P(Y = 0) = (1 + alpha * mu) ** (-1 / alpha).
observed_zero_prop = (cnt_visits_per_hr['visit_count'] == 0).mean()
mu = model_nb.predict()  # fitted means for the observations used in the fit
# Read alpha from the fitted family. `model_nb.scale` is the GLM scale
# (reported as 1.0 in the summary for this family), NOT the NB dispersion;
# it only matched alpha by coincidence while alpha was 1.0.
alpha = model_nb.family.alpha
predicted_zero_prob = np.mean((1 + alpha * mu) ** (-1/alpha))

print("Observed zero proportion:", observed_zero_prop)
print("Predicted zero (NB) proportion:", predicted_zero_prob)
print()
if predicted_zero_prob > observed_zero_prop:
    print('NO ZERO INFLATION NECESSARY')

                 Generalized Linear Model Regression Results                  
Dep. Variable:            visit_count   No. Observations:                   35
Model:                            GLM   Df Residuals:                       34
Model Family:        NegativeBinomial   Df Model:                            0
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -74.101
Date:                Wed, 15 Oct 2025   Deviance:                       31.951
Time:                        12:06:26   Pearson chi2:                     30.1
No. Iterations:                     5   Pseudo R-squ. (CS):              0.000
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.4204      0.201     -7.070      0.0



In [81]:

# Expected number of visits in a 14-hour window. Uses the rate estimated by
# the negative binomial fit (`rate_per_hour`) instead of a number pasted from
# that cell's printed output, so it cannot go stale when the data or the
# model changes.
mu_14h = rate_per_hour * 14 # RATE * 14 HOURS
# Species labels and their relative abundances (counts / total count), in the
# same row order as `species_count`.
species_list = species_count['species'].values
p_species = species_count['count'] / species_count['count'].sum()

def rneg_binomial(mu, alpha=1.0, rng=None): # NEG BINOMIAL DRAW FUNCTION
    """Draw from a negative binomial with mean ``mu`` and dispersion ``alpha``.

    Uses the mean/dispersion parameterisation, Var = mu + alpha * mu**2,
    mapped onto numpy's (n, p) form with n = 1 / alpha and
    p = 1 / (1 + alpha * mu).

    Parameters
    ----------
    mu : float
        Mean of the distribution (must be >= 0; numpy rejects p > 1).
    alpha : float, default 1.0
        Dispersion parameter, must be > 0. The default reproduces the
        previous hard-coded behaviour (n = 1).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, numpy's global legacy state
        (``np.random``) is used, as before.

    Returns
    -------
    int
        A single non-negative draw.

    Raises
    ------
    ValueError
        If ``alpha`` is not strictly positive.
    """
    if not alpha > 0:
        raise ValueError("alpha must be > 0")
    size = 1.0 / alpha  # numpy's "number of successes" n; may be non-integer
    p = 1.0 / (1.0 + alpha * mu) # success probability
    sampler = np.random if rng is None else rng
    return sampler.negative_binomial(size, p)

In [83]:
# Simulate `n_iter` 14-hour windows: draw a total visit count from the
# negative binomial, then split that total across species according to the
# abundance proportions in `p_species`.
n_iter = 10
bootstrap_results = []

for b in range(n_iter):
    # Clamp at zero before using the draw as a multinomial trial count.
    total_visits = max(rneg_binomial(mu_14h), 0)

    # With no visits there is nothing to allocate: record zeros per species.
    sim_counts = (
        np.random.multinomial(total_visits, p_species)
        if total_visits > 0
        else np.zeros_like(p_species, dtype=int)
    )

    # One record per iteration, keyed by species name.
    bootstrap_results.append(
        {species: n_visits for species, n_visits in zip(species_list, sim_counts)}
    )

# One row per simulated window, one column per species.
sim_df = pd.DataFrame(bootstrap_results)
sim_df

Unnamed: 0,vosnesenskii,fervidus,griseocollis,nevadensis,appositus,mixtus
0,4,1,0,0,0,0
1,0,1,0,0,0,0
2,0,0,0,0,0,1
3,0,0,0,0,0,0
4,0,0,0,0,0,0
5,1,0,0,0,0,0
6,2,1,0,0,0,0
7,0,0,0,0,0,0
8,10,3,0,0,0,0
9,0,0,0,0,0,0
