In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [3]:
data = pd.read_csv("avg_simulation_v5.csv")

data.drop(columns=['Unnamed: 0'], inplace=True)
data.head()

Unnamed: 0,Infectious_time,Population_size,Density,avg_cases
0,1,30,0.01,2.1
1,1,30,0.1,13.84
2,1,30,0.2,18.77
3,1,30,0.5,23.57
4,1,30,0.75,25.01


In [4]:
# Assuming you have your data stored in a DataFrame called 'data'
X = data.drop(columns=['avg_cases'])
y = data['avg_cases']


# Define Generalized Linear Model (GLM) with Poisson distribution family and log link
glm_model_Poisson = sm.GLM(y, X, family=sm.families.Poisson(link=sm.families.links.Log()))

# Fit the model
glm_results_Poisson = glm_model_Poisson.fit()

# Inspect results
print(glm_results_Poisson.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:              avg_cases   No. Observations:                  448
Model:                            GLM   Df Residuals:                      445
Model Family:                 Poisson   Df Model:                            2
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -43142.
Date:                Mon, 27 May 2024   Deviance:                       83539.
Time:                        15:30:56   Pearson chi2:                 1.86e+05
No. Iterations:                     7   Pseudo R-squ. (CS):         -1.621e+13
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Infectious_time     0.2754      0.001    2

Predictor 1: Infectious time 

A one unit increase of infectious time is associated with approximately an 11.31% decrease in the expected count of 


In [5]:
glm_results_Poisson.params

Infectious_time    0.275376
Population_size    0.002989
Density            1.461859
dtype: float64

In [6]:
def calculate_percentage_increase(params):
    """
    Calculates the percentage increase based on the given parameters.

    Args:
        params (array-like): The parameters used for the calculation.

    Returns:
        float: The percentage increase.

    """
    # Calculate the exponential of the parameters and subtract 1
    result = (np.exp(params) - 1) * 100

    return result

percentage_increase = calculate_percentage_increase(glm_results_Poisson.params)

print(percentage_increase)



Infectious_time     31.702601
Population_size      0.299322
Density            331.397059
dtype: float64


In [8]:
# Define Generalized Linear Model (GLM) with Poisson distribution family and log link
glm_model_binomial_Log = sm.GLM(y, X, family=sm.families.Binomial(link=sm.families.links.Log()))

# Fit the model
glm_model_binomial_Log = glm_model_binomial_Log.fit()

# Inspect results
print(glm_model_binomial_Log.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:              avg_cases   No. Observations:                  448
Model:                            GLM   Df Residuals:                      445
Model Family:                Binomial   Df Model:                            2
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                    nan
Date:                Mon, 27 May 2024   Deviance:                      -7682.7
Time:                        15:32:06   Pearson chi2:                 3.99e+22
No. Iterations:                    18   Pseudo R-squ. (CS):                nan
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Infectious_time     0.1167   1.38e-12   8.

  special.gammaln(n - y + 1) + y * np.log(mu / (1 - mu + 1e-20)) +
  n * np.log(1 - mu + 1e-20)) * var_weights


In [10]:
glm_model_Binomial_Logit = sm.GLM(y, X, family=sm.families.Binomial())

# Fit the model
glm_results_Binomial_Logit = glm_model_binomial.fit()

# Inspect results
print(glm_results_Binomial_Logit.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:              avg_cases   No. Observations:                  448
Model:                            GLM   Df Residuals:                      445
Model Family:                Binomial   Df Model:                            2
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                   -inf
Date:                Mon, 27 May 2024   Deviance:                   5.2506e+06
Time:                        15:40:49   Pearson chi2:                 9.92e+22
No. Iterations:                     2   Pseudo R-squ. (CS):                nan
Covariance Type:            nonrobust                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Infectious_time  2.165e+16   8.21e+05   2.

  special.gammaln(n - y + 1) + y * np.log(mu / (1 - mu + 1e-20)) +
  n * np.log(1 - mu + 1e-20)) * var_weights
