In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import os
from scipy.stats import nbinom
from scipy.stats import poisson
import CRPS.CRPS as pscore

# create the feature- and actuals-data list
# set the feature and actuals year lists
feature_years = ['2017','2018','2019','2020']
actual_years = ['2018','2019','2020','2021']

actuals_df_list = []
features_df_list = []

for i in range(len(feature_years)):
    # paths to the data
    absolute_path = os.path.abspath('')
    relative_path_features = "data\cm_features_to_oct" + feature_years[i] + ".parquet"
    relative_path_actuals = "data\cm_actuals_" + actual_years[i] + ".parquet"

    path_features = os.path.join(absolute_path, relative_path_features)
    path_actuals = os.path.join(absolute_path, relative_path_actuals)

    # append datasets to the lists
    actuals_df_list.append({'year':actual_years[i], 'data':pd.read_parquet(path_actuals, engine='pyarrow')})
    features_df_list.append({'year':feature_years[i], 'data':pd.read_parquet(path_features, engine='pyarrow')})

# concat the feature datasets, so that every data contains the observations to_oct_17
for i in range(1,len(features_df_list)):
    features_df_list[i]['data'] = pd.concat([features_df_list[i-1]['data'], features_df_list[i]['data']])

# function to check, if the last 36 months are in the dataset of a country
def check_last_36Months(country, yearindex):
    month_list = features_df_list[yearindex]['data'].index.get_level_values('month_id').unique().tolist()
    month_list = month_list[-36:]
     
    last_36_months = True
    for month in month_list:
        if month not in country.index.get_level_values('month_id'):
            last_36_months = False
            break

    return last_36_months


# list of all countries that are present in all four datasets
country_list = []
for i in range(len(features_df_list)):
    country_list.extend(features_df_list[i]['data'].index.get_level_values('country_id').unique().tolist())

unique_list = []

for item in country_list:
    if country_list.count(item) == 4:
        unique_list.append(item)

country_list = list(set(unique_list))

# country group list of all four datasets
country_feature_group_list = []
country_actual_group_list = []
# fill list 
for i in range(len(features_df_list)):
    country_feature_group_list.append(features_df_list[i]['data'].groupby('country_id'))
    country_actual_group_list.append(actuals_df_list[i]['data'].groupby('country_id'))


# modify country_list so that it contains only country_ids 
# that have the last 36 months of observations in ALL DATASETS!
dummy_list = []
for countryIndex in country_list:
    # loop through datasets
    for i in range(len(features_df_list)):
        dummy_hasLast36_months = True
        if check_last_36Months(country_feature_group_list[i].get_group(countryIndex), i) is not True:
            dummy_hasLast36_months = False
    
    if dummy_hasLast36_months is True:
        dummy_list.append(countryIndex)

# the values in country_list are the 'country_id'
country_list = dummy_list

In [None]:
import statsmodels.discrete.truncated_model as smtc
from statsmodels.discrete.discrete_model import (
    Poisson, NegativeBinomial, NegativeBinomialP, GeneralizedPoisson)
from statsmodels.discrete.count_model import (
    ZeroInflatedPoisson,
    ZeroInflatedGeneralizedPoisson,
    ZeroInflatedNegativeBinomialP
    )

from statsmodels.discrete.truncated_model import (
    TruncatedLFPoisson,
    TruncatedLFNegativeBinomialP,
    _RCensoredPoisson,
    HurdleCountModel,
    )

In [10]:
from statsmodels.discrete.count_model import ZeroInflatedNegativeBinomial

data = country_feature_group_list[0].get_group(246)

# Hurdle-Modell erstellen
model = ZeroInflatedNegativeBinomial(data.loc[:,'ged_sb'], np.ones_like(data.loc[:,'ged_sb']), exog_infl=np.ones_like(data.loc[:,'ged_sb']))

# Modell anpassen
fit_model = model.fit()

# Vorhersagen aus dem Modell generieren
predictions = fit_model.predict()

# Empirische Verteilung aus den Vorhersagen ziehen
simulated_data = np.random.negative_binomial(predictions, fit_model.alpha, size=len(data))

# Datenplot
plt.scatter(data.loc[:,'month_id'], data.loc[:,'ged_sb'], label='Daten')
plt.scatter(data.loc[:,'month_id'], simulated_data, label='Simulierte Daten')
plt.xlabel('Month ID')
plt.ylabel('Fatalities')
plt.legend()
plt.show()


ImportError: cannot import name 'ZeroInflatedNegativeBinomial' from 'statsmodels.discrete.count_model' (c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\statsmodels\discrete\count_model.py)

KeyError: 'month_id'