In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from hmmlearn import hmm

In [None]:
# Load the raw daily sales table.
# NOTE(review): the file name is spelled "baugette" here but "baguette" in the
# output path used later — confirm it matches the file on disk.
fp = "../data/daily_baugette_sales.csv"
df = pd.read_csv(filepath_or_buffer=fp)

In [None]:
# Smallest CWOY value to keep: 40 below the largest CWOY present in the data.
# NOTE(review): combined with the `>=` filter that follows, this keeps 41
# distinct CWOY values if they are consecutive integers — confirm whether
# 40 or 41 weeks is intended.
CUT_OFF = df["CWOY"].max() - 40

In [None]:
# Boolean mask of the rows at or after the cut-off, then keep only those rows.
select_cut_off = df["CWOY"].ge(CUT_OFF)
df = df.loc[select_cut_off]

In [None]:
# Only the timestamp and the sold quantity are used from here on.
cols_needed = ["datetime", "Quantity"]
df = df.loc[:, cols_needed]

In [None]:
# Kernel density estimate of the Quantity column, with a grid for readability.
ax = df["Quantity"].plot.kde()
ax.grid(True)

In [None]:
# Side-by-side view of the Quantity distribution: KDE on the left, histogram on
# the right, drawn on explicit Axes instead of the pyplot "current axes".
fig, (ax_kde, ax_hist) = plt.subplots(1, 2)
fig.set_size_inches(10, 6, forward=True)
fig.subplots_adjust(hspace=0.3, wspace=0.5)

# Left panel: kernel density estimate.
df["Quantity"].plot.kde(ax=ax_kde)
ax_kde.grid(True)
ax_kde.set_title("Daily Baguette Sales - KDE")

# Right panel: histogram.
ax_hist.set_title("Daily Baguette Sales - Histogram")
df["Quantity"].plot.hist(ax=ax_hist)
fig.tight_layout()
ax_hist.grid(True)

In [None]:
# Persist the filtered two-column frame; the row index is written as the first
# CSV column (index=True).
fp = "../data/daily_baguette_last_40_weeks.csv"
df.to_csv(path_or_buf=fp, index=True)

In [None]:
import unicodedata as ud

# Mean and variance of Quantity, each rounded to 3 decimals.
summary_stats = {
    "mean": df["Quantity"].mean().round(3),
    "var": df["Quantity"].var().round(3),
}

In [None]:
# One row per statistic: the dict keys become the index, the values a single column.
df_summ_stats = pd.DataFrame.from_dict(
    data=summary_stats,
    orient="index",
)

In [None]:
# Build the two-column ("Parameter", "Value") summary table directly from
# `summary_stats`. The previous reset_index() + column assignment was not
# idempotent: running the cell a second time added a third column, so assigning
# two column names raised a length-mismatch error. Rebuilding from the dict
# gives the same table on every run.
df_summ_stats = pd.DataFrame(
    list(summary_stats.items()),
    columns=["Parameter", "Value"],
)

In [None]:
# Display the summary table (Parameter / Value) as the cell output.
df_summ_stats

In [None]:
import numpy as np
import scipy.stats as stats

In [None]:
# Fit on everything except the last 14 rows.
# NOTE(review): presumably the final 14 days are held out for later
# evaluation — confirm.
df_fit = df.iloc[:-14]

In [None]:
# Fit a gamma distribution to the Quantity values of the fitting subset.
# scipy returns the estimates as a tuple (shape, loc, scale).
# NOTE(review): loc is left free here, while the gamma-Poisson mixture described
# further down only uses shape and scale — confirm the fitted loc is negligible
# or consider fixing it with floc=0.
params = stats.gamma.fit(df_fit["Quantity"])

In [None]:
# Extract the fitted parameters in scipy's order: shape, location, scale.
# `fitted_beta` holds the scale parameter (see the print labels that follow).
fitted_alpha, fitted_loc, fitted_beta = params

In [None]:
# Report each fitted gamma parameter on its own line.
for label, value in (
    ("Fitted shape parameter (alpha):", fitted_alpha),
    ("Fitted location parameter (loc):", fitted_loc),
    ("Fitted scale parameter (beta):", fitted_beta),
):
    print(label, value)

In [None]:
from scipy.stats import nbinom

Check out [this Wikipedia link](https://en.wikipedia.org/wiki/Negative_binomial_distribution#Gamma%E2%80%93Poisson_mixture) and then use the fact that $n$ is the shape parameter of the fitted gamma distribution and that its scale parameter is $\theta = \frac{1-p}{p}$. Solving for $p$ gives $p = \frac{1}{1+\theta}$, which is $\frac{1}{6.97}$ for the fitted scale.

In [None]:
# Negative-binomial parameters used for sampling below.
# n is the shape parameter and p the success probability described in the
# markdown note above; presumably both were copied from the gamma fit output.
n = 9.42
p = 1.0 / 6.97
# Number of draws per synthetic sample.
NUM_SAMPLES = 2_000

In [None]:
# Draw two independent negative-binomial samples of NUM_SAMPLES values each.
# A seeded generator is passed as random_state so the draws (and the CSV written
# from them) are identical on every Restart & Run All. Both calls share the
# generator, so r2 continues the stream after r1 rather than repeating it.
SAMPLE_SEED = 42
rng = np.random.default_rng(SAMPLE_SEED)
r1 = nbinom.rvs(n, p, size=NUM_SAMPLES, random_state=rng)
r2 = nbinom.rvs(n, p, size=NUM_SAMPLES, random_state=rng)

In [None]:
# Two-column frame of the generated samples. A dict of equal-length arrays is
# column-oriented data, so the plain DataFrame constructor is the documented
# entry point (from_records is meant for row records / structured arrays).
df_gen = pd.DataFrame({"xi": r1, "yi": r2})

In [None]:
# Make the column labels explicit: "xi" and "yi".
df_gen = df_gen.set_axis(["xi", "yi"], axis="columns")

In [None]:
# Write the generated samples to disk without the row index.
fp = "../data/samples_for_stoch_estimation.csv"
df_gen.to_csv(path_or_buf=fp, index=False)

In [None]:
# Pair the observed quantities with one negative-binomial draw per observed row
# so the two distributions can be compared visually.
# - The draw is seeded so the comparison is reproducible across runs.
# - The frame is built with the DataFrame constructor (column-oriented dict);
#   the drawn array is matched to the "actual" Series by position, and the
#   resulting frame carries that Series' index.
COMPARISON_SEED = 43
sample = {
    "actual": df_fit["Quantity"],
    "fitted": nbinom.rvs(
        n,
        p,
        size=len(df_fit),
        random_state=np.random.default_rng(COMPARISON_SEED),
    ),
}
df_sample = pd.DataFrame(sample)

In [None]:
# Compare the KDE of the observed quantities (left) with the KDE of the values
# drawn from the negative binomial (right), using explicit Axes objects.
fig, (ax_actual, ax_fitted) = plt.subplots(1, 2)
fig.set_size_inches(10, 6, forward=True)
fig.subplots_adjust(hspace=0.3, wspace=0.5)

# Left panel: observed data.
df_sample["actual"].plot.kde(ax=ax_actual)
ax_actual.grid(True)
ax_actual.set_title("actual")

# Right panel: synthetic draws.
ax_fitted.set_title("fitted (from samples drawn)")
df_sample["fitted"].plot.kde(ax=ax_fitted)
fig.tight_layout()
ax_fitted.grid(True)