# Explanatory Analysis

Set up libraries

In [1]:
from typing import List

import pymc
import arviz
import pytensor.tensor as pt
from scipy.stats import kstest, lognorm, norm, beta, truncnorm, halfnorm, ks_1samp
import scipy.stats as stats

from typing import Any
from pymc.distributions.dist_math import check_parameters


import plotly.graph_objects as go
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from ipywidgets import FloatSlider, interactive, VBox, HBox, Checkbox, FloatText
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact, interactive
import plotly.figure_factory as ff
import pytensor.tensor as pt
from pytensor.scan import scan



import polars as pl
import numpy as np
import plotly.express as px
from sklearn.metrics import r2_score, mean_absolute_error

import plotly.graph_objects as go

In [2]:
import plotly.io as pio
# Make 'plotly_white' the default template so figures created later in the
# notebook pick it up without passing `template=` explicitly.
pio.templates.default = 'plotly_white'


In [3]:
# Relative path to the input CSV.
data_link = "./Data/data.csv"

# Read the CSV, cast the week-start column to a Date dtype, and order the
# rows chronologically by that column.
df = (
    pl.read_csv(data_link)
    .with_columns(pl.col("week_start").cast(pl.Date))
    .sort("week_start")
)

# Preview the first four rows.
df.head(4)

week_start,new_customers,promo_investment,ppc_brand_investment,ppc_generic_investment,facebook_investment,instagram_investment,year,week,promo_investment_lag0,ppc_brand_investment_lag0,ppc_generic_investment_lag3,facebook_investment_lag2,instagram_investment_lag6,time,sin_52_1,cos_52_1,sin_52_2,cos_52_2
date,i64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64
2020-05-18,777,1304.227029,756.818857,1512.071585,297.193399,4593.059512,2020,21,1304.227029,756.818857,2300.295763,999.081772,82.890357,113,0.885456,0.464723,0.822984,-0.568065
2020-05-25,978,3000.504127,69.654279,399.82446,0.0,792.564928,2020,22,3000.504127,69.654279,4268.329032,637.97411,1246.32403,114,0.935016,0.354605,0.663123,-0.748511
2020-06-01,950,4609.816216,357.467625,471.992936,1769.420849,3281.127107,2020,23,4609.816216,357.467625,4219.098648,297.193399,1969.201111,115,0.970942,0.239316,0.464723,-0.885456
2020-06-08,800,3429.065037,425.663469,1181.626485,2353.024023,4510.560793,2020,24,3429.065037,425.663469,1512.071585,0.0,2889.605271,116,0.992709,0.120537,0.239316,-0.970942


In [4]:
# Names of the target column and of the date column in `df`.
target = "new_customers"
date_col = "week_start"

# Names of the regressor columns, in the order they are used downstream.
raw_regressors = [
    # lagged investment columns
    "promo_investment_lag0",
    "ppc_brand_investment_lag0",
    "ppc_generic_investment_lag3",
    "facebook_investment_lag2",
    "instagram_investment_lag6",
    # time index
    "time",
    # sine / cosine columns
    "sin_52_1",
    "cos_52_1",
    "sin_52_2",
    "cos_52_2",
]

How to choose priors? Stan has the answer :wink:
https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations 

In [5]:
# Integer sliders labelled "alpha" and "beta": range 1-10, step 1, starting at 2.
alpha_widget = widgets.IntSlider(description="alpha", value=2, min=1, max=10, step=1)
beta_widget = widgets.IntSlider(description="beta", value=2, min=1, max=10, step=1)

def plot_beta_distribution(alpha: float, beta_param: float) -> None:
    """Plot the PDF and CDF of a Beta(alpha, beta_param) distribution.

    The figure contains three traces: the density (filled to zero), the
    cumulative distribution (dotted line), and a marker placed on the density
    curve at the distribution mean, labelled with the mean and standard
    deviation. The figure is displayed with ``fig.show()``.

    Parameters
    ----------
    alpha : float
        First shape parameter; must be finite and strictly positive.
    beta_param : float
        Second shape parameter; must be finite and strictly positive.

    Raises
    ------
    ValueError
        If either shape parameter is non-finite or not strictly positive.
    """
    # Validate up front: non-positive shapes otherwise surface as a
    # ZeroDivisionError (alpha + beta_param == 0) or as an all-NaN plot.
    if not (np.isfinite(alpha) and np.isfinite(beta_param)) or alpha <= 0 or beta_param <= 0:
        raise ValueError(
            "Shape parameters alpha and beta_param must be finite and positive."
        )

    # Closed-form mean and standard deviation of the Beta distribution
    total = alpha + beta_param
    mean = alpha / total
    std_dev = np.sqrt((alpha * beta_param) / (total**2 * (total + 1)))

    # Evaluation grid over the support [0, 1]
    x = np.linspace(0, 1, 500)

    # Generate PDF and CDF values
    pdf_values = beta.pdf(x, alpha, beta_param)
    cdf_values = beta.cdf(x, alpha, beta_param)

    # Create the figure with both plots
    fig = go.Figure()

    # Add the PDF plot
    fig.add_trace(go.Scatter(
        x=x, y=pdf_values,
        mode='lines',
        name='PDF',
        line=dict(color='blue'),
        fill='tozeroy',
        # Plotly renders <br> as a line break; without it the fields run together.
        hovertemplate='x: %{x:.2f}<br>PDF: %{y:.2f}'
    ))
    # Add the CDF plot
    fig.add_trace(go.Scatter(
        x=x, y=cdf_values,
        mode='lines',
        name='CDF',
        line=dict(color='orange', dash='dot'),
        hovertemplate='x: %{x:.2f}<br>CDF: %{y:.2f}'
    ))

    # Mark the mean on the density curve and annotate it with mean / std dev.
    # Plotly text ignores "\n"; <br> is required for a two-line label.
    fig.add_trace(go.Scatter(
        x=[mean], y=[beta.pdf(mean, alpha, beta_param)],
        mode='markers+text',
        name='Mean',
        text=[f"Mean: {mean:.2f}<br>Std Dev: {std_dev:.2f}"],
        textposition='top right',
        marker=dict(color='red', size=10)
    ))

    # Layout adjustments
    fig.update_layout(
        title=f"Beta Distribution (\u03B1={alpha}, \u03B2={beta_param})",
        xaxis_title="x",
        yaxis_title="Density / Cumulative Probability",
        width=1300, height=600
    )

    # Show the plot
    fig.show()

# Example usage: render the figure once for fixed shape parameters alpha=2, beta=5.
plot_beta_distribution(alpha=2, beta_param=5)

In [6]:
# Float slider labelled "Std Dev": range 0.01-10 in steps of 0.1, starting at 1.
std_half_widget = widgets.FloatSlider(
    description="Std Dev",
    value=1,
    min=0.01,
    max=10,
    step=0.1,
)

def plot_halfnormal_distribution(std: float) -> None:
    """Plot the PDF and CDF of a Half-Normal distribution with scale ``std``.

    The figure contains the density (filled to zero), the cumulative
    distribution (dashed line), and an "x" marker on the density curve at the
    distribution mean. The figure is displayed with ``fig.show()``.

    Parameters
    ----------
    std : float
        Scale parameter of the Half-Normal distribution. It is rounded to two
        decimals before use, so values that round to 0 are rejected as well.

    Raises
    ------
    ValueError
        If ``std`` is not finite, or is not positive after rounding.
    """
    # NaN / inf slip through a plain `<= 0` comparison and would yield an
    # all-NaN plot, so reject them explicitly.
    if not np.isfinite(std):
        raise ValueError("Standard deviation must be a finite number.")

    std = np.round(std, 2)
    if std <= 0:
        raise ValueError("Standard deviation must be positive.")

    # Define the x range (only positive side for Half-Normal)
    x = np.linspace(0, 4 * std, 500)

    # Calculate PDF and CDF using scipy.stats.halfnorm
    pdf = halfnorm.pdf(x, scale=std)
    cdf = halfnorm.cdf(x, scale=std)

    # Create the plotly figure
    fig = go.Figure()

    # Add PDF to the plot
    fig.add_trace(
        go.Scatter(
            x=x, y=pdf, mode="lines", name="PDF",
            line=dict(color="blue"),
            fill="tozeroy",
            # <br> puts the two fields on separate hover lines.
            hovertemplate="PDF: %{y:.4f}<br>X: %{x:.2f}",
        )
    )

    # Add CDF to the plot
    fig.add_trace(
        go.Scatter(
            x=x, y=cdf, mode="lines", name="CDF",
            line=dict(color="green", dash="dash"),
            hovertemplate="CDF: %{y:.4f}<br>X: %{x:.2f}",
        )
    )

    # Mark the mean of the Half-Normal distribution: std * sqrt(2 / pi)
    mean = std * np.sqrt(2 / np.pi)
    fig.add_trace(
        go.Scatter(
            x=[mean],
            y=[halfnorm.pdf(mean, scale=std)],
            mode="markers",
            name="Mean",
            marker=dict(color="red", size=10, symbol="x"),
            hovertemplate=f"Mean: {mean:.4f}",
        )
    )

    # Update layout
    fig.update_layout(
        title=f"Half-Normal Distribution (std={std})",
        xaxis_title="X",
        yaxis_title="Density",
        legend=dict(title="Legend", orientation="h", y=-0.2),
        template="plotly_white",
        width=1300, height=600,
    )

    # Show the plot
    fig.show()