# Explanatory Analysis

Set up libraries

In [2]:
import scipy
from datetime import date
import polars as pl
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact, interactive
import numpy as np
import statsmodels.api as sm
from sklearn.metrics import r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
# Use the clean white theme for every Plotly figure created after this cell.
# `pio` (plotly.io) is already imported in the imports cell at the top of the
# notebook, so it is not re-imported here.
pio.templates.default = 'plotly_white'


In [4]:
# Location of the input CSV, relative to the notebook's working directory.
data_link = "./Data/data.csv"

# Load the data, cast `week_start` to a Date column and order the rows
# chronologically so later time-based plots read left to right.
df = (
    pl.read_csv(data_link)
    .with_columns(pl.col('week_start').cast(pl.Date))
    .sort('week_start')
)

# Preview the first rows.
df.head(4)

week_start,new_customers,promo_investment,ppc_brand_investment,ppc_generic_investment,facebook_investment,instagram_investment,year,week,promo_investment_lag0,ppc_brand_investment_lag0,ppc_generic_investment_lag3,facebook_investment_lag2,instagram_investment_lag6,time,sin_52_1,cos_52_1,sin_52_2,cos_52_2
date,i64,f64,f64,f64,f64,f64,i64,i64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64
2020-05-18,777,1304.227029,756.818857,1512.071585,297.193399,4593.059512,2020,21,1304.227029,756.818857,2300.295763,999.081772,82.890357,113,0.885456,0.464723,0.822984,-0.568065
2020-05-25,978,3000.504127,69.654279,399.82446,0.0,792.564928,2020,22,3000.504127,69.654279,4268.329032,637.97411,1246.32403,114,0.935016,0.354605,0.663123,-0.748511
2020-06-01,950,4609.816216,357.467625,471.992936,1769.420849,3281.127107,2020,23,4609.816216,357.467625,4219.098648,297.193399,1969.201111,115,0.970942,0.239316,0.464723,-0.885456
2020-06-08,800,3429.065037,425.663469,1181.626485,2353.024023,4510.560793,2020,24,3429.065037,425.663469,1512.071585,0.0,2889.605271,116,0.992709,0.120537,0.239316,-0.970942


### Quality Check ✅

In [11]:
def check_complete_datarange(df: pl.DataFrame,
                             date_col: str = 'week_start') -> None:
  """Assert that `date_col` covers every week between its first and last date.

  The expected dates are generated in 1-week steps starting at the earliest
  value of `date_col` and ending at (or just before) the latest value. The
  check passes only when the set of dates in the column equals that set.

  Duplicated dates are NOT detected, because the comparison is done on sets.

  Args:
    df: Frame containing the date column to validate.
    date_col: Name of the date column (defaults to 'week_start').

  Raises:
    AssertionError: If the column is empty, contains nulls, is missing
      expected weeks, or contains dates that are off the weekly grid.
  """
  dates = df[date_col]
  assert len(dates) > 0, f"'{date_col}' is empty: no date range to check"
  assert dates.null_count() == 0, (
      f"'{date_col}' contains {dates.null_count()} null value(s)")

  expected = set(pl.date_range(start=dates.min(), end=dates.max(),
                               interval="1w", eager=True).to_list())
  observed = set(dates.to_list())

  # Report exactly which dates break the weekly grid instead of a bare failure.
  missing = sorted(expected - observed)
  unexpected = sorted(observed - expected)
  assert not missing and not unexpected, (
      f"'{date_col}' is not a complete weekly range: "
      f"{len(missing)} missing date(s) {missing[:10]}, "
      f"{len(unexpected)} unexpected date(s) {unexpected[:10]}")


In [None]:
# Verify that the loaded data has no gaps in its weekly `week_start` dates.
check_complete_datarange(df)

In [None]:
# Summary statistics for each column of the loaded frame.
df.describe()

### Know your target ⭕

In [6]:
# List the column names available in the loaded frame.
df.columns

['week_start',
 'new_customers',
 'promo_investment',
 'ppc_brand_investment',
 'ppc_generic_investment',
 'facebook_investment',
 'instagram_investment',
 'year',
 'week',
 'promo_investment_lag0',
 'ppc_brand_investment_lag0',
 'ppc_generic_investment_lag3',
 'facebook_investment_lag2',
 'instagram_investment_lag6',
 'time',
 'sin_52_1',
 'cos_52_1',
 'sin_52_2',
 'cos_52_2']

In [7]:
# Column being analysed (the response variable).
target = "new_customers"

# Column holding the start date of each weekly observation.
date_col = 'week_start'

# Investment columns, one per marketing channel.
channels = [
    'promo_investment',
    'ppc_brand_investment',
    'ppc_generic_investment',
    'facebook_investment',
    'instagram_investment',
]

### Trend Analysis 📈

In [10]:
# Slider selecting the rolling-mean window (in rows, i.e. weeks) from 1 to 40.
rolling_window_widget = widgets.IntSlider(value=1,
    min=1,
    max=40,
    step=1,
    description='Window')


def update_rolling_plot(window):
  """Plot the target over time, smoothed by a rolling mean of `window` rows.

  Reads the notebook-level `df`, `target` and `date_col`. With `window == 1`
  the rolling mean leaves the series unchanged, so the title omits the
  rolling-mean suffix.

  Args:
    window: Rolling-mean window size, supplied by the slider.
  """
  title = f"{target} Evolution Over Time"
  if window != 1:
    # Separate the suffix from the base title with " - ".
    title += f" - Rolling Mean (Window = {window})"

  # Replace the target column with its rolling mean for plotting only;
  # the notebook-level `df` is not modified.
  rolled_df = df.with_columns(pl.col(target).rolling_mean(window))
  fig = px.line(rolled_df, x=date_col, y=target,
                title=title)
  # NOTE(review): the renderer is pinned to "colab", unlike the seasonality
  # plot, which uses the default renderer. Confirm this is intentional before
  # running the notebook outside Colab.
  fig.show(renderer="colab")

# Re-run the plot every time the slider value changes.
interactive_plot = interactive(update_rolling_plot,
                               window=rolling_window_widget)

display(interactive_plot)

interactive(children=(IntSlider(value=1, description='Window', max=40, min=1), Output()), _dom_classes=('widge…

🔍 Observations <br>
⬛ Increasing trend over time, except in 2023, where new customers show a decline. <br>
⬛ Presence of seasonality.

### Seasonality Analysis 📈

In [13]:
# Overlay one line per year against the week number, so the same weeks of
# different years can be compared directly. The target is smoothed with a
# 5-row rolling mean before plotting.
fig = px.line(
    data_frame=df.with_columns(pl.col(target).rolling_mean(5)),
    x='week',
    y=target,
    color='year',
    hover_data=[date_col],  # show the actual week start date on hover
    title='New Customers Comparison Over Years',
)

fig.show()