In [36]:
import pandas as pd
import pymc3 as pm
import arviz as az
import theano.tensor as tt
import numpy as np

import matplotlib.pyplot as plt
import pickle

import seaborn as sns

import pystan

import re

In [37]:
def get_mape(y_true, y_pred):
    '''
    Mean Absolute Percentage Error

    Computes mean(|(y_true - y_pred) / y_true|) * 100.

    Parameters
    ----------
    y_true : array-like
        Observed values. Plain lists/tuples are converted to numpy arrays;
        any other type (numpy array, pandas object, scalar) is used as-is.
    y_pred : array-like
        Predicted values, same handling as ``y_true``.

    Returns
    -------
    The error expressed in percent (multiplied by 100).

    Notes
    -----
    Entries of ``y_true`` equal to zero are divided by as-is, so the
    result follows numpy's division-by-zero semantics for those entries.
    '''

    # Plain Python sequences do not support element-wise subtraction
    # (list - list raises TypeError), so promote them to arrays first.
    if isinstance(y_true, (list, tuple)):
        y_true = np.asarray(y_true)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.asarray(y_pred)

    err = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    return err

def get_rmse(y_true, y_pred):
    '''
    Root Mean Squared Error

    Computes sqrt(mean((y_true - y_pred) ** 2)).

    Parameters
    ----------
    y_true : array-like
        Observed values. Plain lists/tuples are converted to numpy arrays;
        any other type (numpy array, pandas object, scalar) is used as-is.
    y_pred : array-like
        Predicted values, same handling as ``y_true``.

    Returns
    -------
    The square root of the mean squared difference.
    '''
    # Plain Python sequences do not support element-wise subtraction
    # (list - list raises TypeError), so promote them to arrays first.
    if isinstance(y_true, (list, tuple)):
        y_true = np.asarray(y_true)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.asarray(y_pred)

    squared_errors = (y_true - y_pred)**2
    rmse = np.sqrt(np.mean(squared_errors))

    return rmse

In [108]:
class MmmDataset:
    '''
    Dataset read from a CSV file and split into groups of columns.

    The CSV is indexed by its 'wk_strt_dt' column and the remaining
    columns are grouped by name pattern.

    Attributes
    ----------
    impressions_df : columns matching 'mdip_', with that prefix removed
    spend_df       : columns matching 'mdsp_', with that prefix removed
    base_vars_df   : columns matching 'me_', 'st_ct' or 'mrkdn_'
                     (macro economics, store counts, markdowns)
    hldy_df        : columns matching 'hldy_' (holidays)
    seas_df        : columns matching 'seas_' (seasonals)
    target_df      : the single 'sales' column
    baseline_df    : None until set_baseline_df() is called

    Notes
    -----
    Columns are selected with DataFrame.filter(regex=...), which matches
    the pattern anywhere in the column name (the patterns are not anchored).
    '''

    def __init__(self, filename):
        '''
        Load the CSV and split it into the column groups.

        Parameters
        ----------
        filename : anything accepted by pd.read_csv (path or file-like).
            The data must contain the columns 'wk_strt_dt' and 'sales'.
        '''

        # load data
        df = pd.read_csv(filename)
        df = df.set_index('wk_strt_dt')

        # extract impressions and rename
        imp_df = df.filter(regex='mdip_.*')
        self.impressions_df = imp_df.rename(columns=lambda x: x.replace('mdip_',''))

        # extract spending and rename
        spend_df = df.filter(regex='mdsp_.*')
        self.spend_df = spend_df.rename(columns=lambda x: x.replace('mdsp_',''))

        # extract base variables (macro economics, store counts, markdowns)
        self.base_vars_df = df.filter(regex='(me_.*)|(st_ct)|(mrkdn_.*)')

        # extract holidays
        self.hldy_df = df.filter(regex='(hldy_.*)')

        # extract seasonals
        self.seas_df = df.filter(regex='(seas_.*)')

        # extract target variable
        self.target_df = df[['sales']]

        # Populated later by set_baseline_df(); defined here so that reading
        # the attribute beforehand yields None instead of AttributeError.
        self.baseline_df = None

    def get_base_model_data(self):
        '''
        Build the input dictionary for the base model.

        Returns
        -------
        dict with keys
            'y'             : 1-D target, divided by its own mean
            'max_intercept' : minimum of the scaled target
            'positive_vars' : mean-scaled base variables followed by the
                              holiday columns (positive constrained
                              coefficients)
            'posneg_vars'   : seasonal columns (non constrained coefficients)
        '''

        data = {}

        # scale target by its mean (division, not subtraction)
        y = self.target_df.values.reshape(-1)
        y = y / y.mean()
        data['y'] = y
        data['max_intercept'] = y.min()

        # scale each base variable by its column mean
        centered_base_df = self.base_vars_df / self.base_vars_df.mean(axis=0)

        # variables with positive constrained coefficients
        data['positive_vars'] = pd.concat([centered_base_df, self.hldy_df], axis=1).values

        # variables with non constrained coefficients
        data['posneg_vars'] = self.seas_df.values

        return data

    def set_baseline_df(self, base_sales):
        '''
        Store baseline sales as a one-column DataFrame ('baseline_sales').

        Parameters
        ----------
        base_sales : 1-D values, one per row of target_df.
            The resulting frame's index is overwritten with target_df's
            index, so values are matched to rows by position.
        '''

        self.baseline_df = pd.DataFrame({'baseline_sales' : base_sales})
        self.baseline_df.index = self.target_df.index
        