In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge

import os
imgdir = 'output_images'
if not os.path.isdir(imgdir):
    os.mkdir(imgdir)

brands = ['Gummies', 'Sugar Buns', 'Sparkle Pop', 'Candys', 'Garlic Bombs', 'Snackmonds', 'Caffeinos', 'moon drops']
spend_types = ['Trade', 'Search', 'Display', 'TV', 'Radio']
num_brands, num_types = len(brands), len(spend_types)

In [2]:
import tqdm
try:
    tqdm_disp = tqdm.tqdm_notebook if get_ipython(
    ).__class__.__name__ == 'ZMQInteractiveShell' else tqdm.tqdm
except NameError:
    tqdm_disp = tqdm.tqdm

In [3]:
num_promos = 10000
start_date = pd.to_datetime('2017-01-01')
end_date = pd.to_datetime('2018-06-01')
dates = pd.date_range(start_date, end_date)
num_dates = len(dates)

In [4]:
promo = pd.DataFrame(
    {'Brand':np.random.choice(brands, num_promos),
     'Type':np.random.choice(spend_types, num_promos), 
     'Spend':np.random.rand(num_promos) * 500,
     'date' : np.random.choice(dates, num_promos)})
promo = promo.groupby(['Brand', 'Type', 'date'])['Spend'].sum().to_frame()
promo = promo.unstack(fill_value=0).stack()
promo.shape

(20680, 1)

In [5]:
assert(promo.shape[0] == num_brands * num_types * num_dates)

In [6]:
true_baselines = {brand: [np.random.randn() * 10] for brand in brands}

In [7]:
spend_rois = {t : [np.random.rand()+.8] for t in spend_types}
brand_rois = {t : [np.random.rand()/3] for t in brands}

In [8]:
brand_spend_combos = pd.DataFrame(data=np.random.rand(num_brands, num_types) - .5, index=brands, columns = spend_types)

In [9]:
brand_spend_combos  = brand_spend_combos + \
    pd.DataFrame(brand_rois, index=spend_rois).T + pd.DataFrame(spend_rois, index = brands)

In [10]:
brand_spend_combos

Unnamed: 0,Trade,Search,Display,TV,Radio
Gummies,1.067037,1.466991,1.330736,1.49742,0.999869
Sugar Buns,1.773734,1.809103,1.32081,2.144407,1.641714
Sparkle Pop,1.168067,1.622066,1.747719,2.403665,1.31004
Candys,1.351434,1.483022,1.323607,2.04557,1.707458
Garlic Bombs,1.248054,2.345776,1.351253,1.560367,1.862435
Snackmonds,2.122485,2.225241,1.718346,1.543132,1.014566
Caffeinos,1.311164,1.757677,2.084707,1.981236,1.46544
moon drops,1.261018,2.394518,1.522242,1.552517,1.392675


In [11]:
promo2 = promo.unstack(1)

In [12]:
promo2.columns = promo2.columns.droplevel(0)
promo2 = promo2.reset_index(1)

In [13]:
effect = promo2.copy()
effect[effect.columns[1:]] = effect[effect.columns[1:]] * brand_spend_combos
effect = effect.reset_index().set_index(['Brand', 'date'])
effect = effect.sum(1).to_frame(name='spend').unstack(0)
effect.columns = effect.columns.droplevel(0)

In [14]:
effect.head()

Brand,Caffeinos,Candys,Garlic Bombs,Gummies,Snackmonds,Sparkle Pop,Sugar Buns,moon drops
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-01-01,1207.843141,883.602884,377.618119,206.033412,3091.822177,1811.850004,2452.752085,628.083395
2017-01-02,704.172937,2254.199535,532.758002,777.723307,1500.035978,803.138176,306.054866,864.426531
2017-01-03,2122.046515,599.080636,2599.706336,446.037901,781.033905,625.782205,352.450016,985.109994
2017-01-04,1295.984058,488.671477,776.761049,705.072936,2166.056665,601.568627,1980.627633,449.194602
2017-01-05,1284.243958,1011.729945,311.464837,375.295274,1235.977219,2488.452369,461.059881,1925.857491


In [15]:
salesnoise = pd.DataFrame(data = np.random.rand(num_dates, num_brands) * (-1), columns= brands, index=dates)
salesnoise.head()

Unnamed: 0,Gummies,Sugar Buns,Sparkle Pop,Candys,Garlic Bombs,Snackmonds,Caffeinos,moon drops
2017-01-01,-0.253746,-0.897106,-0.676205,-0.602281,-0.099229,-0.195796,-0.490456,-0.2015
2017-01-02,-0.295051,-0.364747,-0.879481,-0.800957,-0.430922,-0.471775,-0.34106,-0.01594
2017-01-03,-0.161565,-0.743348,-0.929417,-0.909743,-0.396169,-0.344932,-0.58221,-0.985758
2017-01-04,-0.751968,-0.236642,-0.57644,-0.669588,-0.81569,-0.209781,-0.388839,-0.778696
2017-01-05,-0.62966,-0.042112,-0.783427,-0.420464,-0.260097,-0.88875,-0.632332,-0.004422


In [16]:
seasonal_effects = pd.DataFrame(
    {brand: {'cos':np.random.randn() * 10, 'sin':np.random.randn()*10} for brand in brands})

In [17]:
seasonal_effects

Unnamed: 0,Gummies,Sugar Buns,Sparkle Pop,Candys,Garlic Bombs,Snackmonds,Caffeinos,moon drops
cos,-2.321925,-1.162802,8.123728,-3.544766,11.916169,-9.793609,0.916296,3.785876
sin,-1.127175,2.019571,12.095242,-13.59851,-7.671262,-16.210703,3.073552,-11.931129


In [18]:
brand_spend_combos_with_seasonality = brand_spend_combos.join(seasonal_effects.T)

In [19]:
seasonal = salesnoise.index.dayofyear

seasonal_sine, seasonal_cos = np.sin(seasonal*2*np.pi/365), np.cos(seasonal*2*np.pi/365)

In [20]:
seasonal_impact = seasonal_effects.loc[['cos'],:].values * np.expand_dims(seasonal_cos.values, 1) + \
    seasonal_effects.loc[['sin'],:].values * np.expand_dims(seasonal_sine.values, 1)

In [21]:
total_sales = effect + salesnoise + pd.DataFrame(true_baselines).iloc[0,:] + seasonal_impact

In [22]:
# total_sales = pd.DataFrame(
#     data=total_sales.values + np.expand_dims(sine.values, 1) + np.expand_dims(cos.values, 1),
#     index=total_sales.index, columns=total_sales.columns)

In [23]:
total_sales.head()

Unnamed: 0_level_0,Caffeinos,Candys,Garlic Bombs,Gummies,Snackmonds,Sparkle Pop,Sugar Buns,moon drops
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-01-01,1199.113723,870.480356,417.09807,212.290671,3099.707978,1801.52198,2456.406746,635.289298
2017-01-02,695.574551,2240.9136,572.110788,783.70683,1507.508495,792.332269,310.294369,871.610996
2017-01-03,2113.189313,585.721508,2639.295873,451.923668,788.492509,614.654823,356.3631,991.116645
2017-01-04,1287.303266,475.588412,816.130475,710.138314,2173.506256,590.525829,1985.099284,455.199381
2017-01-05,1275.303421,998.932214,351.586617,380.254299,1242.600309,2476.937416,465.777591,1932.426735


In [24]:
total_sales = total_sales.stack().to_frame(name='sales')

In [25]:
total_sales.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-01,Caffeinos,1199.113723
2017-01-01,Candys,870.480356
2017-01-01,Garlic Bombs,417.09807
2017-01-01,Gummies,212.290671
2017-01-01,Snackmonds,3099.707978


In [26]:
promo_viewed = promo.unstack(1)
promo_viewed.columns = promo_viewed.columns.droplevel(0)
promo_viewed = promo_viewed.reset_index().sort_values('date').set_index(['date', 'Brand'])
promo_viewed.head(10)

Unnamed: 0_level_0,Type,Display,Radio,Search,TV,Trade
date,Brand,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01,Caffeinos,384.75952,0.0,230.834463,0.0,0.0
2017-01-01,Sugar Buns,0.0,1332.573261,0.0,123.599569,0.0
2017-01-01,Sparkle Pop,0.0,74.576428,297.026606,512.698372,0.0
2017-01-01,Snackmonds,427.448635,0.0,1059.353692,0.0,0.0
2017-01-01,Gummies,0.0,0.0,0.0,0.0,193.089304
2017-01-01,Garlic Bombs,0.0,0.0,0.0,242.005997,0.0
2017-01-01,Candys,0.0,0.0,0.0,400.894464,47.020613
2017-01-01,moon drops,63.795748,323.417128,0.0,0.0,63.881606
2017-01-02,Snackmonds,0.0,52.742948,0.0,326.309541,444.284019
2017-01-02,Garlic Bombs,394.269618,0.0,0.0,0.0,0.0


In [27]:
sales_viewed = total_sales.clip(0, np.inf)

In [28]:
data_viewed = promo_viewed.join(sales_viewed, on=['date', 'Brand'])

In [29]:
data_viewed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Display,Radio,Search,TV,Trade,sales
date,Brand,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-01-01,Caffeinos,384.75952,0.0,230.834463,0.0,0.0,1199.113723
2017-01-01,Sugar Buns,0.0,1332.573261,0.0,123.599569,0.0,2456.406746
2017-01-01,Sparkle Pop,0.0,74.576428,297.026606,512.698372,0.0,1801.52198
2017-01-01,Snackmonds,427.448635,0.0,1059.353692,0.0,0.0,3099.707978
2017-01-01,Gummies,0.0,0.0,0.0,0.0,193.089304,212.290671


In [30]:
spend = data_viewed.drop('sales', 1)
Y = data_viewed[['sales']]
spend.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Display,Radio,Search,TV,Trade
date,Brand,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01,Caffeinos,384.75952,0.0,230.834463,0.0,0.0
2017-01-01,Sugar Buns,0.0,1332.573261,0.0,123.599569,0.0
2017-01-01,Sparkle Pop,0.0,74.576428,297.026606,512.698372,0.0
2017-01-01,Snackmonds,427.448635,0.0,1059.353692,0.0,0.0
2017-01-01,Gummies,0.0,0.0,0.0,0.0,193.089304
2017-01-01,Garlic Bombs,0.0,0.0,0.0,242.005997,0.0
2017-01-01,Candys,0.0,0.0,0.0,400.894464,47.020613
2017-01-01,moon drops,63.795748,323.417128,0.0,0.0,63.881606
2017-01-02,Snackmonds,0.0,52.742948,0.0,326.309541,444.284019
2017-01-02,Garlic Bombs,394.269618,0.0,0.0,0.0,0.0


In [31]:
seasonal_X = spend.reset_index()['date'].dt.dayofyear

sine_X, cos_X = np.sin(seasonal_X*2*np.pi/365), np.cos(seasonal_X*2*np.pi/365)
#sine_X = sine_X.to_frame('sine')
#cos_X = cos_X.to_frame('cos')

In [32]:
dummies = pd.get_dummies(spend.reset_index()['Brand'])
dummies = dummies.set_index(spend.index)

In [33]:
spend['sin'] = sine_X.values
spend['cos'] = cos_X.values

In [34]:
spend

Unnamed: 0_level_0,Unnamed: 1_level_0,Display,Radio,Search,TV,Trade,sin,cos
date,Brand,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-01-01,Caffeinos,384.759520,0.000000,230.834463,0.000000,0.000000,0.017213,0.999852
2017-01-01,Sugar Buns,0.000000,1332.573261,0.000000,123.599569,0.000000,0.017213,0.999852
2017-01-01,Sparkle Pop,0.000000,74.576428,297.026606,512.698372,0.000000,0.017213,0.999852
2017-01-01,Snackmonds,427.448635,0.000000,1059.353692,0.000000,0.000000,0.017213,0.999852
2017-01-01,Gummies,0.000000,0.000000,0.000000,0.000000,193.089304,0.017213,0.999852
2017-01-01,Garlic Bombs,0.000000,0.000000,0.000000,242.005997,0.000000,0.017213,0.999852
2017-01-01,Candys,0.000000,0.000000,0.000000,400.894464,47.020613,0.017213,0.999852
2017-01-01,moon drops,63.795748,323.417128,0.000000,0.000000,63.881606,0.017213,0.999852
2017-01-02,Snackmonds,0.000000,52.742948,0.000000,326.309541,444.284019,0.034422,0.999407
2017-01-02,Garlic Bombs,394.269618,0.000000,0.000000,0.000000,0.000000,0.034422,0.999407


In [35]:
spend.reset_index().set_index('date')['Brand'].to_frame().head()

Unnamed: 0_level_0,Brand
date,Unnamed: 1_level_1
2017-01-01,Caffeinos
2017-01-01,Sugar Buns
2017-01-01,Sparkle Pop
2017-01-01,Snackmonds
2017-01-01,Gummies


In [36]:
dummies.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Caffeinos,Candys,Garlic Bombs,Gummies,Snackmonds,Sparkle Pop,Sugar Buns,moon drops
date,Brand,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-01-01,Caffeinos,1,0,0,0,0,0,0,0
2017-01-01,Sugar Buns,0,0,0,0,0,0,1,0
2017-01-01,Sparkle Pop,0,0,0,0,0,1,0,0
2017-01-01,Snackmonds,0,0,0,0,1,0,0,0
2017-01-01,Gummies,0,0,0,1,0,0,0,0


In [37]:
dummy_spends = {}
for dummy in dummies.columns:
    brand_spend = pd.DataFrame(
        dummies[dummy].to_frame().values * spend.values, 
        index=spend.index, columns=['type:' + c + '_brand:' + dummy for c in spend.columns])
    dummy_spends[dummy] = brand_spend

In [38]:
Xraw = pd.concat((spend, dummies, *list(dummy_spends.values())), 1)

In [39]:
xmeans, xstds = Xraw.mean(), Xraw.std()

In [40]:
X = (Xraw - xmeans) / xstds

In [41]:
clf = Ridge(1e-5).fit(X, Y)

In [42]:
import tensorflow as tf
lr = tf.placeholder(tf.float32, ())
x_in = tf.placeholder(tf.float32, (None, X.shape[1]))
y_in = tf.placeholder(tf.float32, (None))
w = tf.Variable(np.random.randn(X.shape[1]), dtype=tf.float32)
b = tf.Variable(0, dtype=tf.float32)
yhat = x_in * tf.expand_dims(w, 0) + b
loss = tf.reduce_mean(tf.square(y_in-yhat)) + 1e-5 * (tf.reduce_sum(tf.abs(w)))

In [43]:
optim = tf.train.AdamOptimizer(lr).minimize(loss)

feed_dict = {
    x_in:X, y_in:Y
}

s = tf.Session()
s.run(tf.global_variables_initializer())
for i in range(100):
    feed_dict[lr] = 1e2/np.sqrt(i+1)
    l, _ = s.run([loss, optim], feed_dict)
    if i % 10 == 0:
        print(l)

1549953.9
812644.06
645033.4
589916.5
573750.6
570011.0
569472.06
569481.2
569497.8
569487.06


In [44]:
optim = tf.contrib.opt.ScipyOptimizerInterface(
        loss
    )
feed_dict = {
    x_in:X, y_in:Y
}

s = tf.Session()
s.run(tf.global_variables_initializer())

with tqdm_disp() as tq:
    y_hat_trace = []

    def loss_callback(y_hat, loss):
        y_hat_trace.append(y_hat)
        tq.update()
        tq.set_postfix_str('loss = %.6f' % (loss))

    optim.minimize(
        s,
        feed_dict=feed_dict,
        fetches=[yhat, loss],
        loss_callback=loss_callback,
    )
    y_hat = y_hat_trace[-1]

A Jupyter Widget

INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: 569458.812500
  Number of iterations: 6
  Number of functions evaluations: 30



In [45]:
s.run(loss, feed_dict)

569458.8

In [46]:
coef_tf = s.run(w)

In [47]:
clf.coef_

array([[ 1.76032491e+02,  1.64997631e+02,  2.20036500e+02,
         2.12653148e+02,  1.57066799e+02, -1.41199620e+00,
         3.28230525e-01, -2.89989019e+00, -4.37102116e+00,
         8.75323460e+00,  1.93769470e+00, -2.15225536e+00,
        -8.31435828e-01, -3.75972972e-01, -6.02887199e-02,
         9.57023995e+01,  5.13193775e+01,  5.21651432e+01,
         8.41444184e+01,  3.99999193e+01,  2.47680374e-01,
        -6.27718410e-01,  3.52005821e+01,  7.07630431e+01,
         3.20028537e+01,  7.71922894e+01,  4.46487368e+01,
         1.02752760e+00, -4.14940716e-01,  4.00900237e+01,
         9.04122607e+01,  1.02582753e+02,  4.56798833e+01,
         3.78230401e+01,  3.60779224e+00,  1.85681099e+00,
         3.77557990e+01,  1.38663452e+01,  3.23633030e+01,
         3.76862770e+01,  2.26010831e+01, -2.89742515e+00,
        -9.66953954e-01,  6.79222847e+01,  1.49618407e+01,
         1.00190637e+02,  4.55377030e+01,  1.02029385e+02,
        -1.29956784e+00,  2.57709999e+00,  7.41035197e+0

In [48]:
coef = clf.coef_ / xstds.values
#coef = np.expand_dims(coef_tf, 0) / xstds.values

In [49]:
display = pd.DataFrame(coef, columns=X.columns)

In [50]:
aggregated = spend.columns
bias = dummies.columns
deltas = np.array([col for col in X.columns if col not in aggregated and col not in bias])

In [51]:
['bias'] + spend.columns

Index(['biasDisplay', 'biasRadio', 'biasSearch', 'biasTV', 'biasTrade',
       'biassin', 'biascos'],
      dtype='object')

In [52]:
results = pd.DataFrame(data=np.zeros((num_brands+1, spend.columns.shape[0]+1)), index=['aggregated'] + brands, columns = ['bias'] + list(spend.columns))

In [53]:
results

Unnamed: 0,bias,Display,Radio,Search,TV,Trade,sin,cos
aggregated,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gummies,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sugar Buns,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sparkle Pop,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Candys,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Garlic Bombs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Snackmonds,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Caffeinos,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
moon drops,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
results.loc[['aggregated'], aggregated]


Unnamed: 0,Display,Radio,Search,TV,Trade,sin,cos
aggregated,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
display[aggregated]

Unnamed: 0,Display,Radio,Search,TV,Trade,sin,cos
0,0.883327,0.82155,1.073923,1.029299,0.793412,-2.03678,0.477488


In [56]:
display

Unnamed: 0,Display,Radio,Search,TV,Trade,sin,cos,Caffeinos,Candys,Garlic Bombs,...,type:Trade_brand:Sugar Buns,type:sin_brand:Sugar Buns,type:cos_brand:Sugar Buns,type:Display_brand:moon drops,type:Radio_brand:moon drops,type:Search_brand:moon drops,type:TV_brand:moon drops,type:Trade_brand:moon drops,type:sin_brand:moon drops,type:cos_brand:moon drops
0,0.883327,0.82155,1.073923,1.029299,0.793412,-2.03678,0.477488,-8.767384,-13.215128,26.464094,...,0.980247,5.113708,0.406249,0.637975,0.570333,1.319809,0.522426,0.46679,-9.560776,3.119793


In [57]:
display['type:Display_brand:Gummies']

0    0.447105
Name: type:Display_brand:Gummies, dtype: float64

In [58]:
results.loc[['aggregated'], aggregated] = display[aggregated].values
for brand in brands:
    results.loc[brand, 'bias'] = display[brand].values
    results.loc[brand, aggregated] = display.loc[:,[b for b in deltas if brand in b]].values + display[aggregated].values

In [59]:
results

Unnamed: 0,bias,Display,Radio,Search,TV,Trade,sin,cos
aggregated,0.0,0.883327,0.82155,1.073923,1.029299,0.793412,-2.03678,0.477488
Gummies,5.85833,1.330433,0.999534,1.466546,1.496984,1.06672,-13.409979,-3.490242
Sugar Buns,-1.136698,1.320759,1.641733,1.809042,2.144314,1.773659,3.076928,0.883737
Sparkle Pop,-2.513722,1.745497,1.307778,1.620176,2.401275,1.166134,-15.364534,-9.197491
Candys,-13.215128,1.320051,1.70444,1.480067,2.042009,1.348463,1.996551,-1.22515
Garlic Bombs,26.464094,1.351294,1.862495,2.345777,1.560371,1.248118,12.124807,8.096594
Snackmonds,-6.507022,1.716132,1.012258,2.223507,1.541367,2.120271,-7.137945,11.052176
Caffeinos,-8.767384,2.082411,1.463322,1.755449,1.979376,1.309327,-1.064566,-2.098247
moon drops,-0.182274,1.521302,1.391883,2.393733,1.551725,1.260202,-11.597556,3.597281


In [70]:
import calendar

In [77]:
def add_month(df):
    df = df.copy()

    df['phase'] = np.angle((
            df['sin'] * 1j + df['cos']).astype(complex))
    df['peak_month'] = ((
            (df['phase'] * 12 / 2 / np.pi)) % 12).round().astype(int)
    df.loc[df['peak_month'] == 0, 'peak_month'] = 12
    df['peak_month'] = df['peak_month'].apply(
        lambda x: calendar.month_abbr[x])
    df['seasonable_impact'] = np.sqrt(
        (np.square(df['sin']) + np.square(
            df['cos'])).astype('float'))
    df['seasonable_impact'] = df[
        'seasonable_impact'] / df['seasonable_impact'].std()
    return df

In [80]:
groundtruth = add_month(brand_spend_combos_with_seasonality)
results2 = add_month(results)

In [84]:
groundtruth[list(aggregated) + ['peak_month', 'seasonable_impact']]

Unnamed: 0,Display,Radio,Search,TV,Trade,sin,cos,peak_month,seasonable_impact
Gummies,1.330736,0.999869,1.466991,1.49742,1.067037,-1.127175,-2.321925,Jul,0.394085
Sugar Buns,1.32081,1.641714,1.809103,2.144407,1.773734,2.019571,-1.162802,Apr,0.355814
Sparkle Pop,1.747719,1.31004,1.622066,2.403665,1.168067,12.095242,8.123728,Feb,2.224626
Candys,1.323607,1.707458,1.483022,2.04557,1.351434,-13.59851,-3.544766,Sep,2.145652
Garlic Bombs,1.351253,1.862435,2.345776,1.560367,1.248054,-7.671262,11.916169,Nov,2.16382
Snackmonds,1.718346,1.014566,2.225241,1.543132,2.122485,-16.210703,-9.793609,Aug,2.89174
Caffeinos,2.084707,1.46544,1.757677,1.981236,1.311164,3.073552,0.916296,Feb,0.489691
moon drops,1.522242,1.392675,2.394518,1.552517,1.261018,-11.931129,3.785876,Oct,1.911198


In [85]:
results2[list(aggregated) + ['peak_month', 'seasonable_impact']]

Unnamed: 0,Display,Radio,Search,TV,Trade,sin,cos,peak_month,seasonable_impact
aggregated,0.883327,0.82155,1.073923,1.029299,0.793412,-2.03678,0.477488,Sep,0.325222
Gummies,1.330433,0.999534,1.466546,1.496984,1.06672,-13.409979,-3.490242,Sep,2.154169
Sugar Buns,1.320759,1.641733,1.809042,2.144314,1.773659,3.076928,0.883737,Feb,0.497678
Sparkle Pop,1.745497,1.307778,1.620176,2.401275,1.166134,-15.364534,-9.197491,Aug,2.783831
Candys,1.320051,1.70444,1.480067,2.042009,1.348463,1.996551,-1.22515,Apr,0.364162
Garlic Bombs,1.351294,1.862495,2.345777,1.560371,1.248118,12.124807,8.096594,Feb,2.26655
Snackmonds,1.716132,1.012258,2.223507,1.541367,2.120271,-7.137945,11.052176,Nov,2.045353
Caffeinos,2.082411,1.463322,1.755449,1.979376,1.309327,-1.064566,-2.098247,Jul,0.365775
moon drops,1.521302,1.391883,2.393733,1.551725,1.260202,-11.597556,3.597281,Oct,1.887695


In [63]:
display_aggregated = display.rename({col: 'aggregated:'+col for col in spend.columns}, axis=1)

In [64]:
display_aggregated

Unnamed: 0,aggregated:Display,aggregated:Radio,aggregated:Search,aggregated:TV,aggregated:Trade,aggregated:sin,aggregated:cos,Caffeinos,Candys,Garlic Bombs,...,type:Trade_brand:Sugar Buns,type:sin_brand:Sugar Buns,type:cos_brand:Sugar Buns,type:Display_brand:moon drops,type:Radio_brand:moon drops,type:Search_brand:moon drops,type:TV_brand:moon drops,type:Trade_brand:moon drops,type:sin_brand:moon drops,type:cos_brand:moon drops
0,0.883327,0.82155,1.073923,1.029299,0.793412,-2.03678,0.477488,-8.767384,-13.215128,26.464094,...,0.980247,5.113708,0.406249,0.637975,0.570333,1.319809,0.522426,0.46679,-9.560776,3.119793


In [65]:
X.columns.shape

(71,)