In [1]:
import numpy as np
import pandas as pd
from pipeline.parse import parse, parse_facebook
from pipeline.join import join
from pipeline.builtin_data import builtin_data

In [2]:
likes = parse_facebook('../examples/facebook-insights.xls')
likes.head()

Unnamed: 0_level_0,New page likes
date,Unnamed: 1_level_1
2019-04-01,10
2019-04-02,19
2019-04-03,23
2019-04-04,12
2019-04-05,9


In [3]:
alcohol = builtin_data()[0]
alcohol.head()

Unnamed: 0_level_0,Alcohol prices
date,Unnamed: 1_level_1
2019-04-01,103.3
2019-05-01,105.0
2019-06-01,105.7
2019-07-01,106.8
2019-08-01,107.187539


In [4]:
alcohol_likes = join([alcohol, likes])
alcohol_likes.head()

Unnamed: 0_level_0,Alcohol prices,New page likes
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-01,103.3,10
2019-04-02,,19
2019-04-03,,23
2019-04-04,,12
2019-04-05,,9


In [5]:
interpolated = alcohol_likes.interpolate()
interpolated.head()

Unnamed: 0_level_0,Alcohol prices,New page likes
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-01,103.3,10
2019-04-02,103.356667,19
2019-04-03,103.413333,23
2019-04-04,103.47,12
2019-04-05,103.526667,9


In [6]:
rolling_mean = interpolated.rolling(window=32).mean()
rolling_mean = rolling_mean.fillna(method='bfill')
rolling_mean.head()

Unnamed: 0_level_0,Alcohol prices,New page likes
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-01,104.177268,14.21875
2019-04-02,104.177268,14.21875
2019-04-03,104.177268,14.21875
2019-04-04,104.177268,14.21875
2019-04-05,104.177268,14.21875


In [7]:
beer_sales = 250 - (rolling_mean['Alcohol prices'].copy() - 100) * 20
beer_sales += np.random.normal(scale=10, size=len(beer_sales))
beer_sales += np.sin(np.linspace(0, 2 * np.pi * len(beer_sales) / 7, len(beer_sales))) * 10
beer_sales += np.sin(np.linspace(0, 2 * np.pi * len(beer_sales) / 365, len(beer_sales))) * 15
np.corrcoef(beer_sales, rolling_mean['Alcohol prices'])

array([[ 1.        , -0.84448032],
       [-0.84448032,  1.        ]])

In [8]:
beer_sales.min()

47.31752905182603

In [9]:
lemonade_sales = 100 + rolling_mean['New page likes'].copy()
lemonade_sales += np.random.normal(scale=4, size=len(lemonade_sales))
lemonade_sales += np.cos(np.linspace(0, 2 * np.pi * len(lemonade_sales) / 7, len(lemonade_sales))) * 1
lemonade_sales += np.cos(np.linspace(0, 2 * np.pi * len(lemonade_sales) / 365, len(lemonade_sales))) * 2
np.corrcoef(lemonade_sales, rolling_mean['New page likes'])

array([[1.        , 0.56576679],
       [0.56576679, 1.        ]])

In [10]:
pd.DataFrame.from_dict({
    'Date': interpolated.index,
    'Lemonade sales': lemonade_sales,
    'Beer sales': beer_sales
}).to_excel('../examples/beverage-sales.xls', index=False)