In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

import pandas as pd
from fbprophet import Prophet

<IPython.core.display.Javascript object>

In [3]:
# Read the raw time-series CSV and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="data/data_set_time_series.csv")
data.head(n=5)

Unnamed: 0,date,Final_times_viewed,Final_price
0,2017-11-28,15047.0,14450.54
1,2017-11-29,14159.0,13411.96
2,2017-11-30,15766.0,13290.25
3,2017-12-01,12524.0,9687.28
4,2017-12-02,3313.0,1443.26


<IPython.core.display.Javascript object>

In [4]:
# Step 1: keep only rows with a strictly positive price.
data = data.loc[lambda frame: frame["Final_price"] > 0]
# Step 2: drop the upper tail -- the 98th percentile is computed on the rows
# that survived step 1, and rows at or above it are removed.
data = data.loc[
    lambda frame: frame["Final_price"] < frame["Final_price"].quantile(0.98)
]
data

Unnamed: 0,date,Final_times_viewed,Final_price
0,2017-11-28,15047.0,14450.54
1,2017-11-29,14159.0,13411.96
2,2017-11-30,15766.0,13290.25
3,2017-12-01,12524.0,9687.28
4,2017-12-02,3313.0,1443.26
...,...,...,...
490,2019-07-26,7552.0,3631.94
491,2019-07-28,7004.0,3150.26
492,2019-07-29,7025.0,4374.54
493,2019-07-30,8492.0,16374.76


<IPython.core.display.Javascript object>

In [5]:
# One row per calendar day between the first and last observed date, so that
# days missing from the data can be surfaced by the join that follows.
full_dates = pd.DataFrame(
    {"date": pd.date_range(start=data["date"].min(), end=data["date"].max())}
)

<IPython.core.display.Javascript object>

In [6]:
# Re-index the observations onto the complete daily calendar. Days without an
# observation become NaN rows (how="right" keeps every calendar date).
#
# `date` arrives from read_csv as plain strings while `full_dates` holds real
# datetimes. Converting explicitly makes both indexes datetime64 so the join
# matches on actual dates instead of relying on implicit string coercion
# (without it, no key matches and every row comes back NaN).
data = (
    data.assign(date=pd.to_datetime(data["date"]))
    .set_index("date")
    .join(full_dates.set_index("date"), how="right")
)
data.head(20)

Unnamed: 0_level_0,Final_times_viewed,Final_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-28,15047.0,14450.54
2017-11-29,14159.0,13411.96
2017-11-30,15766.0,13290.25
2017-12-01,12524.0,9687.28
2017-12-02,3313.0,1443.26
2017-12-03,8544.0,6432.94
2017-12-04,12751.0,13137.5
2017-12-05,12604.0,9467.01
2017-12-06,10510.0,11268.54
2017-12-07,11985.0,10976.6


<IPython.core.display.Javascript object>

In [7]:
# Fill the NaN rows introduced by the calendar join with linear interpolation.
# The result is assigned back to the column: calling
# `data[col].interpolate(inplace=True)` mutates a temporary column selection,
# which is a chained in-place operation that no longer updates `data` when
# pandas Copy-on-Write is active.
data["Final_price"] = data["Final_price"].interpolate(method="linear")
data["Final_times_viewed"] = data["Final_times_viewed"].interpolate(method="linear")
data.head(20)

Unnamed: 0_level_0,Final_times_viewed,Final_price
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-28,15047.0,14450.54
2017-11-29,14159.0,13411.96
2017-11-30,15766.0,13290.25
2017-12-01,12524.0,9687.28
2017-12-02,3313.0,1443.26
2017-12-03,8544.0,6432.94
2017-12-04,12751.0,13137.5
2017-12-05,12604.0,9467.01
2017-12-06,10510.0,11268.54
2017-12-07,11985.0,10976.6


<IPython.core.display.Javascript object>

In [8]:
# Express each price as a fraction of the largest price in the series.
data["Final_price_scaled"] = data["Final_price"].div(data["Final_price"].max())

<IPython.core.display.Javascript object>

# Time Series

In [21]:
import plotly.graph_objects as go


def format_to_prophet(serie_ds, serie_y):
    """Build the two-column frame (``ds``, ``y``) passed to Prophet.

    Args:
        serie_ds: Values for the ``ds`` column.
        serie_y: Values for the ``y`` column.

    Returns:
        A new DataFrame whose columns are ``ds`` and ``y``, in that order.
        ``ds`` is assigned first, so it fixes the row index and ``y`` is
        aligned to it when both inputs are Series.
    """
    frame = pd.DataFrame()
    for column, values in (("ds", serie_ds), ("y", serie_y)):
        frame[column] = values
    return frame


def train_predict(
    data,
    periods,
    freq="W",
    plot=False,
    yearly_seasonality=False,
    cps=1,
    changepoint_range=0.8,
    holdout_only=False,
):
    """Fit Prophet on all but the last ``periods`` rows and score the forecast.

    The last ``periods`` rows of ``data`` are withheld from fitting. The model
    then predicts the training dates plus ``periods`` future steps of ``freq``,
    and the prediction is compared row-by-row (by position) with ``data["y"]``.

    Args:
        data: DataFrame with ``ds`` and ``y`` columns, ordered in time.
        periods: Number of trailing rows to withhold and to forecast.
        freq: Step passed to ``make_future_dataframe``. It should match the
            spacing of ``data["ds"]``; otherwise the forecast dates differ
            from the withheld dates and a warning is emitted.
        plot: When True, show a plotly figure, print the metrics and return
            the fitted objects; when False, return only the metrics.
        yearly_seasonality: Forwarded to ``Prophet``.
        cps: Forwarded to ``Prophet`` as ``changepoint_prior_scale``.
        changepoint_range: Forwarded to ``Prophet``.
        holdout_only: When False (default, the original behaviour) metrics
            cover every row, i.e. training rows and withheld rows together.
            When True, metrics cover only the last ``periods`` rows.

    Returns:
        ``(model, forecast, future)`` when ``plot`` is True, otherwise a dict
        with keys ``"CPS"``, ``"R2"``, ``"MSE"`` and ``"MAE"`` (metrics rounded
        to three decimals).

    Raises:
        ValueError: If ``periods`` is negative or not smaller than
            ``len(data)``, or if ``holdout_only`` is set with ``periods == 0``.
    """
    import warnings

    n_rows = len(data)
    if periods < 0 or periods >= n_rows:
        raise ValueError(
            f"periods must satisfy 0 <= periods < len(data); "
            f"got periods={periods} with {n_rows} rows"
        )
    if holdout_only and periods == 0:
        raise ValueError("holdout_only=True requires periods > 0")

    model = Prophet(
        yearly_seasonality=yearly_seasonality,
        changepoint_range=changepoint_range,
        changepoint_prior_scale=cps,
    )
    # Slice by explicit length: `data[:-periods]` is empty when periods == 0.
    model.fit(data.iloc[: n_rows - periods])

    future = model.make_future_dataframe(
        periods=periods, freq=freq, include_history=True
    )
    forecast = model.predict(future)

    # Metrics pair rows by position, so the forecast dates must be the same as
    # the dates in `data`. Warn instead of silently scoring mismatched dates.
    if len(forecast) == n_rows:
        actual_ds = pd.to_datetime(data["ds"]).to_numpy()
        if (forecast["ds"].to_numpy() != actual_ds).any():
            warnings.warn(
                "Forecast dates do not match data['ds'] (check that freq "
                "matches the data spacing); metrics compare misaligned rows.",
                stacklevel=2,
            )

    if holdout_only:
        y_true = data["y"].iloc[-periods:]
        y_pred = forecast["yhat"].iloc[-periods:]
    else:
        # NOTE(review): this scores training rows together with the withheld
        # rows, so the numbers are largely in-sample fit quality.
        y_true = data["y"]
        y_pred = forecast["yhat"]

    r2 = round(r2_score(y_true, y_pred), 3)
    mse = round(mean_squared_error(y_true, y_pred), 3)
    mae = round(mean_absolute_error(y_true, y_pred), 3)

    if plot:
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=forecast["ds"],
                y=forecast["yhat"],
                mode="lines",
                name="Predict Values",
            )
        )
        fig.add_trace(
            go.Scatter(
                x=forecast["ds"], y=forecast["trend"], mode="lines", name="Trend"
            )
        )
        fig.add_trace(
            go.Scatter(
                x=data["ds"],
                y=data["y"],
                mode="lines",
                name="Real Values",
            )
        )
        fig.update_layout(
            title_text="Company's visualization",
            yaxis_title="Sales",
            xaxis_title="Date",
        )

        fig.show()
        print("R2: ", r2)
        print("MSE: ", mse)
        print("MAE: ", mae)
        return model, forecast, future
    else:
        return {"CPS": cps, "R2": r2, "MSE": mse, "MAE": mae}


# Model input: the calendar date as `ds` and the view count as the target `y`.
# reset_index() turns the `date` index back into a regular column.
data_prophet = format_to_prophet(
    serie_ds=data.reset_index()["date"],
    serie_y=data.reset_index()["Final_times_viewed"],
)


<IPython.core.display.Javascript object>

In [19]:
from joblib import Parallel, delayed
import numpy as np

# Candidate changepoint_prior_scale values: 100 evenly spaced points between
# 0.001 and 5, rounded to three decimals.
cps_options = [round(value, 3) for value in np.linspace(0.001, 5, num=100)]

# Fit one model per candidate in parallel; each call returns a metrics dict.
results = Parallel(n_jobs=-1, verbose=10)(
    delayed(train_predict)(
        data=data_prophet,
        periods=30,
        freq="D",
        plot=False,
        yearly_seasonality=True,
        cps=candidate,
    )
    for candidate in cps_options
)

# Keep the row(s) with the highest R2, then among those the lowest MSE.
results = pd.DataFrame(results)
results = results.loc[results["R2"].isin([max(results["R2"])])]
results = results.loc[results["MSE"].isin([min(results["MSE"])])]
results

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   13.1s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   15.0s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   22.0s
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   27.6s
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   34.8s
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:   45.2s
[Parallel(n_jobs=-1)]: Done  61 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done  74 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done  88 out of 100 | elapsed:  1.7min remaining:   13.8s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  1.9min finished


Unnamed: 0,CPS,R2,MSE,MAE
11,0.556,0.692,4173690.339,1507.191


<IPython.core.display.Javascript object>

In [22]:
# Refit with the selected changepoint_prior_scale (first row of the filtered
# grid-search table); plot=True shows the figure and returns the fitted objects.
model, forecast, future = train_predict(
    data=data_prophet,
    periods=30,
    freq="D",
    yearly_seasonality=True,
    cps=results["CPS"].iloc[0],
    plot=True,
)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


R2:  0.692
MSE:  4173690.339
MAE:  1507.191


<IPython.core.display.Javascript object>

In [35]:
# Echo the fitted Prophet object returned by train_predict as a quick check.
model

<fbprophet.forecaster.Prophet at 0x2057f4ce4c0>

<IPython.core.display.Javascript object>

In [33]:
import joblib

# Persist the fitted model to disk; the call returns the list of written files.
joblib.dump(value=model, filename="model/prophet.joblib")

['model/prophet.joblib']

<IPython.core.display.Javascript object>

In [34]:
# Round-trip check: reload the model that was just written.
# NOTE: joblib files are pickle-based -- only load files from a trusted source.
joblib.load(filename="model/prophet.joblib")

<fbprophet.forecaster.Prophet at 0x2058319cb80>

<IPython.core.display.Javascript object>