In [12]:
# Suppress all warnings for the rest of the session so they do not appear in cell output.
import warnings
warnings.filterwarnings('ignore')
import os
import pandas as pd

# Print the installed plotly version so it can be checked against the minimum noted below.
from plotly import __version__
print(__version__) # need 1.9.0 or greater
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import graph_objs as go
import requests
# NOTE(review): pandas is already imported a few lines above; this repeat is redundant.
import pandas as pd

# Initialise plotly's notebook mode before any iplot() call is made.
init_notebook_mode(connected = True)

def plotly_df(df, title = ''):
    """Draw every column of ``df`` as a line in a single plotly figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to plot. Its index supplies the x values and each column
        becomes one line trace named after the column.
    title : str, optional
        Title placed in the figure layout (empty by default).

    Returns
    -------
    None
        The figure is displayed through ``iplot`` with ``show_link=False``.
    """
    # One Scatter trace per column, in column order.
    traces = [
        go.Scatter(x = df.index, y = df[col], mode = 'lines', name = col)
        for col in df.columns
    ]
    figure = {'data': traces, 'layout': {'title': title}}
    iplot(figure, show_link=False)
    
%matplotlib inline
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
import math
from fbprophet import Prophet
import numpy as np

1.12.9


In [13]:
def prepare_data(csv_file, window = 60):
    """Load per-post favourites from a CSV and aggregate them into a daily series.

    Parameters
    ----------
    csv_file : str or file-like
        Anything ``pandas.read_csv`` accepts. The data must contain a
        ``published`` column (parsed with ``pandas.to_datetime``) and a
        ``favs_lognorm`` column.
    window : int, optional
        Number of rows in the trailing rolling mean (default 60).

    Returns
    -------
    pandas.DataFrame
        Indexed by calendar day (``published``), with two columns:
        ``favs`` - the daily sum of ``expm1(favs_lognorm)``, and
        ``favs_mean60`` - the trailing rolling mean of ``favs``.
        The column keeps the name ``favs_mean60`` whatever ``window`` is.
    """
    # presumably favs_lognorm stores log1p-transformed counts; expm1 undoes that - TODO confirm
    daily = (
        pd.read_csv(csv_file)
        .assign(
            published = lambda raw: pd.to_datetime(raw['published']),
            favs = lambda raw: raw['favs_lognorm'].apply(math.expm1),
        )
        .set_index('published')[['favs']]
        .resample('D')
        .sum()
        .fillna(0)
    )
    # The first (window - 1) rows of the rolling mean are NaN.
    daily['favs_mean60'] = daily['favs'].rolling(window = window, center = False).mean()
    return daily

In [8]:
def predict(df, n_preds):
    """Fit a default Prophet model on ``favs_mean60`` and return its forecast.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``favs_mean60`` column; its index becomes Prophet's
        ``ds`` column and ``favs_mean60`` becomes ``y``.
    n_preds : int
        Number of periods passed to ``make_future_dataframe``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``Prophet.predict`` returns for the frame built by
        ``make_future_dataframe(periods=n_preds)``.
    """
    history = df[['favs_mean60']].reset_index()
    # Prophet expects exactly these two column names.
    history.columns = ['ds', 'y']

    model = Prophet()
    model.fit(history)

    horizon = model.make_future_dataframe(periods=n_preds)
    forecast = model.predict(horizon)
    return forecast

In [9]:
def calc_error(y, y_pred):
    """Return the mean absolute error and mean absolute percentage error.

    Parameters
    ----------
    y : pandas.Series
        Observed values. It is joined onto ``y_pred`` by index, so it must
        be a named Series whose index matches that of ``y_pred``.
    y_pred : pandas.DataFrame
        Forecast frame; its first column holds the predicted values.

    Returns
    -------
    tuple
        ``(mae, mape)`` where ``mape`` is expressed in percent.

    Notes
    -----
    ``y`` is left-joined onto ``y_pred``, so only index values present in
    ``y_pred`` take part in the scores.

    NOTE(review): the percentage error is divided by the first joined column
    (the forecast), not by the observed value. That is kept unchanged here so
    existing results stay comparable - confirm it is intended.
    """
    cmp_df = y_pred.join(y)
    # Positional column access via .iloc: the .ix indexer used previously was
    # deprecated in pandas 0.20 and removed in pandas 1.0.
    predicted = cmp_df.iloc[:, 0]
    observed = cmp_df.iloc[:, 1]
    e = predicted - observed
    pe = 100 * e / predicted
    return np.mean(e.abs()), np.mean(pe.abs())

In [10]:
# Hold out the last n_pred rows of the Habr daily series: fit on everything
# before them and ask the model for n_pred further periods.
n_pred = 120
habr_by_day = prepare_data(csv_file = 'habr_favs.csv')
prediction = predict(df = habr_by_day.iloc[:-n_pred], n_preds = n_pred)

In [14]:
# Score the Habr hold-out forecast. The slice stops 30 rows before the end,
# so the final 30 rows of the hold-out window are not scored.
calc_error(
    y = habr_by_day['favs_mean60'].iloc[-n_pred:-30],
    y_pred = prediction.iloc[-n_pred:-30].set_index('ds')[['yhat']],
)

(503.8197657138455, 27.563974326175554)

In [15]:
# Hold-out evaluation for the GT data: fit on all but the last n_pred rows,
# forecast n_pred periods, then score every hold-out row except the final 30.
gt_by_day = prepare_data(csv_file = 'gt_favs.csv')
prediction_gt = predict(df = gt_by_day.iloc[:-n_pred], n_preds = n_pred)
calc_error(
    y = gt_by_day['favs_mean60'].iloc[-n_pred:-30],
    y_pred = prediction_gt.iloc[-n_pred:-30].set_index('ds')[['yhat']],
)

(165.30230490579905, 21.982653061858453)

In [16]:
# Plot the last 500 rows of the GT rolling mean against the last 500 rows of
# the hold-out forecast, aligned on date.
plotly_df(
    gt_by_day[['favs_mean60']].iloc[-500:].join(
        prediction_gt.iloc[-500:].set_index('ds')[['yhat']]
    )
)

In [17]:
# Plot the last 500 rows of the Habr rolling mean against the last 500 rows of
# the hold-out forecast, aligned on date.
plotly_df(
    habr_by_day[['favs_mean60']].iloc[-500:].join(
        prediction.iloc[-500:].set_index('ds')[['yhat']]
    )
)

In [18]:
# Refit on the complete Habr daily series and request 120 further forecast periods.
pred_habr_full = predict(habr_by_day, 120)

In [19]:
# Show the last rows of the full Habr forecast frame.
pred_habr_full.tail()

Unnamed: 0,ds,t,trend,seasonal_lower,seasonal_upper,trend_lower,trend_upper,yhat_lower,yhat_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,seasonal,yhat
3959,2017-04-26,1.030655,1371.596359,43.593618,43.593618,1269.072083,1464.619129,1162.447727,1674.244523,13.984542,13.984542,13.984542,29.609076,29.609076,29.609076,43.593618,1415.189977
3960,2017-04-27,1.03092,1368.094057,56.099347,56.099347,1264.099126,1465.282433,1179.513266,1674.566426,33.51769,33.51769,33.51769,22.581657,22.581657,22.581657,56.099347,1424.193404
3961,2017-04-28,1.031184,1364.591754,37.896988,37.896988,1258.925505,1465.477997,1138.40373,1649.777856,22.631969,22.631969,22.631969,15.265019,15.265019,15.265019,37.896988,1402.488742
3962,2017-04-29,1.031448,1361.089451,1.578981,1.578981,1253.659332,1464.096856,1097.457455,1616.220761,-6.159191,-6.159191,-6.159191,7.738172,7.738172,7.738172,1.578981,1362.668432
3963,2017-04-30,1.031712,1357.587148,-28.828295,-28.828295,1247.965408,1461.54536,1070.19575,1576.113138,-28.9112,-28.9112,-28.9112,0.082905,0.082905,0.082905,-28.828295,1328.758853


In [20]:
# Refit on the complete GT daily series and request 120 further forecast periods.
pred_gt_full = predict(gt_by_day, 120)

In [130]:
# Outer-join the observed Habr rolling mean with the full forecast on date, so
# dates present in only one of the two frames are kept, then plot both lines.
plotly_df(
    habr_by_day[['favs_mean60']].join(
        pred_habr_full.set_index('ds')[['yhat']],
        how = 'outer',
    )
)

In [138]:
# Outer-join the observed GT rolling mean with the full forecast on date, so
# dates present in only one of the two frames are kept, then plot both lines.
plotly_df(
    gt_by_day[['favs_mean60']].join(
        pred_gt_full.set_index('ds')[['yhat']],
        how = 'outer',
    )
)

In [24]:
# Combine the observed Habr rolling mean and the full forecast (outer join on
# date) and write the result to CSV.
habr_data = habr_by_day[['favs_mean60']].join(
    pred_habr_full.set_index('ds')[['yhat']],
    how = 'outer',
)
habr_data.to_csv('habr_favs_mean_pred.csv')

In [25]:
# Combine the observed GT rolling mean and the full GT forecast (outer join on
# date) and write the result to CSV.
# The frame is bound to gt_data: it was previously assigned to habr_data, which
# silently replaced the Habr frame with GT data under a misleading name.
gt_data = pd.merge(
    gt_by_day[['favs_mean60']],
    pred_gt_full[['ds', 'yhat']].set_index('ds'),
    left_index = True,
    right_index = True,
    how = 'outer',
)
gt_data.to_csv('gt_favs_mean_pred.csv')