In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

from fbprophet import Prophet

In [2]:
# function to run Prophet and return the forecast
def ProphetForecast(df_input, forecast_end='2016-1-31', floor=-2):
    """Fit a logistic-growth Prophet model on one item and forecast daily values.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Training data with a datetime ``ds`` column and a numeric ``y`` column.
        The frame is handed to ``Prophet.fit`` unchanged, so any extra columns
        that logistic growth needs for fitting (e.g. ``cap``) must already be
        present on it.
    forecast_end : str or datetime-like, default '2016-1-31'
        Last calendar day the forecast should reach.
    floor : float, default -2
        Lower saturation bound written to the future frame, in the same units
        as ``y``. The notebook log-transforms ``y`` with ``np.log`` (natural
        log), so the default of -2 corresponds to exp(-2) ~= 0.135 items per
        day; it would only mean 0.01 in base 10.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``Prophet.predict`` for the history dates plus
        the future dates up to ``forecast_end``.
    """
    m = Prophet(daily_seasonality=False, yearly_seasonality=True,
                seasonality_mode='multiplicative', growth='logistic')
    m.fit(df_input)
    # Horizon = whole days between the last observed day and forecast_end.
    days_to_predict = pd.to_datetime(forecast_end) - df_input['ds'].max()
    # If the history already reaches past forecast_end the difference is
    # negative; a negative horizon is meaningless, so predict history only.
    periods = max(days_to_predict.days, 0)
    future = m.make_future_dataframe(periods=periods)
    # Saturation bounds for the logistic trend over the prediction dates.
    future['floor'] = floor
    future['cap'] = df_input['y'].max()
    forecast = m.predict(future)
    return forecast

In [3]:
# Load the raw sales records, add a parsed datetime column 'ds' (the 'date'
# strings are read day-first), and keep only rows with a positive daily count,
# i.e. drop zero and negative item counts.
df_sales = (
    pd.read_csv('sales_train_v2.csv')
    .assign(ds=lambda frame: pd.to_datetime(frame['date'], dayfirst=True))
    .loc[lambda frame: frame['item_cnt_day'] > 0]
)


# Create dataframe of most sold items

In [4]:
# Rank items by the total number of units sold over the whole data set
# (best seller first) and keep only the heavy hitters.
df_sales_totalitem = (
    df_sales.loc[df_sales['item_cnt_day'] > 0]
    .groupby(['item_id'])['item_cnt_day']
    .sum()
    .reset_index(name='item_cnt_total')
    .sort_values(by='item_cnt_total', ascending=False)
    # to start with, only items that sold more than 10000 units are kept
    .loc[lambda totals: totals['item_cnt_total'] > 10000]
)

# Loop starts here

In [8]:
# create a yhat dataframe to put the forecasted data for each item
# df_sales holds one row per sale, so its 'ds' column repeats every date once
# per transaction. Keep one row per calendar day; otherwise each outer merge
# below copies the forecast value onto every duplicate of that date.
yhat = df_sales[['ds']].drop_duplicates()

# inclusive window used for the per-item November 2015 summary
nov_start = pd.to_datetime('2015-11-1')
nov_end = pd.to_datetime('2015-11-30')

for index, row in df_sales_totalitem.iterrows():
    # iterrows() hands item_id back as a float; the integer form is used as
    # the column label for this item in yhat
    item_col = str(int(row['item_id']))

    # create df for prophet
    df_fbprophet = df_sales.loc[(df_sales['item_id']==row['item_id'])]
    # sum the total sold for each day for this item
    df_fbprophet = df_fbprophet.groupby(['ds'])['item_cnt_day'].sum().reset_index(name='y')

    # convert to log space (np.log is the natural logarithm)
    df_fbprophet['y'] = np.log(df_fbprophet['y'])

    # saturation bounds for logistic growth, in natural-log space:
    # a floor of -2 is exp(-2) ~= 0.135 items/day (it would be 0.01 only in base 10)
    df_fbprophet['floor']=-2
    df_fbprophet['cap']=df_fbprophet['y'].max()
    forecast = ProphetForecast(df_fbprophet)

    # forecast for the whole month of November: back-transform the daily
    # predictions out of log space and add them up
    in_november = (forecast['ds']>=nov_start) & (forecast['ds']<=nov_end)
    nov_total = np.exp(forecast.loc[in_november, 'yhat']).sum()
    print('item_id: ' + str(row['item_id']) + ', total items:' + str(row['item_cnt_total']), end="")
    print(', Nov 2015 forecast: ' + str(int(round(nov_total))))

    # make a new column in forecast with the name of our item
    forecast[item_col] = np.exp(forecast['yhat'])
    # merge that data into the correct date time in our dataframe
    yhat = pd.merge(yhat, forecast[['ds', item_col]], how='outer', on='ds')


item_id: 20949.0, total items:187660.0, Nov 2015 forecast: 3079
item_id: 2808.0, total items:17255.0, Nov 2015 forecast: 40
item_id: 3732.0, total items:16676.0, Nov 2015 forecast: 45
item_id: 17717.0, total items:15830.0, Nov 2015 forecast: 733
item_id: 5822.0, total items:14522.0, Nov 2015 forecast: 235
item_id: 3734.0, total items:11733.0, Nov 2015 forecast: 45
item_id: 6675.0, total items:10315.0, Nov 2015 forecast: 50
item_id: 3731.0, total items:10105.0, Nov 2015 forecast: 51
item_id: 1855.0, total items:10041.0, Nov 2015 forecast: 37


In [6]:
# Disabled visual check: when uncommented, draws the yhat columns for items
# 17717 (red) and 20949 (blue) against 'ds' on one shared 15x8 axes.
#fig, ax = plt.subplots(figsize=(15,8))
#ax = yhat.plot(ax=ax, x='ds', y='17717',  color='r',   legend=False, grid=True)
#ax = yhat.plot(ax=ax, x='ds', y='20949',  color='b',   legend=False, grid=True)

In [7]:
# Display the rows of yhat dated 1 through 30 November 2015 (both ends inclusive)
yhat.loc[yhat['ds'].between(pd.to_datetime('2015-11-1'), pd.to_datetime('2015-11-30'))]

Unnamed: 0,ds,20949,2808,3732,17717,5822,3734,6675,3731,1855
2928493,2015-11-01,58.297098,0.402873,0.234024,35.214483,3.015651,0.206106,0.008219,0.138976,0.261447
2928494,2015-11-02,42.978768,0.434103,0.300152,26.923541,2.724517,0.279999,0.007171,0.137339,0.308133
2928495,2015-11-03,46.26836,0.433296,0.303582,30.086168,2.829765,0.277942,0.005982,0.139783,0.317642
2928496,2015-11-04,46.301997,0.426908,0.309442,31.674194,2.7905,0.294451,0.004603,0.148141,0.332404
2928497,2015-11-05,48.623078,0.399986,0.300333,34.715506,2.752341,0.289315,0.003959,0.150483,0.307291
2928498,2015-11-06,57.841263,0.384838,0.277235,41.789225,3.175267,0.281959,0.00332,0.151531,0.289026
2928499,2015-11-07,71.373167,0.360218,0.221111,63.770702,3.335777,0.214984,0.002756,0.148848,0.244085
2928500,2015-11-08,61.721221,0.382278,0.240603,60.365724,3.165508,0.21669,0.003261,0.164439,0.259101
2928501,2015-11-09,45.512401,0.412283,0.310075,45.925108,2.865139,0.293572,0.003293,0.166555,0.309506
2928502,2015-11-10,48.820142,0.411797,0.314421,50.944096,2.971503,0.289728,0.003239,0.172679,0.322524
