In [1]:
import pandas as pd
from fbprophet import Prophet
def load_data(data_path):
    train = pd.read_csv('%s/train.csv' % data_path, parse_dates=['date'],
                       skiprows=range(1, 86672217) #Skip dates before 2016-08-01
                       )
    train.loc[train['unit_sales']<0, 'unit_sales']=0
    test = pd.read_csv('%s/test.csv' % data_path, parse_dates=['date'])
    items = pd.read_csv('%s/items.csv' % data_path)
    return train, test, items
def get_test_good(train, test):
    train_last_date = train.groupby(['item_nbr', 'store_nbr'])['date'].max().reset_index()
    train_last_date.rename(columns={'date':'last_date'}, inplace=True)
    train_good_item_store = train_last_date[train_last_date['last_date']>=pd.to_datetime('2016-08-01')][['item_nbr', 'store_nbr', 'last_date']]
    test_item_store = test.groupby(['item_nbr', 'store_nbr'])['date'].size().reset_index()[['item_nbr', 'store_nbr']]
    test_item_store_join_good = pd.merge(test_item_store, train_good_item_store, on=['item_nbr', 'store_nbr'], how='left')
    test_item_store_good = test_item_store_join_good[test_item_store_join_good['last_date'].notnull()][['item_nbr', 'store_nbr']]
    test_good=pd.merge(test_item_store_good, test, on=['item_nbr', 'store_nbr'], how='left')
    return test_good

In [2]:
data_path = './data'
train, test, items = load_data(data_path)
test_good = get_test_good(train, test)
test_good_item_store = test_good.groupby(['item_nbr', 'store_nbr'])['date'].count().reset_index()[['item_nbr', 'store_nbr']]

In [3]:
def fill_missing_date(df, total_dates=train['date'].unique()):
    idx = df.iloc[-1:,0].values[0]
    for d in set(total_dates)-set(df['date'].unique()): 
        idx+=1
        df.loc[idx, ['date', 'item_nbr', 'store_nbr']]= [pd.to_datetime(d), int(df.iloc[0]['item_nbr']), int(df.iloc[0]['store_nbr'])]
    return df
def get_predictions_for_test_good(test_good, train):
    total_dates = train['date'].unique()
    result = pd.DataFrame(columns=['id', 'unit_sales'])
    problem_pairs = []
    for name, y in test_good.groupby(['item_nbr', 'store_nbr']):
        item_nbr=name[0]
        store_nbr = name[1]
        df = train[(train.item_nbr==item_nbr)&(train.store_nbr==store_nbr)]
        print("item_nbr :",item_nbr,"store_nbr :", store_nbr, "df :", df.shape, df['date'].max())
        CV_SIZE = 16 #if you make it bigger, fill missing dates in cv with 0 if any
        TRAIN_SIZE = 365
        total_dates = train['date'].unique()
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])
        X = df[-TRAIN_SIZE:]
        print('Train on: {}'.format(X.shape))
        X = X[['date','unit_sales']]
        X.columns = ['ds', 'y']
        m = Prophet(yearly_seasonality=True)
        try: 
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue           
        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        data = pred[['ds','yhat']].tail(CV_SIZE)
        data = pred[['ds','yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        result = result.append(data[['id', 'unit_sales']])
        print("result", result.shape)
    return (result, problem_pairs)
def get_full_predictions_for_test_good(test_good, train):
    total_dates = train['date'].unique()
    result = pd.DataFrame(columns=['id', 'unit_sales'])
    problem_pairs = []
    for name, y in test_good.groupby(['item_nbr', 'store_nbr']):
        item_nbr=name[0]
        store_nbr = name[1]
        df = train[(train.item_nbr==item_nbr)&(train.store_nbr==store_nbr)]
        CV_SIZE = 16 #if you make it bigger, fill missing dates in cv with 0 if any
        TRAIN_SIZE = 365
        total_dates = train['date'].unique()
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])
        X = df[-TRAIN_SIZE:]
        X = X[['date','unit_sales']]
        X.columns = ['ds', 'y']
        m = Prophet(yearly_seasonality=True)
        try: 
            m.fit(X)
        except ValueError:
            print("problem for this item store pair", item_nbr)
            problem_pairs.append((item_nbr, store_nbr))
            continue           
        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        data = pred.tail(CV_SIZE)
        data = pred.merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        result = result.append(data.loc[:, data.columns != 'ds'])
        print("result", result.shape)
    return (result, problem_pairs)

In [6]:
test_result_part, problem_pairs_part = get_full_predictions_for_test_good(test_good, train)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
INFO:fbprophet.forecaster:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet.forecaster:n_changepoints greater than number of observations.Using 19.0.


result (5, 24)
