In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


def load_dataset(input_dir='../input/m5-forecasting-accuracy'):
    """Read the calendar, sales and price CSV files with compact dtypes.

    Parameters
    ----------
    input_dir : str, optional
        Directory that contains ``calendar.csv``,
        ``sales_train_validation.csv`` and ``sell_prices.csv``.
        Defaults to the path the notebook originally hard-coded.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(calendar, sales, prices)``, in that order.
    """
    int_type = 'int16'
    # float16 has an 11-bit significand, so a price such as 30.98 would be
    # stored as 30.984375 and every revenue figure derived from it would be
    # off. float32 keeps prices accurate to well below one cent.
    float_type = 'float32'

    calendar_int_cols = ['wm_yr_wk', 'wday', 'month', 'year', 'snap_CA', 'snap_TX', 'snap_WI']
    calendar_dtypes = dict.fromkeys(calendar_int_cols, int_type)
    calendar = pd.read_csv(f'{input_dir}/calendar.csv', dtype=calendar_dtypes)

    # Daily unit-sales columns d_1 ... d_1913 are read as small integers.
    sales_dtypes = {f'd_{i}': int_type for i in range(1, 1913 + 1)}
    sales = pd.read_csv(f'{input_dir}/sales_train_validation.csv', dtype=sales_dtypes)

    prices_dtypes = {'wm_yr_wk': int_type, 'sell_price': float_type}
    prices = pd.read_csv(f'{input_dir}/sell_prices.csv', dtype=prices_dtypes)

    return (calendar, sales, prices)

In [None]:
# Read the three tables once; the cells below reuse these notebook-level frames.
(calendar, sales, prices) = load_dataset()

# Weekly Sales

In [None]:
%%time

# One frame per week is collected here and concatenated once after the loop;
# concatenating inside the loop would re-copy all earlier weeks every iteration.
weekly_frames = []

# Identifier columns shared by every per-week frame (loop-invariant).
item_store = sales.loc[:, ['item_id', 'store_id']]

# Day columns that actually exist in `sales`.
available_days = set(sales.columns)

# What is the last value of `wm_yr_wk` covered by the sales data?
last_wm_yr_wk = calendar[calendar['d'] == 'd_1913']['wm_yr_wk'].iloc[0]

for wm_yr_wk in calendar.loc[(calendar['wm_yr_wk'] <= last_wm_yr_wk), 'wm_yr_wk'].unique():
    # Which days are included in `wm_yr_wk`?
    # The week that contains d_1913 may extend past the last sales column;
    # selecting labels that are not columns raises KeyError in current pandas,
    # so only the days present in `sales` are kept.
    days_in_wk = [
        day for day in calendar.loc[calendar['wm_yr_wk'] == wm_yr_wk, 'd']
        if day in available_days
    ]

    # How many units of item `item_id` were sold in store `store_id`?
    # Columns look like:
    #     | item_id | store_id | #sold |
    units_sold = sales.loc[:, days_in_wk].sum(axis=1).rename('#sold').astype('int16')
    item_sold = pd.concat([item_store, units_sold], axis=1)

    # What was the price of item `item_id` in week `wm_yr_wk`?
    # Columns look like:
    #     | item_id | store_id | sell_price |
    item_price = prices.loc[prices['wm_yr_wk'] == wm_yr_wk, ['store_id', 'item_id', 'sell_price']]
    # Item/store pairs without a price row for this week come out of the left
    # merge as NaN; treating them as 0 is harmless for the revenue computation.
    item_price = pd.merge(item_store, item_price, on=['item_id', 'store_id'], how='left').fillna(0)

    # How much revenue did item `item_id` earn in store `store_id`?
    # Columns look like:
    #     | item_id | store_id | sell_price | #sold | sales |
    week_sales = pd.merge(item_price, item_sold, on=['item_id', 'store_id'], how='left')
    week_sales['sales'] = week_sales[['sell_price', '#sold']].product(axis=1).astype('float32')

    # Add `wm_yr_wk`.
    week_sales['wm_yr_wk'] = wm_yr_wk
    week_sales['wm_yr_wk'] = week_sales['wm_yr_wk'].astype('int16')

    weekly_frames.append(week_sales)

# Stack all weeks vertically. The per-week row index is kept as-is, exactly as
# repeated concatenation inside the loop would have produced.
weekly_sales = pd.concat(weekly_frames)

In [None]:
# Parse the calendar dates into datetimes.
calendar['date'] = pd.to_datetime(calendar['date'])

# Map every distinct `wm_yr_wk` code (in ascending order) to 0, 1, 2, ...
wm_yr_wk_values = np.unique(calendar['wm_yr_wk'].values)
wm_yr_wk_values_new = np.arange(wm_yr_wk_values.size)
wm_yr_wk_values_replace = {
    old_code: new_index
    for old_code, new_index in zip(wm_yr_wk_values, wm_yr_wk_values_new)
}

# Attach the consecutive week number to every calendar row.
calendar['#week'] = calendar['wm_yr_wk'].replace(wm_yr_wk_values_replace)

In [None]:
# Show the original week code next to the new consecutive week number.
calendar.loc[:, ['wm_yr_wk', '#week']].head(16)

If you look at this dataframe carefully, you will notice that `wm_yr_wk` is not sequential.  


In [None]:
# Smallest and largest week code in the calendar.
(calendar['wm_yr_wk'].min(), calendar['wm_yr_wk'].max())

In [None]:
# Largest value of the last two digits of the week code.
calendar['wm_yr_wk'].mod(100).max()

The two cells above show that `wm_yr_wk` is not sequential.  
So we re-index `wm_yr_wk` from zero and store the result as `#week`.


In [None]:
# Translate each week code to its consecutive week number with Series.map:
# it does a single lookup per row, whereas Series.replace with a dict of this
# many keys is much slower on large frames.
# A week code missing from the lookup becomes NaN, so the int16 cast fails
# loudly instead of letting a raw week code slip into `#week`.
prices['#week'] = prices['wm_yr_wk'].map(wm_yr_wk_values_replace).astype('int16')
weekly_sales['#week'] = weekly_sales['wm_yr_wk'].map(wm_yr_wk_values_replace).astype('int16')

In [None]:
def plot_weekly_sales(item_id, fig=None):
    """Draw weekly revenue bar charts for one item, one panel per store.

    Reads the notebook-level ``weekly_sales`` frame, which must already have
    the ``item_id``, ``store_id``, ``#week`` and ``sales`` columns.

    Parameters
    ----------
    item_id : str
        Item to plot, e.g. ``'HOBBIES_1_001'``.
    fig : matplotlib.figure.Figure, optional
        Figure to draw into. When omitted, a new 16x9 inch figure is created.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding a 3x4 grid: one row per state, one column per store.
    """
    store_id_axes = [
        ((0, 0), 'CA_1'), ((0, 1), 'CA_2'), ((0, 2), 'CA_3'), ((0, 3), 'CA_4'),
        ((1, 0), 'TX_1'), ((1, 1), 'TX_2'), ((1, 2), 'TX_3'),
        ((2, 0), 'WI_1'), ((2, 1), 'WI_2'), ((2, 2), 'WI_3')
    ]
    if fig is None:
        fig, axes = plt.subplots(3, 4)
        fig.set_figwidth(16)
        fig.set_figheight(9)
    else:
        axes = fig.subplots(3, 4)

    # Filter by item once; each panel then only scans this item's rows
    # instead of the whole `weekly_sales` frame.
    item_sales = weekly_sales[weekly_sales['item_id'] == item_id]

    for (r, c), store_id in store_id_axes:
        df = item_sales[item_sales['store_id'] == store_id]
        axes[r][c].bar(x=df['#week'], height=df['sales'])
        axes[r][c].set_xlabel('#week')
        axes[r][c].set_ylabel('sales[USD]')
        axes[r][c].set_title(f'{item_id} {store_id}')

    # The TX and WI rows have only three stores each. Hide the unused fourth
    # panel; clearing it with cla() would still leave an empty frame visible.
    axes[1][3].set_axis_off()
    axes[2][3].set_axis_off()
    fig.suptitle(item_id)
    fig.tight_layout()

    return fig

In [None]:
# Example: weekly revenue of a single item across all stores.
fig = plot_weekly_sales(item_id='HOBBIES_1_001')

In [None]:
# Preview the first ten rows of the weekly revenue table.
weekly_sales.iloc[:10]

In [None]:
# Save the weekly revenue table (row index included) to the working directory.
weekly_sales.to_csv(path_or_buf='weekly_sales.csv')

I'd be happy if you upvote or comment! Thank you.