In [2]:
import pandas as pd
from pathlib import Path

_time_table = {}

def _get_formatted_time(idx):
    if _time_table.get(idx) != None:
        return _time_table[idx]

    days_in_months = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    year = idx // 365

    days = idx % 365

    month = None
    for m, days_in_m in enumerate(days_in_months):
        if days < days_in_m:
            month = m + 1
            break

        days -= days_in_m

    date = days + 1

    _time_table[idx] = int(
        '201{}{:02d}{:02d}'.format(year + 7, month, date))

    return _time_table[idx]

In [7]:
import pickle
import torch
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset

from base import BaseDataLoader

class FamilyMartDataset(Dataset):
    def __init__(self, data_dir):
        # Window size is the number of consecutive days before the target
        # day to choose for the model to predict the sales of the target
        # day. i.e., the target day is not included in window size.
        self._time_window_size = 20

        with open(data_dir / 'sales_data.pkl', 'rb') as file:
            self._sales_data = torch.tensor(pickle.load(file), dtype=torch.float)
        with open(data_dir / 'commodity_codes.pkl', 'rb') as file:
            self._commodity_codes = pickle.load(file)
        with open(data_dir / 'store_codes.pkl', 'rb') as file:
            self._store_codes = pickle.load(file)
    
    def __len__(self):
        return 2 * 365 - self._time_window_size

    def __getitem__(self, idx):
        interval = len(self._store_codes) * len(self._commodity_codes)

        x_index = idx * interval

        x_indices = torch.tensor(
            [range(x_index + i, self._time_window_size * interval, interval)
             for i in range(interval)])

        y_index = (idx + self._time_window_size) * interval

        y_indices = torch.tensor(
            [y_index + i for i in range(interval)])

        return self._sales_data[x_indices], self._sales_data[y_indices]

class FamilyMartDataLoader(BaseDataLoader):
    def __init__(self, data_dir, batch_size, shuffle=True,
                 validation_split_ratio=0.0, num_workers=1):

        dataset = FamilyMartDataset(Path(data_dir))

        super().__init__(dataset, batch_size, shuffle, validation_split_ratio,
                         num_workers)

x, y = FamilyMartDataset(Path('data/family_mart'))[431]
print(x.size(), y.size())
print(x, y, torch.__version__)

IndexError: tensors used as indices must be long, byte or bool tensors

In [34]:
commodity_dataframe = pd.read_csv(
        Path(data_dir) / '商品主檔.txt'.format(year), sep='\t')
print(commodity_dataframe.loc[:, '商品代號'].unique())

[ 617008  610779  610501 ... 6180013 6180136 6180027]


In [6]:
!which python

/usr/local/bin/python
