<img src="images/logo_city.png" align="right" width="20%">

# Data Generation Module Development

## 1. Goal

The finished module should support a workflow like the one below:

In [1]:
# --- third-party -----------------------------------------------------------
import torch

# --- project library (still under development) -----------------------------
import timeslice
import timeslice.source as source   # submodule of timeslice: defines the raw data source
import timeslice.rule as rule       # submodule of timeslice: defines the transformation rule
import timeslice.viz as viz         # submodule of timeslice: dataset visualisation, where applicable

# Raw data source.
# A database-backed source is the alternative to the CSV file used here:
#   taxi_dbs = source.DatabaseSource(db='database connect info')
taxi_csv = source.CSVSource(file='dataset/nytaxi_yellow_2017_jan.csv')

# Quick inspection helpers used while developing (left disabled):
#   print(taxi_csv)
#   taxi_csv.head(10)

In [2]:
# Configure a time-split rule and let a worker generate the dataset from it.
import importlib
import timeslice.worker as worker   # dataset generation

# Re-execute the worker module so that edits made to it during development
# are picked up without restarting the kernel.
importlib.reload(worker)

# Different rules yield different datasets.
# NOTE(review): `stp` / `etp` look like the start / end time points and `freq`
# the slice width — confirm against rule.TimeSlice.
time_rule = rule.TimeSlice(
    stp='2017-01-01 00:00:00',
    etp='2017-01-01 00:20:00',
    freq='10min',
)

# Data worker wired to the CSV source and the rule defined above.
# NOTE(review): `destin` presumably names the output location — confirm.
data_worker = worker.Worker(
    source=taxi_csv,
    destin='yearly_continuous',
    rule=time_rule,
    viz=True,
)

# Run the generation; per the original note this returns a tensor dataset.
dataset = data_worker.generate()

2017-01-01 00:00:00 2017-01-01 00:10:00
Just generated empty snapshot:
 [[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 ...

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]
Before normalization:
[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 1.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 ...

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0.

In [3]:
# Load one saved snapshot from disk.
# NOTE(review): the file name appears to encode
# <overall range>-<slice index>-<slice range>; confirm against the worker.
tensor = torch.load(
    'yearly_continuous/tensors/'
    '2017-01-01_000000-2017-01-01_002000-0-'
    '2017-01-01_000000-2017-01-01_001000.pkl'
)


In [4]:
# Sanity check: largest value held in the loaded snapshot.
torch.max(tensor)

tensor(0.1456, dtype=torch.float64)