In [1]:
# default_exp tabular.data

# tabular.data

> Data loaders that build `fastrenewables` tabular `DataLoaders` from a data frame or from a list of files.

In [2]:
#export
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.tabular.data import *
from fastai.tabular.core import *
from fastrenewables.tabular.core import *
import glob

In [3]:
#hide
from nbdev.showdoc import *

In [4]:
#export
class RenewableDataLoaders(DataLoaders):
    "A dataloader for `fastrenewables` tabular data."
    @classmethod
    @delegates(Tabular.dataloaders, but=["dl_type", "dl_kwargs"])
    def from_df(cls, df, path='.', procs=None, pre_procs=None, cat_names=None, cont_names=None, y_names=None, y_block=None,
                splits=None, **kwargs):
        """Create from `df` in `path` using `procs`

        - `df`: data frame holding the feature and target columns.
        - `path`: forwarded to `TabularRenewables.dataloaders`.
        - `procs`: passed to `TabularRenewables`; defaults to `[NormalizePerTask, Categorify]`.
        - `pre_procs`: passed to `TabularRenewables` as `pre_process`; defaults to
          `[CreateTimeStampIndex("TimeUTC"), AddSeasonalFeatures]`.
        - `cat_names`: categorical column names; defaults to none.
        - `cont_names`: continuous column names; defaults to every column of `df`
          that is neither in `cat_names` nor in `y_names`, in `df` column order.
        - `y_names`: target column name(s).
        - `y_block`: defaults to `RegressionBlock()` when not given.
        - `splits`: defaults to `RandomSplitter(valid_pct=0.2)` when not given.
        - `kwargs`: forwarded to `TabularRenewables.dataloaders`.

        Returns whatever `TabularRenewables.dataloaders` returns.
        """
        if cat_names is None: cat_names = []
        if cont_names is None:
            # Keep the data frame's column order (and drop duplicates) instead of relying on
            # set iteration order, which is not stable between interpreter runs.
            excluded = set(L(cat_names)) | set(L(y_names))
            cont_names = list(dict.fromkeys(c for c in df if c not in excluded))

        if pre_procs is None:
            pre_procs = [CreateTimeStampIndex("TimeUTC"), AddSeasonalFeatures]
        if procs is None:
            procs = [NormalizePerTask, Categorify]

        # `y_block` is a named parameter, so it never ends up in `kwargs`;
        # only fall back to regression when the caller did not supply one.
        if y_block is None: y_block = RegressionBlock()
        if splits is None: splits = RandomSplitter(valid_pct=0.2)

        to = TabularRenewables(
            df,
            cont_names=cont_names,
            cat_names=cat_names,
            y_names=y_names,
            pre_process=pre_procs,
            procs=procs,
            splits=splits,
            y_block=y_block
        )

        return to.dataloaders(path=path, **kwargs)


    @classmethod
    def from_files(cls, files, **kwargs):
        """Create from the data stored in `files`.

        `files` is handed to `read_files` (presumably yielding one data frame per
        file -- confirm against `fastrenewables.tabular.core`); the results are
        concatenated row-wise and passed, together with `kwargs`, to `from_df`.
        """
        dfs = read_files(files)
        df = pd.concat(dfs, axis=0)
        # NOTE: `TaskID` is not appended to `cat_names` automatically; callers
        # that want it as a categorical feature must list it themselves.
        return cls.from_df(df, **kwargs)

In [5]:
# Collect the HDF5 files in the repository's data folder and show how many were found.
files = glob.glob("../data/*.h5")
len(files)

3

In [6]:
# Show the matched paths (glob returns them in no particular order).
files

['../data/00011.h5', '../data/00161.h5', '../data/00090.h5']

In [7]:
# Continuous feature columns fed to the model (23 in total).
cont_names = [
    # columns without a time-shift suffix
    'T_HAG_2_M', 'RELHUM_HAG_2_M', 'PS_SFC_0_M',
    'ASWDIFDS_SFC_0_M', 'ASWDIRS_SFC_0_M',
    'WindSpeed58m', 'SinWindDirection58m', 'CosWindDirection58m',
    'WindSpeed60m', 'SinWindDirection60m', 'CosWindDirection60m',
    # `Minus_t_1` variants of the wind columns
    'WindSpeed58mMinus_t_1', 'SinWindDirection58mMinus_t_1', 'CosWindDirection58mMinus_t_1',
    'WindSpeed60mMinus_t_1', 'SinWindDirection60mMinus_t_1', 'CosWindDirection60mMinus_t_1',
    # `Plus_t_1` variants of the wind columns
    'WindSpeed58mPlus_t_1', 'SinWindDirection58mPlus_t_1', 'CosWindDirection58mPlus_t_1',
    'WindSpeed60mPlus_t_1', 'SinWindDirection60mPlus_t_1', 'CosWindDirection60mPlus_t_1',
]
# Categorical feature columns.
cat_names = ['TaskID', 'Month', 'Day', 'Hour']

In [8]:
# Turn off pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None

# Build the data loaders straight from the HDF5 files, replacing the default
# pre-processing steps with a year filter plus categorical seasonal features.
dls = RenewableDataLoaders.from_files(
    glob.glob("../data/*.h5"),
    y_names="PowerGeneration",
    pre_procs=[
        FilterYear(year=2020),
        AddSeasonalFeatures(as_cont=False),
    ],
    cat_names=cat_names,
    cont_names=cont_names,
)

In [9]:
# Pull a single batch: categorical inputs, continuous inputs and targets.
cat,X, y = dls.one_batch()

In [10]:
# Second dimensions should match the 4 `cat_names`, the 23 `cont_names` and the single target.
cat.shape, X.shape, y.shape

(torch.Size([64, 4]), torch.Size([64, 23]), torch.Size([64, 1]))

In [11]:
# Latest timestamp left in the training items; the recorded output ends in 2019,
# which suggests `FilterYear(year=2020)` removed that year -- TODO confirm.
dls.train_ds.items.index.max()

Timestamp('2019-12-31 23:00:00+0000', tz='UTC')

In [12]:
#hide
# Run nbdev's notebook export; the output below lists every notebook it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 00a_utils.ipynb.
Converted 00b_losses.ipynb.
Converted 00c_utils_blitz.ipynb.
Converted 00d_baselines.ipynb.
Converted 00e_metrics.ipynb.
Converted 00f_utils_pytorch.ipynb.
Converted 01_tabular.core.ipynb.
Converted 02_tabular.data.ipynb.
Converted 03_tabular.model.ipynb.
Converted 04_tabular.learner.ipynb.
Converted 05_timeseries.core.ipynb.
Converted 06_timeseries.data.ipynb.
Converted 07_timeseries.model.ipynb.
Converted 08_timeseries.learner.ipynb.
Converted 09_gan.core.ipynb.
Converted 10_gan.model.ipynb.
Converted 11_gan.learner.ipynb.
Converted 12_autoencoder_models.ipynb.
Converted 13_probabilistic_models.ipynb.
Converted index.ipynb.
