In [None]:
# default_exp tabular.learner

# learner

> `RenewableLearner` and the `renewable_learner` factory for training tabular models on renewable power data.

In [None]:
#export
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.tabular.data import *
from fastai.tabular.core import *
from fastai.tabular.model import *
from fastai.basics import *
from fastrenewables.tabular.core import *
from fastrenewables.tabular.data import *
from fastrenewables.tabular.model import *
from fastrenewables.losses import VILoss

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
class RenewableLearner(Learner):
    "`Learner` for renewable data"
    def predict(self, ds_idx=1, test_dl=None, filter=True):
        """Predict on a complete dataset with a single forward pass.

        Parameters
        ----------
        ds_idx : int
            Dataset of `self.dls` to use when `test_dl` is None:
            0 selects `train_ds`, 1 selects `valid_ds`.
        test_dl : optional
            If given, its `train_ds` is used instead of `self.dls`.
        filter : bool
            If True, predictions are clamped in place to the range [0, 1.1].

        Returns
        -------
        (preds, targets) : tuple of flattened numpy arrays.

        Raises
        ------
        ValueError
            If `test_dl` is None and `ds_idx` is neither 0 nor 1.
        NotImplementedError
            If the selected dataset is not a supported tabular type.
        """
        device = next(self.model.parameters()).device

        if test_dl is not None:
            to = test_dl.train_ds
        elif ds_idx == 0:
            to = self.dls.train_ds
        elif ds_idx == 1:
            to = self.dls.valid_ds
        else:
            # Previously `to` stayed unbound here and an UnboundLocalError surfaced further down.
            raise ValueError(f"`ds_idx` must be 0 (train) or 1 (valid) when no `test_dl` is given, got {ds_idx!r}")

        if not isinstance(to, (TabularPandas, TabularRenewables, TabDataLoader)):
            raise NotImplementedError("Unknown type")

        # `np.long` is not a portable NumPy name (removed in NumPy 1.24, a platform-dependent
        # C long in NumPy 2.x); `np.int64` always matches torch's long dtype.
        ys_type = np.float32 if getattr(to, 'regression_setup', False) else np.int64

        # To increase speed we directly predict on all tensors at once instead of iterating batches.
        cats = tensor(to.cats.values.astype(np.int64))
        xs = tensor(to.conts.values.astype(np.float32))
        targets = tensor(to.y.values.astype(ys_type))

        # NOTE(review): the model's train/eval mode is not changed here, so dropout and
        # batch-norm layers behave according to whatever mode the model is currently in.
        with torch.no_grad():
            preds = self.model(cats.to(device), xs.to(device))

        preds, targets = to_np(preds).reshape(-1), to_np(targets).reshape(-1)
        if filter:
            # Clamp predictions to [0, 1.1].
            preds[preds < 0] = 0
            preds[preds > 1.1] = 1.1

        return preds, targets

In [None]:
# Scratch check of what `np.long` resolves to in this environment (the output below shows the builtin `int`).
np.long

int

In [None]:
# Render the documentation entry for `RenewableLearner` as a level-3 heading.
show_doc(RenewableLearner, title_level=3)

<h3 id="RenewableLearner" class="doc_header"><code>class</code> <code>RenewableLearner</code><a href="" class="source_link" style="float:right">[source]</a></h3>

> <code>RenewableLearner</code>(**`dls`**, **`model`**, **`loss_func`**=*`None`*, **`opt_func`**=*`Adam`*, **`lr`**=*`0.001`*, **`splitter`**=*`trainable_params`*, **`cbs`**=*`None`*, **`metrics`**=*`None`*, **`path`**=*`None`*, **`model_dir`**=*`'models'`*, **`wd`**=*`None`*, **`wd_bn_bias`**=*`False`*, **`train_bn`**=*`True`*, **`moms`**=*`(0.95, 0.85, 0.95)`*) :: `Learner`

`Learner` for renewable data

In [None]:
# Continuous input columns used by the example.
cont_names = [
    'T_HAG_2_M',
    'RELHUM_HAG_2_M',
    'PS_SFC_0_M',
    'ASWDIFDS_SFC_0_M',
    'ASWDIRS_SFC_0_M',
    'WindSpeed58m',
]
# Categorical input columns used by the example.
cat_names = [
    'TaskID',
    'Month',
    'Day',
    'Hour',
]

# Turn off pandas' chained-assignment warning before the data is loaded.
pd.options.mode.chained_assignment = None

# Build the data loaders from every HDF5 file in ../data, with `PowerGeneration` as target.
dls = RenewableDataLoaders.from_files(
    glob.glob("../data/*.h5"),
    y_names="PowerGeneration",
    pre_procs=[
        FilterYear(year=2020),
        AddSeasonalFeatures(as_cont=False),
    ],
    cat_names=cat_names,
    cont_names=cont_names,
)

In [None]:
#export
@delegates(Learner.__init__)
def renewable_learner(dls, layers=None, emb_szs=None, config=None, n_out=None, y_range=None, embedding_type=EmbeddingType.Normal, **kwargs):
    """Get a `RenewableLearner` using `dls`, wrapping a `MultiLayerPerceptron` created using the remaining params.

    `layers` defaults to `[len(dls.cont_names), 200, 100, n_out]`; `n_out` defaults to `get_c(dls)`.
    `emb_szs` is passed to `get_emb_sz` as the size override dictionary.
    `config` holds extra keyword arguments for `MultiLayerPerceptron` (default: `tabular_config()`).
    An optional `embed_p` keyword (default 0.1) sets the embedding dropout and is not forwarded to the learner.
    With `embedding_type == EmbeddingType.Bayes` and no explicit `loss_func`, the loss found on
    `dls.train_ds` is wrapped in a `VILoss`.
    All remaining `kwargs` are forwarded to `RenewableLearner`.
    """
    # Work on a copy so that popping `y_range` below does not modify the caller's dict.
    config = tabular_config() if config is None else dict(config)

    to = dls.train_ds
    emb_szs = get_emb_sz(to, {} if emb_szs is None else emb_szs)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"

    if layers is None: layers = [len(dls.cont_names), 200, 100, n_out]
    # NOTE(review): `y_range` is removed from `config` here but never handed to the model;
    # confirm whether `MultiLayerPerceptron` is supposed to receive it.
    if y_range is None and 'y_range' in config: y_range = config.pop('y_range')

    # Take `embed_p` out of `kwargs` so it is not forwarded to `RenewableLearner`.
    # (The former `kwargs["embed_p"].pop()` called `.pop()` on the value itself.)
    embed_p = kwargs.pop("embed_p", 0.1)

    emb_module = None  # keeps the name bound even if no embedding sizes are available
    if emb_szs is not None:
        # NOTE(review): `embedding_type` is not passed to `EmbeddingModule`; it only selects
        # the loss below. Verify this is intended for `EmbeddingType.Bayes`.
        emb_module = EmbeddingModule(None, embedding_dropout=embed_p, embedding_dimensions=emb_szs)

    model = MultiLayerPerceptron(layers, embedding_module=emb_module, **config)

    if embedding_type==EmbeddingType.Bayes and "loss_func" not in kwargs:
        base_loss = getattr(to, 'loss_func', None)
        assert base_loss is not None, "Could not infer loss function from the data, please pass a loss function."
        kwargs["loss_func"] = VILoss(model=model, base_loss=base_loss, kl_weight=0.1)

    return RenewableLearner(dls, model, **kwargs)

In [None]:
# Build a learner with the default layer sizes and embedding type, tracking RMSE as metric.
learn = renewable_learner(dls, metrics=rmse)

In [None]:
# Display a sample batch to check the categorical, continuous and target columns.
dls.show_batch()

Unnamed: 0,TaskID,Month,Day,Hour,T_HAG_2_M,RELHUM_HAG_2_M,PS_SFC_0_M,ASWDIFDS_SFC_0_M,ASWDIRS_SFC_0_M,WindSpeed58m,PowerGeneration
0,1,11,22,19,274.521,93.115999,97203.214978,38.393001,8.624999,2.200401,0.017
1,0,3,14,5,277.642,80.636,99620.72301,56.42,4.266001,6.164633,0.134
2,1,10,8,19,284.353,93.917,97377.358978,38.499,14.476999,8.565258,0.15
3,2,5,29,9,284.972,69.292,93429.566003,99.847,23.871002,5.777053,0.092
4,1,4,23,3,280.034,65.099,96929.59004,51.098,193.095,5.372576,0.041
5,2,1,27,3,272.914,91.351,91737.972986,29.742001,3.800997,4.974804,0.126
6,1,3,20,10,280.145,58.037,99861.651958,51.482,109.617999,2.915605,0.054
7,2,11,3,17,281.462,69.377,90840.137057,42.210001,19.737999,15.319545,0.037
8,1,7,14,0,286.722,93.051001,98198.473,69.326,25.054999,2.073009,0.001
9,1,10,25,18,283.998,84.012,98637.234026,46.888,58.942001,5.74268,0.019


In [None]:
# Train the default learner for a single epoch.
learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,_rmse,time
0,0.01498,0.01365,0.116834,00:03


In [None]:
# Second example: custom layer sizes built with `get_structure`, starting at the number of
# continuous features, and `EmbeddingType.Bayes`. As no `loss_func` is passed,
# `renewable_learner` wraps the loss found on the training data in a `VILoss`.
n_conts=len(dls.cont_names)
hidden_layers = [n_conts] + get_structure(n_conts*20,90,10)
learn = renewable_learner(dls, metrics=rmse, layers=hidden_layers, embedding_type=EmbeddingType.Bayes)

In [None]:
# Print the layer-by-layer summary of the model built above.
learn.summary()

MultiLayerPerceptron (Input shape: 64 x torch.Size([64, 6]))
Layer (type)         Output Shape         Param #    Trainable 
                     64 x 3              
Embedding                                 12         True      
____________________________________________________________________________
                     64 x 7              
Embedding                                 91         True      
____________________________________________________________________________
                     64 x 11             
Embedding                                 352        True      
____________________________________________________________________________
                     64 x 10             
Embedding                                 250        True      
Dropout                                                        
Identity                                                       
BatchNorm1d                               12         True      
BatchNorm1d                 

In [None]:
# Train the `EmbeddingType.Bayes` learner for a single epoch.
learn.fit_one_cycle(1)

epoch,train_loss,valid_loss,_rmse,time


In [None]:
# Predict on the validation set (`ds_idx=1`); both results are flattened numpy arrays.
preds,target = learn.predict(ds_idx=1)

In [None]:
import matplotlib.pyplot as plt

# Scatter predictions against targets on labelled axes so the figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(preds, target)
ax.set(xlabel="Predicted PowerGeneration", ylabel="Target PowerGeneration",
       title="Validation set: prediction vs. target");

In [None]:
#hide
from nbdev.export import notebook2script
# Write the `#export` cells out to the library's Python modules.
notebook2script()