In [None]:
# default_exp data.tabular

# Data Tabular

> Main Tabular functions used throughout the library.

In [None]:
#export
from tsai.imports import *
from fastai.tabular.all import *

In [None]:
# Fetch the ADULT_SAMPLE archive and read its CSV into a DataFrame
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

# Column roles and the preprocessing steps handed to `TabularPandas`
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
y_names = 'salary'
y_block = CategoryBlock()
procs = [Categorify, FillMissing, Normalize]

# Random split computed over the row positions of `df`
splits = RandomSplitter()(range_of(df))

# Turn off pandas' chained-assignment check before `df` is processed with `inplace=True`
pd.options.mode.chained_assignment=None
to = TabularPandas(
    df,
    procs=procs,
    cat_names=cat_names,
    cont_names=cont_names,
    y_names=y_names,
    y_block=y_block,
    splits=splits,
    inplace=True,
    reduce_memory=False,
)

In [None]:
# export
class TabularDataset():
    "A `Numpy` dataset from a `TabularPandas` object"
    def __init__(self, to):
        # `to` only needs `cats`, `conts` and `ys` attributes that expose `to_numpy()`
        # `np.long` no longer exists in NumPy 1.24+; `np.int64` names the 64-bit integer type explicitly
        self.cats = to.cats.to_numpy().astype(np.int64)
        self.conts = to.conts.to_numpy().astype(np.float32)
        self.ys = to.ys.to_numpy()
    def __getitem__(self, idx):
        "Return the `(cats, conts, ys)` triple selected by `idx`"
        return self.cats[idx], self.conts[idx], self.ys[idx]
    def __len__(self): return len(self.cats)
    @property
    def c(self):
        "Number of outputs: 0 without targets, 1 for floating-point targets, otherwise the number of distinct target values"
        if self.ys is None: return 0
        # `np.asarray` is a no-op for arrays; NOTE(review): if `ys` has been replaced by a tensor it must be
        # convertible to NumPy (the original `np.unique(self.ys)` call had the same requirement)
        ys = np.asarray(self.ys)
        if ys.size == 0: return 0
        # Decide on the dtype: `ys[0]` is a row (not a Python float) when `ys` is 2-D
        if np.issubdtype(ys.dtype, np.floating): return 1
        return len(np.unique(ys))

class TabularDataLoader(DataLoader):
    "A `DataLoader` that builds each batch by slicing the arrays held by its dataset"
    def __init__(self, dataset, bs=1, num_workers=0, device=None, train=False, **kwargs):
        device = ifnone(device, default_device())
        # The batch size is capped at the dataset length; `train` switches on both shuffling and `drop_last`
        super().__init__(dataset, bs=min(bs, len(dataset)), num_workers=num_workers, shuffle=train, device=device, drop_last=train, **kwargs)
        self.device, self.shuffle = device, train
    def create_item(self, s):
        "Return the index unchanged; rows are only read from the dataset in `create_batch`"
        return s
    def get_idxs(self):
        "Return the indices for one pass, reshuffling the dataset itself first when `self.shuffle` is set"
        idxs = Inf.count if self.indexed else Inf.nones
        if self.n is not None: idxs = list(range(len(self.dataset)))
        # The indices are returned in order: the randomness comes from permuting the underlying arrays
        if self.shuffle: self.shuffle_fn()
        return idxs
    def create_batch(self, b):
        "Slice `self.bs` rows from the dataset, starting at the first index in `b`"
        # NOTE(review): only `b[0]` is used, so this assumes `b` is a run of consecutive indices
        start = b[0]
        return self.dataset[start:start+self.bs]
    def shuffle_fn(self):
        "Shuffle the dataset in place; `get_idxs` calls this on every invocation when `self.shuffle` is set"
        # One permutation is applied to all three arrays so that rows stay aligned
        perm = np.random.permutation(len(self.dataset))
        self.dataset.cats = self.dataset.cats[perm]
        self.dataset.conts = self.dataset.conts[perm]
        self.dataset.ys = self.dataset.ys[perm]
    def to(self, device):
        "Convert the dataset arrays to tensors on `device` and return `self` so that calls can be chained"
        self.device = device
        self.dataset.cats = tensor(self.dataset.cats).to(device=self.device)
        self.dataset.conts = tensor(self.dataset.conts).to(device=self.device)
        self.dataset.ys = tensor(self.dataset.ys).to(device=self.device)
        return self

In [None]:
# One dataset per split, then one loader per dataset; only the training loader is created with `train=True`
train_ds, valid_ds = TabularDataset(to.train), TabularDataset(to.valid)
train_dl = TabularDataLoader(train_ds, bs=512, train=True)
valid_dl = TabularDataLoader(valid_ds, bs=512)
dls = DataLoaders(train_dl, valid_dl)

In [None]:
emb_szs = get_emb_sz(to)
# The continuous-input count follows `cont_names` instead of a hard-coded 3.
# NOTE(review): the output size of 2 presumably matches the two `salary` classes — confirm
net = TabularModel(emb_szs, len(cont_names), 2, layers=[200,100])
learn = Learner(dls, net, metrics=accuracy, loss_func=CrossEntropyLossFlat())

In [None]:
# Fit for a single epoch; NOTE(review): 1e-2 is presumably the learning rate (second positional argument of `Learner.fit`) — confirm
learn.fit(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.37785,0.364125,0.829238,00:06


In [None]:
#hide
# Housekeeping helpers (not defined in this notebook): the cell output shows `create_scripts` converting the
# project notebooks; NOTE(review): `beep` presumably signals completion — confirm
create_scripts()
beep()

<IPython.core.display.Javascript object>


Current notebook saved.

Converted 000_utils.ipynb.
Converted 001_data.external.ipynb.
Converted 002_data.core.ipynb.
Converted 003_data.transforms.ipynb.
Converted 005_data.tabular.ipynb.
Converted 006_data.validation.ipynb.
Converted 007_metrics.ipynb.
Converted 008_learner.ipynb.
Converted 009_optimizers.ipynb.
Converted 010_rocket_functions.ipynb.
Converted 100_layers.ipynb.
Converted 100b_models_utils.ipynb.
Converted 101_ResNet.ipynb.
Converted 102_InceptionTime.ipynb.
Converted index.ipynb.

Checking folder: /Users/nacho/Documents/Machine_Learning/Jupyter_Notebooks/timeseries/tsai
Correct conversion! 😃
Total elapsed time 13 s
21-04-2020 15:29:32
