# Minimal example for how to use Fastai for tabular data
Fastai is a beautiful library that makes neural networks accessible and applies best practices by default. It makes it quick and easy to create a baseline model, including a dense neural network with categorical feature embeddings.

In [None]:
import numpy as np
import pandas as pd
import fastai

In [None]:
# Load the Kaggle "Tabular Playground Series - Jun 2021" CSV files.
# index_col=0 makes the first column of each file the DataFrame index.
df_train = pd.read_csv('/kaggle/input/tabular-playground-series-jun-2021/train.csv', index_col=0)
df_test  = pd.read_csv('/kaggle/input/tabular-playground-series-jun-2021/test.csv', index_col=0)
df_sub   = pd.read_csv('/kaggle/input/tabular-playground-series-jun-2021/sample_submission.csv', index_col=0)
# Bare expression at the end of the cell; presumably there so the notebook displays the training frame.
df_train

In [None]:
from fastai.tabular.all import *

In [None]:
# Names of all training columns whose label matches the regex 'feat', as a plain list.
feature_names = df_train.filter(regex='feat').columns.to_list()

In [None]:
def cardinality(x):
    """Return the number of distinct values in ``x``.

    Args:
        x: A pandas Series, or any 1-D array-like accepted by ``pd.Series``.

    Returns:
        int: Length of the value-count table of ``x``. Missing values
        (NaN/None) are not counted, because ``value_counts`` drops them
        by default.
    """
    # The top-level ``pd.value_counts`` is deprecated (FutureWarning since
    # pandas 2.1); the Series method is the documented replacement and
    # produces the same table.
    return len(pd.Series(x).value_counts())

In [None]:
# Sort every feature into one of two lists by its number of distinct values:
# fewer than 400 distinct values -> treated as categorical, otherwise continuous.
cat_names, cont_names = [], []

for _f in feature_names:
    (cat_names if cardinality(df_train[_f]) < 400 else cont_names).append(_f)

In [None]:
def seed_everything(seed=9878732):
    """Seed every random number generator used here for reproducible runs.

    Adapted from https://www.kaggle.com/kaerunantoka/titanic-pytorch-nn-tutorial

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy and
            PyTorch (CPU and CUDA generators). It is also exported as
            ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    # Only influences hash randomisation of Python processes started after
    # this point; the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed the generators of all CUDA devices, not only the current one.
    # Safe to call when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # With benchmarking enabled cuDNN may auto-select different convolution
    # algorithms from run to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

In [None]:
# Seed the RNGs before splitting (presumably so the random split below is repeatable).
seed_everything()

# Random train/validation split over the row positions, holding out 20% for validation.
row_indices = range_of(df_train)
splits = RandomSplitter(valid_pct=0.2)(row_indices)

# Preprocessing steps handed to TabularPandas, in this order.
preprocessing_steps = [Categorify, FillMissing, Normalize]

to = TabularPandas(
    df_train,
    procs=preprocessing_steps,
    cat_names=cat_names,
    cont_names=cont_names,
    y_names="target",
    splits=splits,
)

# Convert it to dataloaders with the batch size set below.
batch_size = 4096
dls = to.dataloaders(bs=batch_size)

In [None]:
# Training callbacks, both tracking 'valid_loss':
#  - EarlyStoppingCallback ends a fit after 3 epochs without improvement.
#  - SaveModelCallback writes the checkpoint 'fastai' whenever the loss improves.
#    reset_on_fit=False keeps the best score across successive fit calls, so a
#    later fine-tuning stage can only overwrite the checkpoint with a model that
#    beats the best one seen so far (with True, the first epoch of every new fit
#    would overwrite it unconditionally).
cbs = [
    EarlyStoppingCallback(monitor='valid_loss', min_delta=0.0, patience=3),
    SaveModelCallback(monitor='valid_loss', comp=None, min_delta=0.0,
                      fname='fastai', every_epoch=False,
                      with_opt=False, reset_on_fit=False),
]

# Model configuration: ps=0.5 and embed_p=0.0 (dropout settings passed to tabular_config).
config = tabular_config(ps=0.5, embed_p=0.0)
# Tabular learner with two hidden layers (200 and 42 units), weight decay 0.5, reporting accuracy.
learn = tabular_learner(dls, metrics=accuracy, config=config, wd=5e-1, cbs=cbs, layers=[200, 42])

In [None]:
# Run the learner's learning-rate finder before training; its result is not stored here.
learn.lr_find()

In [None]:
# Stage 1: up to 4 epochs of one-cycle training with a learning-rate argument of 1e-2.
learn.fit_one_cycle(4, 1e-2)

In [None]:
# Stage 2: reload the checkpoint named 'fastai' (the fname given to SaveModelCallback),
# then train up to 4 more epochs with a 10x smaller learning-rate argument (1e-3).
learn.load('fastai')
learn.fit_one_cycle(4,1e-3)

In [None]:
# Stage 3: reload the 'fastai' checkpoint again and train up to 4 more epochs
# with the learning-rate argument lowered to 1e-4.
learn.load('fastai')
learn.fit_one_cycle(4,1e-4)

In [None]:
# Reload the saved 'fastai' checkpoint so the following cells use those weights.
learn.load('fastai')

In [None]:
# Build a test DataLoader from the test frame via the learner's DataLoaders.
dl = learn.dls.test_dl(df_test)

# get_preds returns a sequence; its first element is what gets written to the submission.
pred = learn.get_preds(dl=dl)
prediction_array = pred[0].numpy()

# Overwrite every cell of the sample submission with the predictions, then save it.
df_sub.loc[:, :] = prediction_array
df_sub.to_csv('submission.csv')