In [None]:
!pip install -q fastai==2.2.5 fastcore==1.3.19 fast-tabnet==0.2.0 

In [None]:
!pip install wandb

In [None]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
import pandas as pd
from tqdm import tqdm, trange
from sklearn import model_selection
from fastai.tabular.all import *
from fastai.callback.wandb import *
from fast_tabnet.core import *
# Single seed shared by the RNG setup below and by the seeded splitter later in the notebook.
SEED = 42
# fastai helper that seeds the random number generators; called with reproducible=True.
set_seed(SEED, reproducible=True)

In [None]:
# Authenticate this session with Weights & Biases.
# NOTE(review): the following cell logs in again using a key from Kaggle Secrets;
# confirm whether this un-keyed login is still needed, since it presumably prompts
# for a key when none is configured.
import wandb
wandb.login()

In [None]:
import wandb

# Log in to W&B with the API key stored in Kaggle Secrets under the label "wandb_api".
# `anony` records the outcome: None when the key-based login succeeded, "must"
# (anonymous mode) when the secret or the `kaggle_secrets` module is unavailable.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anony = None
except Exception as err:
    # `Exception` instead of a bare `except:` so KeyboardInterrupt / SystemExit still
    # propagate; the reason is printed so a misconfigured secret is not silently hidden.
    anony = "must"
    print(f'W&B login via Kaggle Secrets failed: {type(err).__name__}: {err}')
    print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

In [None]:
# Load the competition's train and test tables into DataFrames (train first, then test).
train_df, test_df = map(pd.read_csv, (
    '../input/tabular-playground-series-oct-2021/train.csv',
    '../input/tabular-playground-series-oct-2021/test.csv',
))

In [None]:
# Feature typing follows the insights from this notebook (thanks to its author):
# https://www.kaggle.com/desalegngeb/octps-2021-eda-xgboost-lgbm
y_names = ['target']

# Continuous features: f0 .. f241, except f22 and f43, which are listed as categorical.
cont_names = [f'f{i}' for i in range(242) if i not in (22, 43)]

# Categorical features: f22 and f43 first, followed by f242 .. f284.
cat_names = ['f22', 'f43'] + [f'f{i}' for i in range(242, 285)]
# Mini-batch size used when the DataLoaders are created.
bs = 32

# Preprocessing steps handed to fastai: Categorify, FillMissing, Normalize (in that order).
procs = [Categorify, FillMissing, Normalize]

# Seeded random train/validation index split over the rows of `train_df`
# (valid_pct spelled out; 0.2 is RandomSplitter's default).
splits = RandomSplitter(valid_pct=0.2, seed=SEED)(range_of(train_df))

In [None]:
# Directory of the competition data; passed as `path=` when building the DataLoaders from the training set.
path='../input/tabular-playground-series-oct-2021/'

In [None]:
# Build DataLoaders directly from the already-loaded `train_df` instead of parsing the
# training CSV a second time, and reuse the shared `procs` list rather than repeating it.
# `y_block=CategoryBlock()` declares the label as a class: without it fastai infers a
# regression target whenever the label column is numeric
# (assumes `target` is a 0/1 label — TODO confirm).
dls = TabularDataLoaders.from_df(train_df, path=path, y_names="target",
    cat_names = cat_names,
    cont_names = cont_names,
    procs = procs,
    y_block = CategoryBlock())

In [None]:
# 80/20 random split, seeded with SEED so re-running this cell always yields the same
# train/validation rows (an unseeded splitter depends on the current global RNG state).
splits = RandomSplitter(valid_pct=0.2, seed=SEED)(range_of(train_df))

# Preprocess the training frame with the shared `procs` list.
# `y_block=CategoryBlock()` makes the label a class: with a numeric label column and no
# y_block, fastai sets up a regression target, which does not fit a binary ROC AUC metric
# (assumes `target` is a 0/1 label — TODO confirm).
to = TabularPandas(train_df, procs=procs,
                   cat_names = cat_names,
                   cont_names = cont_names,
                   y_names='target',
                   y_block=CategoryBlock(),
                   splits=splits)

In [None]:
# Turn the processed TabularPandas object into DataLoaders with batch size `bs`,
# then display one batch as a quick sanity check.

dls = to.dataloaders(bs=bs)
dls.show_batch()

In [None]:
# Start the W&B run for this training session.
# `anony` comes from the Kaggle-Secrets login cell: None when the key-based login
# succeeded (the run goes to that account), "must" when it did not (anonymous run).
# Hard-coding 'must' here would force an anonymous run even after a successful login.
wandb.init(project='TPS-Oct',
           job_type='Train',
           anonymous=anony)

In [None]:
# Binary ROC AUC metric handed to the learner.
# NOTE(review): presumably needs two-class predictions — confirm the target is set up as categorical.
score_multi = RocAucBinary()
# fastai tabular learner built on the DataLoaders, with library-default model settings.
learn = tabular_learner(dls, metrics=score_multi)

In [None]:
# Callbacks passed to training: the fastai W&B callback, constructed with default options.
cbs =  [WandbCallback()]

In [None]:
# Run fastai's learning-rate finder to help pick a learning rate for training.
learn.lr_find()

In [None]:
# Train for one epoch with the one-cycle schedule at a peak learning rate of 0.001,
# passing the callbacks collected in `cbs`.
learn.fit_one_cycle(n_epoch=1, lr_max=0.001, cbs=cbs)

## *WORK IN PROGRESS!*