In [None]:
from fastai.tabular import *
from fastai.metrics import rmse

In [None]:
def reset_seed(seed=42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus all CUDA devices when CUDA is available), and configures
    cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # Exported for processes started after this point; the hash seed of the
    # already-running interpreter was fixed at start-up and is not changed.
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run, so it
    # has to be switched off as well for repeatable results.
    torch.backends.cudnn.benchmark = False

reset_seed()

In [None]:
# Directory holding the competition files; the CSVs below are read relative to it.
path = Path('/kaggle/input/google-quest-challenge')
# Show the directory contents as the cell output.
path.ls()

In [None]:
# Load the training table, using the first CSV column as the DataFrame index.
train_data = pd.read_csv(path/'train.csv', index_col=[0])
# Preview the first rows.
train_data.head()

In [None]:
# Load the test table the same way as the training table (first column as index).
test_data = pd.read_csv(path/'test.csv', index_col=[0])
# Preview the first rows.
test_data.head()

In [None]:
# The only input features: two columns treated as categorical.
cat_names = ['category', 'host']
# No continuous features are declared; `cont_names` is not passed when the data is built.
# Every column from position 10 onward is used as a regression target.
dep_var = train_data.columns[10:].tolist()
# Preprocessing applied to the tabular data: Categorify only.
procs = [Categorify]

In [None]:
# Build the DataBunch: categorical inputs from `train_data`, a random 10% held
# out for validation, all `dep_var` columns as float targets, and `test_data`
# attached as the unlabeled test set.
data = (TabularList.from_df(train_data, path=path, cat_names=cat_names, procs=procs)
                           # Explicit seed (same value as reset_seed's default) so the
                           # validation split does not depend on how much of the global
                           # NumPy random stream earlier cells consumed or on re-running
                           # this cell.
                           .split_by_rand_pct(0.1, seed=42)
                           .label_from_df(cols=dep_var, label_cls=FloatList, log=False)
                           .add_test(TabularList.from_df(test_data, path=path, cat_names=cat_names))
                           .databunch())

In [None]:
# Display five rows of a batch as a sanity check on the inputs and targets.
data.show_batch(rows=5)

In [None]:
# Tabular network with five hidden layers; every hidden layer uses the same
# dropout probability as the embedding dropout (0.2). RMSE is reported as the metric.
hidden_sizes = [10, 20, 30, 20, 10]
learn = tabular_learner(
    data,
    layers=hidden_sizes,
    ps=[0.2] * len(hidden_sizes),
    emb_drop=0.2,
    metrics=rmse,
)
# Optimise flattened mean squared error.
learn.loss_func = MSELossFlat()

In [None]:
# Point the learner's model directory at /kaggle/working.
# NOTE(review): presumably needed because the learner's base `path` lies under
# /kaggle/input, which is not writable on Kaggle - confirm.
learn.model_dir = Path('/kaggle/working')

In [None]:
# Run the learning-rate finder; its recorded results are plotted in the following cell.
learn.lr_find()

In [None]:
# Plot the learning-rate finder results and request a suggested learning rate;
# training below reads that value back from `learn.recorder.min_grad_lr`.
learn.recorder.plot(suggestion=True)

In [None]:
# Train for 10 epochs with the one-cycle policy and weight decay 1e-4.
# The learning rate comes from `learn.recorder.min_grad_lr`, so the
# `learn.recorder.plot(suggestion=True)` cell must have run successfully first.
learn.fit_one_cycle(10, learn.recorder.min_grad_lr, wd=1e-4)

In [None]:
# Predict on the test set attached with `add_test`; only `preds` is used in the
# cells that follow (`y` is not read again in this notebook section).
preds,y = learn.get_preds(DatasetType.Test)

In [None]:
# Display the test predictions as a NumPy array.
preds.data.numpy()

In [None]:
# Check the shape of the prediction tensor before writing it into the submission.
preds.shape

In [None]:
# Load the sample submission as the template for the output file.
sample =  pd.read_csv(path/'sample_submission.csv')
# The template is written to 'submission.csv' only after predictions are filled in (last cell).
# Preview the first rows of the template.
sample.head()

In [None]:
# Fill the submission template with the predictions.
# Columns are addressed by name (the order used to train the model, `dep_var`)
# instead of by position, so a different column order in the template cannot
# silently put predictions under the wrong target.
pred_array = preds.detach().cpu().numpy()
missing_targets = [col for col in dep_var if col not in sample.columns]
assert not missing_targets, f"targets missing from sample submission: {missing_targets}"
assert pred_array.shape == (len(sample), len(dep_var)), (
    f"predictions have shape {pred_array.shape}, expected {(len(sample), len(dep_var))}"
)
sample.loc[:, dep_var] = pred_array

In [None]:
# Inspect the first rows of the submission after the predictions were written in.
sample.head()

In [None]:
# Write the submission file to the current working directory, without the DataFrame index.
sample.to_csv('submission.csv', index=False)