Upgrading fastai to the latest version

In [None]:
!pip install fastai --upgrade

In [None]:
from fastai.tabular.all import *
from pathlib import Path

In [None]:
# Directory that the train.csv / test.csv reads below are resolved against.
base_path = Path(
    '../input/spaceship-titanic'
)

Creating the training dataframe and its basic exploration

In [None]:
# Load the training split from the competition directory.
train_df = pd.read_csv(base_path.joinpath('train.csv'))

In [None]:
# Preview the first rows of the training frame.
train_df.head()

In [None]:
# Summary statistics of the training frame.
train_df.describe()

In [None]:
# Per-column dtypes; the object / non-object split below is derived from these.
train_df.dtypes

Getting the categorical and continuous columns

In [None]:
# Every object-dtype column is treated as a categorical feature.
# NOTE(review): any object-typed identifier column (e.g. PassengerId, which is
# used as the submission key further down) would be picked up here as well —
# confirm that is intended.
cat_cols = list(train_df.select_dtypes(include=['object']).columns)
cat_cols

In [None]:
# Every non-object column is a candidate continuous feature.
cont_cols = list(train_df.select_dtypes(exclude=['object']).columns)
cont_cols

Since *Transported* is our label, we are not going to include it in the continuous columns.

In [None]:
# Remove the label by name instead of slicing off the last element: this does
# not depend on column order and is idempotent, so re-running the cell cannot
# silently drop a real feature.
cont_cols = [col for col in cont_cols if col != 'Transported']
cont_cols

Splitting the training data into training and validation sets randomly

In [None]:
# Random 80/20 train/validation split with a fixed seed.
splitter = RandomSplitter(valid_pct=0.2, seed=20)
splits = splitter(train_df)

In [None]:
# Wrap the training frame in a TabularPandas object (categorical / continuous
# columns, the Transported label as a category target, and the split computed
# above), then turn it into DataLoaders.
tabular_data = TabularPandas(
    df=train_df,
    procs=[Categorify, FillMissing, Normalize],
    cat_names=cat_cols,
    cont_names=cont_cols,
    y_names="Transported",
    y_block=CategoryBlock(),
    splits=splits,
)
dls = tabular_data.dataloaders(path='.')

In [None]:
# Tabular learner with hidden layer sizes 32, 16 and 8, tracking accuracy.
learn = tabular_learner(
    dls,
    layers=[32, 16, 8],
    metrics=accuracy,
)

In [None]:
# Optional learning-rate search, left disabled; uncomment to run it before training.
#learn.lr_find(suggest_funcs=(slide,valley))

In [None]:
# Train for 20 epochs using the learner's default learning rate.
learn.fit(n_epoch=20)

In [None]:
# Load the test split from the competition directory.
test_df = pd.read_csv(base_path.joinpath('test.csv'))

In [None]:
# Preview the first rows of the test frame.
test_df.head()

In [None]:
# Build a test DataLoader for test_df from the learner's existing DataLoaders.
tst_dl = learn.dls.test_dl(test_df)

In [None]:
# get_preds returns a (predictions, targets) pair; only the predictions are
# used. The second element is bound to a descriptive throwaway name instead of
# `_`, because assigning `_` in a notebook shadows IPython's last-output variable.
preds, _unused_targets = learn.get_preds(dl=tst_dl)

In [None]:
# Check the dimensions of the prediction output.
preds.shape

In [None]:
# Spot-check a single value: row 3, column 1 of the predictions.
preds[3,1]

In [None]:
# Threshold column 1 of the predictions at 0.5, then convert the resulting
# tensor to a NumPy boolean array before storing it. The explicit conversion
# gives the column a plain bool dtype instead of relying on how pandas happens
# to coerce a tensor object.
test_df['Transported'] = (preds[:, 1] > 0.5).numpy()

In [None]:
# Preview the test frame again, now including the Transported column.
test_df.head()

In [None]:
# Keep only the two columns that go into the submission file.
submission_columns = ['PassengerId', 'Transported']
sub_df = test_df.loc[:, submission_columns]

In [None]:
# Preview the submission frame before writing it out.
sub_df.head()

In [None]:
# Write the submission to the working directory; index=False keeps the
# DataFrame index out of the CSV.
sub_df.to_csv('submission.csv', index=False)