Four options for handling missing values (NaNs):

1. Drop rows containing NaNs
2. Drop columns containing NaNs
3. Fill NaNs with imputed values
4. Use a model that natively handles NaNs (NEW!)

In [1]:
 import pandas as pd
# Read the whole training file, but cap the test file at its first 175 rows.
train = pd.read_csv(filepath_or_buffer='train.csv')
test = pd.read_csv(filepath_or_buffer='test.csv', nrows=175)

In [2]:
# Narrow both frames to the three feature columns. The list is spelled out once
# so train and test always share the same columns in the same order; train
# additionally keeps the 'Survived' column in front.
feature_cols = ['Age', 'Fare', 'Pclass']
train = train[['Survived'] + feature_cols]
test = test[feature_cols]

In [3]:
# how many entries are missing in each training column?
train.isnull().sum()

Survived      0
Age         177
Fare          0
Pclass        0
dtype: int64

In [4]:

# how many entries are missing in each test column?
test.isnull().sum()

Age       36
Fare       1
Pclass     0
dtype: int64

In [5]:
# Split the target off the training frame so `train` holds features only.
# `pop` removes the column in place, so running this cell a second time would
# raise KeyError; the guard makes the cell safe to re-run (on a re-run `label`
# simply keeps the value it was given the first time).
if 'Survived' in train.columns:
    label = train.pop('Survived')

In [7]:
# new in 0.22: this estimator (experimental) has native support for NaNs
# The first import is made only for its side effect -- the imported name is not
# used in the cells shown here -- so keep it ahead of the sklearn.ensemble import.
# NOTE(review): later scikit-learn releases reportedly ship this estimator as
# stable, which would make the enabling import unnecessary -- confirm against
# the installed version.
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

In [8]:
# Default hyperparameters, with the estimator's random seed pinned so that any
# randomized internal steps give the same result after a kernel restart.
clf = HistGradientBoostingClassifier(random_state=0)

In [9]:
# Train on the NaN-containing features, then predict for the NaN-containing
# test rows in one chained call -- neither step raises an error.
clf.fit(train, label).predict(test)

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int64)