In [184]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [185]:
# x_train + y_train
train_df = pd.read_csv("titanic/train.csv", index_col="PassengerId")
train_df.keys()

Index(['Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket',
       'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [186]:
# x_test
test_df = pd.read_csv("titanic/test.csv", index_col="PassengerId")
test_df.keys()

Index(['Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare',
       'Cabin', 'Embarked'],
      dtype='object')

In [187]:
# y_test
test_results_df = pd.read_csv("titanic/gender_submission.csv", index_col="PassengerId")
test_results_df.keys()

Index(['Survived'], dtype='object')

In [188]:
test_df.shape, test_results_df.shape

((418, 10), (418, 1))

In [189]:
test_combined_df = pd.concat([test_df, test_results_df], axis=1)
test_combined_df

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S,1
894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0
895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,1
...,...,...,...,...,...,...,...,...,...,...,...
1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S,0
1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C,1
1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S,0
1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S,0


In [190]:
from sklearn.preprocessing import LabelEncoder

In [191]:
def label_encode(df, column):
    df[column] = LabelEncoder().fit_transform(df[column] )
    return df

In [192]:
def drop_column(df, column):
    return df.drop(columns=[column], axis=1)

In [193]:
def keep_first_letter(df, column):
    df[column] = df[column].astype(np.str_).str[0] 
    return df 

In [194]:
test_combined_df = label_encode(test_combined_df, "Embarked")
test_combined_df

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,1,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,2,1
894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,1,0
895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,2,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,2,1
...,...,...,...,...,...,...,...,...,...,...,...
1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,2,0
1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,0,1
1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,2,0
1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,2,0


In [195]:
test_combined_df["Cabin"].unique()

array([nan, 'B45', 'E31', 'B57 B59 B63 B66', 'B36', 'A21', 'C78', 'D34',
       'D19', 'A9', 'D15', 'C31', 'C23 C25 C27', 'F G63', 'B61', 'C53',
       'D43', 'C130', 'C132', 'C101', 'C55 C57', 'B71', 'C46', 'C116',
       'F', 'A29', 'G6', 'C6', 'C28', 'C51', 'E46', 'C54', 'C97', 'D22',
       'B10', 'F4', 'E45', 'E52', 'D30', 'B58 B60', 'E34', 'C62 C64',
       'A11', 'B11', 'C80', 'F33', 'C85', 'D37', 'C86', 'D21', 'C89',
       'F E46', 'A34', 'D', 'B26', 'C22 C26', 'B69', 'C32', 'B78',
       'F E57', 'F2', 'A18', 'C106', 'B51 B53 B55', 'D10 D12', 'E60',
       'E50', 'E39 E41', 'B52 B54 B56', 'C39', 'B24', 'D28', 'B41', 'C7',
       'D40', 'D38', 'C105'], dtype=object)

In [196]:
test_combined_df["Sex"] = LabelEncoder().fit_transform(test_combined_df["Sex"] )
test_combined_df

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
892,3,"Kelly, Mr. James",1,34.5,0,0,330911,7.8292,,1,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",0,47.0,1,0,363272,7.0000,,2,1
894,2,"Myles, Mr. Thomas Francis",1,62.0,0,0,240276,9.6875,,1,0
895,3,"Wirz, Mr. Albert",1,27.0,0,0,315154,8.6625,,2,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",0,22.0,1,1,3101298,12.2875,,2,1
...,...,...,...,...,...,...,...,...,...,...,...
1305,3,"Spector, Mr. Woolf",1,,0,0,A.5. 3236,8.0500,,2,0
1306,1,"Oliva y Ocana, Dona. Fermina",0,39.0,0,0,PC 17758,108.9000,C105,0,1
1307,3,"Saether, Mr. Simon Sivertsen",1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,2,0
1308,3,"Ware, Mr. Frederick",1,,0,0,359309,8.0500,,2,0


In [197]:
test_combined_df.pipe(label_encode, column="Sex").pipe(label_encode, column="Embarked")
test_combined_df

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
892,3,"Kelly, Mr. James",1,34.5,0,0,330911,7.8292,,1,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",0,47.0,1,0,363272,7.0000,,2,1
894,2,"Myles, Mr. Thomas Francis",1,62.0,0,0,240276,9.6875,,1,0
895,3,"Wirz, Mr. Albert",1,27.0,0,0,315154,8.6625,,2,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",0,22.0,1,1,3101298,12.2875,,2,1
...,...,...,...,...,...,...,...,...,...,...,...
1305,3,"Spector, Mr. Woolf",1,,0,0,A.5. 3236,8.0500,,2,0
1306,1,"Oliva y Ocana, Dona. Fermina",0,39.0,0,0,PC 17758,108.9000,C105,0,1
1307,3,"Saether, Mr. Simon Sivertsen",1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,2,0
1308,3,"Ware, Mr. Frederick",1,,0,0,359309,8.0500,,2,0


In [198]:
test_combined_df.pipe(drop_column, "Name").pipe(drop_column, "Ticket").pipe(drop_column, "Fare")

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
892,3,1,34.5,0,0,,1,0
893,3,0,47.0,1,0,,2,1
894,2,1,62.0,0,0,,1,0
895,3,1,27.0,0,0,,2,0
896,3,0,22.0,1,1,,2,1
...,...,...,...,...,...,...,...,...
1305,3,1,,0,0,,2,0
1306,1,0,39.0,0,0,C105,0,1
1307,3,1,38.5,0,0,,2,0
1308,3,1,,0,0,,2,0


In [199]:
test_combined_df.pipe(keep_first_letter, "Cabin")

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
892,3,"Kelly, Mr. James",1,34.5,0,0,330911,7.8292,n,1,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",0,47.0,1,0,363272,7.0000,n,2,1
894,2,"Myles, Mr. Thomas Francis",1,62.0,0,0,240276,9.6875,n,1,0
895,3,"Wirz, Mr. Albert",1,27.0,0,0,315154,8.6625,n,2,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",0,22.0,1,1,3101298,12.2875,n,2,1
...,...,...,...,...,...,...,...,...,...,...,...
1305,3,"Spector, Mr. Woolf",1,,0,0,A.5. 3236,8.0500,n,2,0
1306,1,"Oliva y Ocana, Dona. Fermina",0,39.0,0,0,PC 17758,108.9000,C,0,1
1307,3,"Saether, Mr. Simon Sivertsen",1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,n,2,0
1308,3,"Ware, Mr. Frederick",1,,0,0,359309,8.0500,n,2,0


In [200]:
test_combined_df["Cabin"].unique()

array(['n', 'B', 'E', 'A', 'C', 'D', 'F', 'G'], dtype=object)

In [201]:
test_combined_df.pipe(label_encode, "Cabin")

Unnamed: 0_level_0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
892,3,"Kelly, Mr. James",1,34.5,0,0,330911,7.8292,7,1,0
893,3,"Wilkes, Mrs. James (Ellen Needs)",0,47.0,1,0,363272,7.0000,7,2,1
894,2,"Myles, Mr. Thomas Francis",1,62.0,0,0,240276,9.6875,7,1,0
895,3,"Wirz, Mr. Albert",1,27.0,0,0,315154,8.6625,7,2,0
896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",0,22.0,1,1,3101298,12.2875,7,2,1
...,...,...,...,...,...,...,...,...,...,...,...
1305,3,"Spector, Mr. Woolf",1,,0,0,A.5. 3236,8.0500,7,2,0
1306,1,"Oliva y Ocana, Dona. Fermina",0,39.0,0,0,PC 17758,108.9000,2,0,1
1307,3,"Saether, Mr. Simon Sivertsen",1,38.5,0,0,SOTON/O.Q. 3101262,7.2500,7,2,0
1308,3,"Ware, Mr. Frederick",1,,0,0,359309,8.0500,7,2,0


In [202]:
train_df = train_df.pipe(label_encode, column="Sex").pipe(label_encode, column="Embarked").pipe(drop_column, "Name").pipe(drop_column, "Ticket").pipe(drop_column, "Fare").pipe(keep_first_letter, "Cabin").pipe(label_encode, "Cabin")
train_df

Unnamed: 0_level_0,Survived,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0,3,1,22.0,1,0,8,2
2,1,1,0,38.0,1,0,2,0
3,1,3,0,26.0,0,0,8,2
4,1,1,0,35.0,1,0,2,2
5,0,3,1,35.0,0,0,8,2
...,...,...,...,...,...,...,...,...
887,0,2,1,27.0,0,0,8,2
888,1,1,0,19.0,0,0,1,2
889,0,3,0,,1,2,8,2
890,1,1,1,26.0,0,0,2,0


In [203]:
test_combined_df = test_combined_df.pipe(label_encode, column="Sex").pipe(label_encode, column="Embarked").pipe(drop_column, "Name").pipe(drop_column, "Ticket").pipe(drop_column, "Fare").pipe(keep_first_letter, "Cabin").pipe(label_encode, "Cabin")
test_combined_df

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
892,3,1,34.5,0,0,7,1,0
893,3,0,47.0,1,0,7,2,1
894,2,1,62.0,0,0,7,1,0
895,3,1,27.0,0,0,7,2,0
896,3,0,22.0,1,1,7,2,1
...,...,...,...,...,...,...,...,...
1305,3,1,,0,0,7,2,0
1306,1,0,39.0,0,0,2,0,1
1307,3,1,38.5,0,0,7,2,0
1308,3,1,,0,0,7,2,0


In [204]:
train_df.describe()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0,891.0
mean,0.383838,2.308642,0.647587,29.699118,0.523008,0.381594,6.716049,1.538721
std,0.486592,0.836071,0.47799,14.526497,1.102743,0.806057,2.460739,0.794231
min,0.0,1.0,0.0,0.42,0.0,0.0,0.0,0.0
25%,0.0,2.0,0.0,20.125,0.0,0.0,8.0,1.0
50%,0.0,3.0,1.0,28.0,0.0,0.0,8.0,2.0
75%,1.0,3.0,1.0,38.0,1.0,0.0,8.0,2.0
max,1.0,3.0,1.0,80.0,8.0,6.0,8.0,3.0


In [205]:
test_combined_df.describe()


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked,Survived
count,418.0,418.0,332.0,418.0,418.0,418.0,418.0,418.0
mean,2.26555,0.636364,30.27259,0.447368,0.392344,5.976077,1.401914,0.363636
std,0.841838,0.481622,14.181209,0.89676,0.981429,2.048431,0.854496,0.481622
min,1.0,0.0,0.17,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.0,21.0,0.0,0.0,7.0,1.0,0.0
50%,3.0,1.0,27.0,0.0,0.0,7.0,2.0,0.0
75%,3.0,1.0,39.0,1.0,0.0,7.0,2.0,1.0
max,3.0,1.0,76.0,8.0,9.0,7.0,2.0,1.0


In [206]:
from sklearn.preprocessing import StandardScaler

In [207]:
scaled_train_df = StandardScaler().fit_transform(train_df)
scaled_train_df

array([[-0.78927234,  0.82737724,  0.73769513, ..., -0.47367361,
         0.52206745,  0.58111394],
       [ 1.2669898 , -1.56610693, -1.35557354, ..., -0.47367361,
        -1.91759389, -1.93846038],
       [ 1.2669898 ,  0.82737724, -1.35557354, ..., -0.47367361,
         0.52206745,  0.58111394],
       ...,
       [-0.78927234,  0.82737724, -1.35557354, ...,  2.00893337,
         0.52206745,  0.58111394],
       [ 1.2669898 , -1.56610693,  0.73769513, ..., -0.47367361,
        -1.91759389, -1.93846038],
       [-0.78927234,  0.82737724,  0.73769513, ..., -0.47367361,
         0.52206745, -0.67867322]], shape=(891, 8))

In [208]:
scaled_train_df = pd.DataFrame(scaled_train_df, columns=train_df.keys())
scaled_train_df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked
0,-0.789272,0.827377,0.737695,-0.530377,0.432793,-0.473674,0.522067,0.581114
1,1.266990,-1.566107,-1.355574,0.571831,0.432793,-0.473674,-1.917594,-1.938460
2,1.266990,0.827377,-1.355574,-0.254825,-0.474545,-0.473674,0.522067,0.581114
3,1.266990,-1.566107,-1.355574,0.365167,0.432793,-0.473674,-1.917594,0.581114
4,-0.789272,0.827377,0.737695,0.365167,-0.474545,-0.473674,0.522067,0.581114
...,...,...,...,...,...,...,...,...
886,-0.789272,-0.369365,0.737695,-0.185937,-0.474545,-0.473674,0.522067,0.581114
887,1.266990,-1.566107,-1.355574,-0.737041,-0.474545,-0.473674,-2.324204,0.581114
888,-0.789272,0.827377,-1.355574,,0.432793,2.008933,0.522067,0.581114
889,1.266990,-1.566107,0.737695,-0.254825,-0.474545,-0.473674,-1.917594,-1.938460


In [209]:
scaled_train_df["Survived"] = train_df["Survived"]

# Persoane despre care nu avem informatie putem sa presupunem ca au decedat

In [210]:
scaled_train_df["Survived"] = scaled_train_df["Survived"].fillna(0)

# Eliminam valorile care nu au varsta cunoscuta

In [211]:
scaled_train_df = scaled_train_df [ scaled_train_df["Age"].notna()]
scaled_train_df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked
0,0.0,0.827377,0.737695,-0.530377,0.432793,-0.473674,0.522067,0.581114
1,0.0,-1.566107,-1.355574,0.571831,0.432793,-0.473674,-1.917594,-1.938460
2,1.0,0.827377,-1.355574,-0.254825,-0.474545,-0.473674,0.522067,0.581114
3,1.0,-1.566107,-1.355574,0.365167,0.432793,-0.473674,-1.917594,0.581114
4,1.0,0.827377,0.737695,0.365167,-0.474545,-0.473674,0.522067,0.581114
...,...,...,...,...,...,...,...,...
885,0.0,0.827377,-1.355574,0.640719,-0.474545,5.732844,0.522067,-0.678673
886,0.0,-0.369365,0.737695,-0.185937,-0.474545,-0.473674,0.522067,0.581114
887,0.0,-1.566107,-1.355574,-0.737041,-0.474545,-0.473674,-2.324204,0.581114
889,0.0,-1.566107,0.737695,-0.254825,-0.474545,-0.473674,-1.917594,-1.938460


In [212]:
y_train_scaled = scaled_train_df[["Survived"]]
y_train_scaled.head(2)

Unnamed: 0,Survived
0,0.0
1,0.0


In [213]:
x_train_scaled = scaled_train_df.drop("Survived", axis=1)
x_train_scaled.head(2)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked
0,0.827377,0.737695,-0.530377,0.432793,-0.473674,0.522067,0.581114
1,-1.566107,-1.355574,0.571831,0.432793,-0.473674,-1.917594,-1.93846


In [214]:
x_train_scaled.isnull().sum()

Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Cabin       0
Embarked    0
dtype: int64

In [215]:
filled_x = x_train_scaled.fillna(33.5)
filled_x.isnull().sum()

Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Cabin       0
Embarked    0
dtype: int64

In [216]:
x_exclude_nan = x_train_scaled[x_train_scaled["Age"].notna()]
x_exclude_nan 

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Cabin,Embarked
0,0.827377,0.737695,-0.530377,0.432793,-0.473674,0.522067,0.581114
1,-1.566107,-1.355574,0.571831,0.432793,-0.473674,-1.917594,-1.938460
2,0.827377,-1.355574,-0.254825,-0.474545,-0.473674,0.522067,0.581114
3,-1.566107,-1.355574,0.365167,0.432793,-0.473674,-1.917594,0.581114
4,0.827377,0.737695,0.365167,-0.474545,-0.473674,0.522067,0.581114
...,...,...,...,...,...,...,...
885,0.827377,-1.355574,0.640719,-0.474545,5.732844,0.522067,-0.678673
886,-0.369365,0.737695,-0.185937,-0.474545,-0.473674,0.522067,0.581114
887,-1.566107,-1.355574,-0.737041,-0.474545,-0.473674,-2.324204,0.581114
889,-1.566107,0.737695,-0.254825,-0.474545,-0.473674,-1.917594,-1.938460


In [217]:
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier

In [218]:
tree_model = DecisionTreeClassifier()
tree_model

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [219]:
x_train_scaled.shape, y_train_scaled.shape

((714, 7), (714, 1))

In [220]:
y_train_scaled.head(2)

Unnamed: 0,Survived
0,0.0
1,0.0


In [221]:
tree_model.fit(x_train_scaled, y_train_scaled)
tree_model

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [222]:
test_df.isnull().sum()

Pclass        0
Name          0
Sex           0
Age          86
SibSp         0
Parch         0
Ticket        0
Fare          1
Cabin       327
Embarked      0
dtype: int64