## Importando bibliotecas e dados

In [1]:
import pandas as pd
import numpy as np
from pycaret.classification import setup, compare_models, models, create_model
from pycaret.classification import predict_model, tune_model, plot_model
from pycaret.classification import evaluate_model, finalize_model, save_model
from pycaret.classification import load_model
from pycaret.utils import check_metric
from sklearn.model_selection import train_test_split
pd.set_option('display.max_columns',None)

In [2]:
train_raw_data=pd.read_csv('data/train.csv')
test_raw_data=pd.read_csv('data/test.csv')

In [3]:
train_raw_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
test_raw_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [5]:
trainrow=train_raw_data.shape[0]
trainrow

891

In [6]:
testrow=test_raw_data.shape[0]
testrow

418

In [7]:
y_train=train_raw_data['Survived'].copy()

In [8]:
train_raw_data=train_raw_data.drop(['Survived'],1)

In [9]:
combine=pd.concat([train_raw_data,test_raw_data])
combine.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [10]:
print(combine.shape)
print(train_raw_data.shape)
print(test_raw_data.shape)

(1309, 11)
(891, 11)
(418, 11)


In [11]:
combine.isnull().sum()

PassengerId       0
Pclass            0
Name              0
Sex               0
Age             263
SibSp             0
Parch             0
Ticket            0
Fare              1
Cabin          1014
Embarked          2
dtype: int64

In [12]:
combine['Embarked']=combine['Embarked'].fillna(combine['Embarked'].value_counts().index[0])

In [13]:
combine['Cabin'].value_counts()

C23 C25 C27        6
G6                 5
B57 B59 B63 B66    5
C78                4
D                  4
                  ..
E60                1
B52 B54 B56        1
E10                1
C53                1
C103               1
Name: Cabin, Length: 186, dtype: int64

In [14]:
combine['Cabin']=combine['Cabin'].fillna('U')

In [15]:
combine['Cabin'].value_counts()

U                  1014
C23 C25 C27           6
B57 B59 B63 B66       5
G6                    5
F4                    4
                   ... 
D9                    1
A24                   1
C50                   1
C110                  1
C103                  1
Name: Cabin, Length: 187, dtype: int64

In [16]:
combine['Cabin']=combine['Cabin'].astype(str).str[0]

In [17]:
combine.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,U,S
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C,C
2,3,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,U,S
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C,S
4,5,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,U,S


In [18]:
combine.loc[combine['Fare'].isnull()]

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
152,1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,U,S


In [19]:
combine['Fare']=combine['Fare'].fillna(combine.loc[(combine['Pclass']==3) & 
                                                   (combine['Sex']=="male") & 
                                                   (combine['Age']<65) & 
                                                   (combine['Age']>55)].dropna()['Fare'].mean())

In [20]:
combine.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age            263
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin            0
Embarked         0
dtype: int64

In [21]:
passengerids=test_raw_data['PassengerId']
passengerids

0       892
1       893
2       894
3       895
4       896
       ... 
413    1305
414    1306
415    1307
416    1308
417    1309
Name: PassengerId, Length: 418, dtype: int64

In [22]:
combine=combine.drop(['PassengerId','Ticket'],1)

In [23]:
combine.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,7.25,U,S
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C,C
2,3,"Heikkinen, Miss. Laina",female,26.0,0,0,7.925,U,S
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,53.1,C,S
4,3,"Allen, Mr. William Henry",male,35.0,0,0,8.05,U,S


In [24]:
combine['familysize']=combine['SibSp']+combine['Parch']+1
combine.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked,familysize
0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,7.25,U,S,2
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C,C,2
2,3,"Heikkinen, Miss. Laina",female,26.0,0,0,7.925,U,S,1
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,53.1,C,S,2
4,3,"Allen, Mr. William Henry",male,35.0,0,0,8.05,U,S,1


In [25]:
combine['Title'] = combine.Name.str.extract(' ([A-Za-z]+)\.', expand=False)
combine.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked,familysize,Title
0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,7.25,U,S,2,Mr
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C,C,2,Mrs
2,3,"Heikkinen, Miss. Laina",female,26.0,0,0,7.925,U,S,1,Miss
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,53.1,C,S,2,Mrs
4,3,"Allen, Mr. William Henry",male,35.0,0,0,8.05,U,S,1,Mr


In [26]:
combine=combine.drop(['Name'],1)
combine.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked,familysize,Title
0,3,male,22.0,1,0,7.25,U,S,2,Mr
1,1,female,38.0,1,0,71.2833,C,C,2,Mrs
2,3,female,26.0,0,0,7.925,U,S,1,Miss
3,1,female,35.0,1,0,53.1,C,S,2,Mrs
4,3,male,35.0,0,0,8.05,U,S,1,Mr


In [27]:
combine=combine.drop(['SibSp','Parch'],1)
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,male,22.0,7.25,U,S,2,Mr
1,1,female,38.0,71.2833,C,C,2,Mrs
2,3,female,26.0,7.925,U,S,1,Miss
3,1,female,35.0,53.1,C,S,2,Mrs
4,3,male,35.0,8.05,U,S,1,Mr


In [28]:
combine['Sex']=combine['Sex'].map({'male':0,'female':1})
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,0,22.0,7.25,U,S,2,Mr
1,1,1,38.0,71.2833,C,C,2,Mrs
2,3,1,26.0,7.925,U,S,1,Miss
3,1,1,35.0,53.1,C,S,2,Mrs
4,3,0,35.0,8.05,U,S,1,Mr


In [29]:
print(combine.Pclass.unique())
print(combine.Sex.unique())

[3 1 2]
[0 1]


In [30]:
for i in range(0,2):
    for j in range(0,3):
        print(i,j+1)
        temp_dataset=combine[(combine['Sex']==i) & (combine['Pclass']==j+1)]['Age'].dropna()
        print(temp_dataset)
        combine.loc[(combine.Age.isnull()) & (combine.Sex==i) & (combine.Pclass==j+1),'Age']=int(temp_dataset.median())

0 1
6      54.0
23     28.0
27     19.0
30     40.0
34     28.0
       ... 
378    55.0
390    23.0
403    17.0
404    43.0
407    50.0
Name: Age, Length: 151, dtype: float64
0 2
20     35.0
21     34.0
33     66.0
70     32.0
72     21.0
       ... 
387    57.0
393    47.0
401    38.0
405    20.0
406    23.0
Name: Age, Length: 158, dtype: float64
0 3
0      22.0
4      35.0
7       2.0
12     20.0
13     39.0
       ... 
394    29.0
396    24.0
398    22.0
399    31.0
415    38.5
Name: Age, Length: 349, dtype: float64
1 1
1      38.0
3      35.0
11     58.0
52     49.0
61     38.0
       ... 
397    48.0
400    30.0
402    22.0
411    37.0
414    39.0
Name: Age, Length: 133, dtype: float64
1 2
9      14.0
15     55.0
41     27.0
43      3.0
53     29.0
       ... 
330    48.0
349    31.0
361    24.0
362    31.0
385    24.0
Name: Age, Length: 103, dtype: float64
1 3
2      26.0
8      27.0
10      4.0
14     14.0
18     31.0
       ... 
367    22.0
376    22.0
383    19.0
409     3.0
4

In [31]:
combine.isnull().sum()

Pclass        0
Sex           0
Age           0
Fare          0
Cabin         0
Embarked      0
familysize    0
Title         0
dtype: int64

In [32]:
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,0,22.0,7.25,U,S,2,Mr
1,1,1,38.0,71.2833,C,C,2,Mrs
2,3,1,26.0,7.925,U,S,1,Miss
3,1,1,35.0,53.1,C,S,2,Mrs
4,3,0,35.0,8.05,U,S,1,Mr


In [33]:
combine_checkpoint=combine.copy()
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,0,22.0,7.25,U,S,2,Mr
1,1,1,38.0,71.2833,C,C,2,Mrs
2,3,1,26.0,7.925,U,S,1,Miss
3,1,1,35.0,53.1,C,S,2,Mrs
4,3,0,35.0,8.05,U,S,1,Mr


In [34]:
combine=combine_checkpoint.copy()
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,0,22.0,7.25,U,S,2,Mr
1,1,1,38.0,71.2833,C,C,2,Mrs
2,3,1,26.0,7.925,U,S,1,Miss
3,1,1,35.0,53.1,C,S,2,Mrs
4,3,0,35.0,8.05,U,S,1,Mr


In [35]:
combine['Age_Band']=pd.cut(combine['Age'],5)
combine['Age_Band'].unique()

[(16.136, 32.102], (32.102, 48.068], (48.068, 64.034], (0.0902, 16.136], (64.034, 80.0]]
Categories (5, interval[float64]): [(0.0902, 16.136] < (16.136, 32.102] < (32.102, 48.068] < (48.068, 64.034] < (64.034, 80.0]]

In [36]:
combine.loc[(combine['Age']<=16.136),'Age']=1
combine.loc[(combine['Age']>16.136) & (combine['Age']<=32.102),'Age']=2
combine.loc[(combine['Age']>32.102) & (combine['Age']<=48.068),'Age']=3
combine.loc[(combine['Age']>48.068) & (combine['Age']<=64.034),'Age']=4
combine.loc[(combine['Age']>64.034) & (combine['Age']<=80.),'Age']=5
combine['Age'].unique()

array([2., 3., 4., 1., 5.])

In [37]:
combine=combine.drop(['Age_Band'],1)


In [38]:
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,0,2.0,7.25,U,S,2,Mr
1,1,1,3.0,71.2833,C,C,2,Mrs
2,3,1,2.0,7.925,U,S,1,Miss
3,1,1,3.0,53.1,C,S,2,Mrs
4,3,0,3.0,8.05,U,S,1,Mr


In [39]:
combine['Fare_Band']=pd.cut(combine['Fare'],3)
combine['Fare_Band'].unique()

[(-0.512, 170.776], (170.776, 341.553], (341.553, 512.329]]
Categories (3, interval[float64]): [(-0.512, 170.776] < (170.776, 341.553] < (341.553, 512.329]]

In [40]:
combine.loc[(combine['Fare']<=170.776),'Fare']=1
combine.loc[(combine['Fare']>170.776) & (combine['Fare']<=341.553),'Fare']=2
combine.loc[(combine['Fare']>341.553) & (combine['Fare']<=512.329),'Fare']=3
combine=combine.drop(['Fare_Band'],1)

In [41]:
combine.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Cabin,Embarked,familysize,Title
0,3,0,2.0,1.0,U,S,2,Mr
1,1,1,3.0,1.0,C,C,2,Mrs
2,3,1,2.0,1.0,U,S,1,Miss
3,1,1,3.0,1.0,C,S,2,Mrs
4,3,0,3.0,1.0,U,S,1,Mr


In [42]:
combine=pd.get_dummies(columns=['Pclass','Sex','Cabin','Embarked','Title','Age','Fare'],data=combine)
combine.head()

Unnamed: 0,familysize,Pclass_1,Pclass_2,Pclass_3,Sex_0,Sex_1,Cabin_A,Cabin_B,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S,Title_Capt,Title_Col,Title_Countess,Title_Don,Title_Dona,Title_Dr,Title_Jonkheer,Title_Lady,Title_Major,Title_Master,Title_Miss,Title_Mlle,Title_Mme,Title_Mr,Title_Mrs,Title_Ms,Title_Rev,Title_Sir,Age_1.0,Age_2.0,Age_3.0,Age_4.0,Age_5.0,Fare_1.0,Fare_2.0,Fare_512.3292
0,2,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0
1,2,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0
2,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
3,2,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0
4,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0


In [43]:
x_train=combine.iloc[:trainrow]
x_test=combine.iloc[trainrow:]
x = x_train.copy()
x['survived'] = y_train
x.head()

Unnamed: 0,familysize,Pclass_1,Pclass_2,Pclass_3,Sex_0,Sex_1,Cabin_A,Cabin_B,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Cabin_U,Embarked_C,Embarked_Q,Embarked_S,Title_Capt,Title_Col,Title_Countess,Title_Don,Title_Dona,Title_Dr,Title_Jonkheer,Title_Lady,Title_Major,Title_Master,Title_Miss,Title_Mlle,Title_Mme,Title_Mr,Title_Mrs,Title_Ms,Title_Rev,Title_Sir,Age_1.0,Age_2.0,Age_3.0,Age_4.0,Age_5.0,Fare_1.0,Fare_2.0,Fare_512.3292,survived
0,2,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0
1,2,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1
2,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1
3,2,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1
4,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0


In [44]:
x_train.shape

(891, 44)

# AutoML

In [45]:
clf = setup(data=x, target='survived')

Setup Succesfully Completed!


Unnamed: 0,Description,Value
0,session_id,3397
1,Target Type,Binary
2,Label Encoded,"0: 0, 1: 1"
3,Original Data,"(891, 45)"
4,Missing Values,False
5,Numeric Features,43
6,Categorical Features,1
7,Ordinal Features,False
8,High Cardinality Features,False
9,High Cardinality Method,


In [46]:
best = compare_models(sort='AUC')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
0,Logistic Regression,0.8364,0.8747,0.7737,0.7983,0.784,0.6525,0.6547,0.0205
1,Gradient Boosting Classifier,0.8075,0.8718,0.7196,0.7686,0.7409,0.5883,0.5914,0.0831
2,Linear Discriminant Analysis,0.8203,0.8709,0.7446,0.7828,0.7605,0.6171,0.6203,0.0082
3,CatBoost Classifier,0.8139,0.867,0.7069,0.7917,0.744,0.5987,0.6038,1.7315
4,Ada Boost Classifier,0.8171,0.8664,0.7696,0.7633,0.7636,0.6148,0.6179,0.0717
5,Light Gradient Boosting Machine,0.8059,0.8425,0.6817,0.7879,0.7282,0.5787,0.5849,0.0681
6,K Neighbors Classifier,0.8025,0.8397,0.6652,0.7937,0.7198,0.5696,0.5783,0.0067
7,Random Forest Classifier,0.7946,0.8391,0.6942,0.7591,0.7214,0.5596,0.5643,0.0244
8,Extreme Gradient Boosting,0.7914,0.8302,0.665,0.7676,0.7083,0.5478,0.5549,0.1408
9,Extra Trees Classifier,0.7881,0.8155,0.6567,0.7683,0.7013,0.5398,0.5492,0.1052


In [47]:
models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Logistic Regression,sklearn.linear_model.LogisticRegression,True
knn,K Neighbors Classifier,sklearn.neighbors.KNeighborsClassifier,True
nb,Naive Bayes,sklearn.naive_bayes.GaussianNB,True
dt,Decision Tree Classifier,sklearn.tree.DecisionTreeClassifier,True
svm,SVM - Linear Kernel,sklearn.linear_model.SGDClassifier,True
rbfsvm,SVM - Radial Kernel,sklearn.svm.SVC,False
gpc,Gaussian Process Classifier,sklearn.gaussian_process.GPC,False
mlp,MLP Classifier,sklearn.neural_network.MLPClassifier,False
ridge,Ridge Classifier,sklearn.linear_model.RidgeClassifier,True
rf,Random Forest Classifier,sklearn.ensemble.RandomForestClassifier,True


In [48]:
dt = create_model('lr')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7619,0.8419,0.7083,0.68,0.6939,0.4992,0.4995
1,0.8413,0.8718,0.7917,0.7917,0.7917,0.6635,0.6635
2,0.8254,0.8665,0.75,0.7826,0.766,0.6268,0.6272
3,0.8226,0.8863,0.6957,0.8,0.7442,0.6094,0.6129
4,0.8226,0.8701,0.8333,0.7407,0.7843,0.6345,0.6377
5,0.8226,0.8701,0.7917,0.76,0.7755,0.6289,0.6293
6,0.9194,0.9359,0.8333,0.9524,0.8889,0.826,0.8306
7,0.8387,0.8355,0.75,0.8182,0.7826,0.6548,0.6564
8,0.871,0.9035,0.875,0.8077,0.84,0.7322,0.7338
9,0.8387,0.8657,0.7083,0.85,0.7727,0.6493,0.6558


In [49]:
tuned_dt = tune_model(dt, optimize='Accuracy')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7619,0.8397,0.7083,0.68,0.6939,0.4992,0.4995
1,0.8413,0.8697,0.7917,0.7917,0.7917,0.6635,0.6635
2,0.8254,0.8622,0.75,0.7826,0.766,0.6268,0.6272
3,0.8226,0.8885,0.6957,0.8,0.7442,0.6094,0.6129
4,0.8226,0.8701,0.8333,0.7407,0.7843,0.6345,0.6377
5,0.8226,0.8613,0.7917,0.76,0.7755,0.6289,0.6293
6,0.9032,0.9282,0.8333,0.9091,0.8696,0.7929,0.7948
7,0.8387,0.8421,0.75,0.8182,0.7826,0.6548,0.6564
8,0.871,0.8914,0.875,0.8077,0.84,0.7322,0.7338
9,0.8226,0.8679,0.6667,0.8421,0.7442,0.6112,0.6209


In [50]:
evaluate_model(tuned_dt)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [51]:
predict_model(tuned_dt)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.8172,0.8623,0.7573,0.7647,0.761,0.6129,0.613


Unnamed: 0,Pclass_1,Pclass_2,Pclass_3,Sex_0,Sex_1,Cabin_A,Cabin_B,Cabin_C,Cabin_D,Cabin_E,...,familysize_2,familysize_3,familysize_4,familysize_5,familysize_6,familysize_7,familysize_8,survived,Label,Score
0,0,0,1,1,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0786
1,0,1,0,1,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.2165
2,0,0,1,0,1,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0.5775
3,0,1,0,1,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0725
4,1,0,0,1,0,0,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.1781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,1,0,0,1,0,0,1,0,0,0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1,0.8901
264,1,0,0,1,0,0,0,0,1,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0,1,0.5537
265,0,0,1,1,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0,0,0.0194
266,0,1,0,0,1,0,0,0,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0.9389


In [52]:
final_dt = finalize_model(tuned_dt)

In [67]:
predictions = predict_model(final_dt, data=x_test)
predictions

Unnamed: 0,familysize,Pclass_1,Pclass_2,Pclass_3,Sex_0,Sex_1,Cabin_A,Cabin_B,Cabin_C,Cabin_D,...,Age_1.0,Age_2.0,Age_3.0,Age_4.0,Age_5.0,Fare_1.0,Fare_2.0,Fare_512.3292,Label,Score
0,1,0,0,1,1,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0.0740
1,2,0,0,1,0,1,0,0,0,0,...,0,0,1,0,0,1,0,0,1,0.5574
2,1,0,1,0,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0.1354
3,1,0,0,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0.0804
4,3,0,0,1,0,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0.7149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
413,1,0,0,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0.0804
414,1,1,0,0,0,1,0,0,1,0,...,0,0,1,0,0,1,0,0,1,0.9370
415,1,0,0,1,1,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0.0522
416,1,0,0,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0.0804


In [68]:
final = test_raw_data[['PassengerId']]
final['Survived'] = predictions['Label']
final

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [69]:
final.to_csv('predictions.csv', index=False)

In [70]:
a = pd.read_csv('predictions.csv')
a

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0
