# 使用Pycaret 進行鐵達尼號存活
參考：  https://medium.com/%E5%B7%A5%E7%A8%8B%E9%9A%A8%E5%AF%AB%E7%AD%86%E8%A8%98/pycaret-%E6%A9%9F%E5%99%A8%E5%AD%B8%E7%BF%92%E7%A5%9E%E5%99%A8-d059de0752cd

In [1]:
!pip install catboost
!pip install pycaret[full]

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7
Collecting pycaret[full]
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting pandas<2.2.0 (from pycaret[full])
  Downloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret[full])
  Downloading scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret[full])
  Downloading joblib-1.3.2-py3-none-any.whl.metadat

In [1]:
!pip install --upgrade pip
!apt-get update
!apt-get install -y build-essential libssl-dev libffi-dev python3-dev


Collecting pip
  Using cached pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Using cached pip-24.3.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-24.3.1
Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpad

In [2]:
!pip install scikit-learn
!pip uninstall lightgbm -y
!pip install lightgbm

Found existing installation: lightgbm 4.5.0
Uninstalling lightgbm-4.5.0:
  Successfully uninstalled lightgbm-4.5.0
Collecting lightgbm
  Downloading lightgbm-4.5.0-py3-none-manylinux_2_28_x86_64.whl.metadata (17 kB)
Downloading lightgbm-4.5.0-py3-none-manylinux_2_28_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightgbm
Successfully installed lightgbm-4.5.0


## 讀取資料

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

data_train = pd.read_csv('/content/sample_data/train.csv')
data_test = pd.read_csv('/content/sample_data/test.csv')
data_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
def simplify_ages(df):
    df.Age = df.Age.fillna(-0.5)
    bins = (-1, 0, 5, 12, 18, 25, 35, 60, 120)
    group_names = ['Unknown', 'Baby', 'Child', 'Teenager', 'Student', 'Young Adult', 'Adult', 'Senior']
    categories = pd.cut(df.Age, bins, labels=group_names)
    df.Age = categories
    return df

def simplify_cabins(df):
    df.Cabin = df.Cabin.fillna('N')
    df.Cabin = df.Cabin.apply(lambda x: x[0])
    return df

def simplify_fares(df):
    df.Fare = df.Fare.fillna(-0.5)
    bins = (-1, 0, 8, 15, 31, 1000)
    group_names = ['Unknown', '1_quartile', '2_quartile', '3_quartile', '4_quartile']
    categories = pd.cut(df.Fare, bins, labels=group_names)
    df.Fare = categories
    return df

def format_name(df):
    df['Lname'] = df.Name.apply(lambda x: x.split(' ')[0])
    df['NamePrefix'] = df.Name.apply(lambda x: x.split(' ')[1])
    return df

def drop_features(df):
    return df.drop(['Ticket', 'Name', 'Embarked'], axis=1)

def transform_features(df):
    df = simplify_ages(df)
    df = simplify_cabins(df)
    df = simplify_fares(df)
    df = format_name(df)
    df = drop_features(df)
    return df

data_train = transform_features(data_train)
data_test = transform_features(data_test)
data_train.head()

from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(data_train.drop(['PassengerId'], axis=1), random_state=100, train_size=0.8)

train_data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Lname,NamePrefix
408,0,3,male,Student,0,0,1_quartile,N,"Birkeland,",Mr.
480,0,3,male,Child,5,2,4_quartile,N,"Goodwin,",Master.
510,1,3,male,Young Adult,0,0,1_quartile,N,"Daly,",Mr.
609,1,1,female,Adult,0,0,4_quartile,C,"Shutes,",Miss.
547,1,2,male,Unknown,0,0,2_quartile,N,Padro,y


In [6]:

from pycaret.classification import *

clf1 = setup(data = train_data,
             target = 'Survived')

Unnamed: 0,Description,Value
0,Session id,7552
1,Target,Survived
2,Target type,Binary
3,Original data shape,"(712, 10)"
4,Transformed data shape,"(712, 47)"
5,Transformed train set shape,"(498, 47)"
6,Transformed test set shape,"(214, 47)"
7,Numeric features,3
8,Categorical features,6
9,Preprocess,True


In [7]:
compare_models(fold = 5)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8132,0.8679,0.7057,0.7767,0.7392,0.5942,0.5961,1.156
knn,K Neighbors Classifier,0.8052,0.8376,0.6363,0.8053,0.71,0.5664,0.576,0.14
ridge,Ridge Classifier,0.8052,0.8578,0.6468,0.8029,0.7128,0.5682,0.5785,0.15
et,Extra Trees Classifier,0.7809,0.8144,0.6253,0.7594,0.6815,0.5173,0.5264,0.27
rf,Random Forest Classifier,0.7769,0.8213,0.6465,0.7447,0.6868,0.5151,0.5225,0.504
catboost,CatBoost Classifier,0.7769,0.7829,0.6683,0.7309,0.6951,0.5199,0.5241,1.514
gbc,Gradient Boosting Classifier,0.7567,0.732,0.6624,0.6895,0.6744,0.4805,0.482,0.222
xgboost,Extreme Gradient Boosting,0.7468,0.7392,0.6306,0.6818,0.6536,0.4546,0.4568,0.202
svm,SVM - Linear Kernel,0.7431,0.8014,0.4398,0.7724,0.5476,0.3958,0.4286,0.122
ada,Ada Boost Classifier,0.7227,0.7345,0.6898,0.6242,0.6529,0.4241,0.4275,0.212


Processing:   0%|          | 0/69 [00:00<?, ?it/s]

In [8]:
lr = create_model('lr')
ridge = create_model('ridge')
knn = create_model('knn')
nb = create_model('nb')

stacker = stack_models(estimator_list = [ridge,knn,nb], meta_model = lr)
stacker

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.78,0.8727,0.6842,0.7222,0.7027,0.5283,0.5288
1,0.82,0.8956,0.6842,0.8125,0.7429,0.606,0.6113
2,0.9,0.9338,0.7895,0.9375,0.8571,0.7811,0.7879
3,0.82,0.8846,0.7368,0.7778,0.7568,0.6141,0.6146
4,0.82,0.8879,0.6842,0.8125,0.7429,0.606,0.6113
5,0.8,0.8557,0.6842,0.7647,0.7222,0.5667,0.5689
6,0.74,0.8387,0.5789,0.6875,0.6286,0.4308,0.4346
7,0.8,0.7951,0.7222,0.7222,0.7222,0.566,0.566
8,0.8571,0.8486,0.7778,0.8235,0.8,0.689,0.6897
9,0.7959,0.8441,0.6667,0.75,0.7059,0.5505,0.5527


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.8421,0.7368,0.7368,0.7368,0.5756,0.5756
1,0.78,0.8905,0.5789,0.7857,0.6667,0.5081,0.5212
2,0.86,0.8998,0.7895,0.8333,0.8108,0.6998,0.7005
3,0.84,0.8862,0.6842,0.8667,0.7647,0.646,0.6564
4,0.82,0.8812,0.6316,0.8571,0.7273,0.5975,0.613
5,0.8,0.8523,0.5263,0.9091,0.6667,0.5379,0.5789
6,0.74,0.8489,0.5789,0.6875,0.6286,0.4308,0.4346
7,0.76,0.7517,0.7222,0.65,0.6842,0.4915,0.4933
8,0.7959,0.8235,0.6111,0.7857,0.6875,0.5395,0.5489
9,0.8163,0.8477,0.7222,0.7647,0.7429,0.6002,0.6008


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.82,0.8998,0.6842,0.8125,0.7429,0.606,0.6113
1,0.8,0.837,0.5789,0.8462,0.6875,0.5479,0.5693
2,0.86,0.9058,0.7368,0.875,0.8,0.6935,0.6996
3,0.84,0.916,0.7368,0.8235,0.7778,0.6534,0.6558
4,0.82,0.865,0.6842,0.8125,0.7429,0.606,0.6113
5,0.76,0.8514,0.5263,0.7692,0.625,0.4575,0.4753
6,0.72,0.7301,0.5789,0.6471,0.6111,0.3934,0.3949
7,0.76,0.7899,0.6111,0.6875,0.6471,0.4662,0.468
8,0.7959,0.7966,0.5,0.9,0.6429,0.5158,0.5595
9,0.7959,0.8306,0.6667,0.75,0.7059,0.5505,0.5527


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.38,0.4686,0.8947,0.3696,0.5231,-0.032,-0.0729
1,0.44,0.7419,1.0,0.4043,0.5758,0.0753,0.1978
2,0.4,0.6774,1.0,0.3878,0.5588,0.0247,0.1118
3,0.44,0.7954,1.0,0.4043,0.5758,0.0753,0.1978
4,0.44,0.5153,1.0,0.4043,0.5758,0.0753,0.1978
5,0.4,0.7504,0.9474,0.383,0.5455,0.0092,0.0243
6,0.42,0.764,1.0,0.3958,0.5672,0.0498,0.1598
7,0.38,0.6207,1.0,0.3673,0.5373,0.0227,0.1071
8,0.3878,0.6676,1.0,0.375,0.5455,0.0239,0.11
9,0.4082,0.7195,1.0,0.383,0.5538,0.0482,0.1572


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.8421,0.7368,0.7368,0.7368,0.5756,0.5756
1,0.78,0.8888,0.6316,0.75,0.6857,0.5184,0.5229
2,0.88,0.8998,0.8421,0.8421,0.8421,0.7453,0.7453
3,0.82,0.8778,0.6842,0.8125,0.7429,0.606,0.6113
4,0.84,0.8778,0.7368,0.8235,0.7778,0.6534,0.6558
5,0.8,0.8421,0.5789,0.8462,0.6875,0.5479,0.5693
6,0.74,0.8472,0.6316,0.6667,0.6486,0.4425,0.4429
7,0.74,0.7465,0.7222,0.619,0.6667,0.4556,0.4592
8,0.7755,0.8199,0.6667,0.7059,0.6857,0.5113,0.5118
9,0.7959,0.8477,0.7222,0.7222,0.7222,0.5609,0.5609


Processing:   0%|          | 0/6 [00:00<?, ?it/s]

In [9]:
from sklearn.metrics import accuracy_score
save_model(stacker,'stacker_auc0.8599')
model = load_model('stacker_auc0.8599')
pred = predict_model(model,data = test_data)
accuracy_score(pred['Survived'],pred['prediction_label'])
pred.head()

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Stacking Classifier,0.8156,0.8558,0.72,0.8182,0.766,0.6149,0.6183


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Lname,NamePrefix,Survived,prediction_label,prediction_score
205,3,female,Baby,0,1,2_quartile,G,"Strom,",Miss.,0,1,0.7789
44,3,female,Student,0,0,1_quartile,N,"Devaney,",Miss.,1,1,0.6386
821,3,male,Young Adult,0,0,2_quartile,N,"Lulic,",Mr.,1,0,0.9213
458,2,female,Adult,0,0,2_quartile,N,"Toomey,",Miss.,1,1,0.5215
795,2,male,Adult,0,0,2_quartile,N,"Otter,",Mr.,0,0,0.9607


## stacking 模型，並使用 xgboost 作第二層預測

In [27]:
# 省時間全部fold切5分
lr = create_model('lr', fold = 5)
knn = create_model('knn', fold = 5)
nb = create_model('nb', fold = 5)
dt = create_model('dt', fold = 5)
svm = create_model('svm', fold = 5)
rbfsvm = create_model('rbfsvm', fold = 5)
gpc = create_model('gpc', fold = 5)
mlp = create_model('mlp', fold = 5)
ridge = create_model('ridge', fold = 5)
rf = create_model('rf', fold = 5)
qda = create_model('qda', fold = 5)
ada = create_model('ada', fold = 5)
lda = create_model('lda', fold = 5)
gbc = create_model('gbc', fold = 5)
et = create_model('et', fold = 5)
xgboost = create_model('xgboost', fold = 5)


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.81,0.8901,0.6842,0.7879,0.7324,0.5862,0.5897
1,0.85,0.9045,0.7632,0.8286,0.7945,0.6767,0.6781
2,0.81,0.8756,0.6757,0.7812,0.7246,0.5808,0.5843
3,0.7576,0.8136,0.6486,0.6857,0.6667,0.4764,0.4769
4,0.8384,0.8555,0.7568,0.8,0.7778,0.6509,0.6516
Mean,0.8132,0.8679,0.7057,0.7767,0.7392,0.5942,0.5961
Std,0.0319,0.0316,0.0459,0.0483,0.0449,0.0695,0.0696


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.79,0.861,0.5789,0.8148,0.6769,0.5279,0.5448
1,0.84,0.9007,0.7105,0.8438,0.7714,0.6497,0.6554
2,0.81,0.8516,0.6216,0.8214,0.7077,0.5709,0.5831
3,0.7677,0.7655,0.6216,0.7188,0.6667,0.4898,0.4928
4,0.8182,0.8095,0.6486,0.8276,0.7273,0.5939,0.6038
Mean,0.8052,0.8376,0.6363,0.8053,0.71,0.5664,0.576
Std,0.0247,0.0463,0.0433,0.0443,0.0375,0.0549,0.0548


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4,0.5212,0.9474,0.383,0.5455,0.0092,0.0243
1,0.42,0.8154,1.0,0.3958,0.5672,0.0498,0.1598
2,0.41,0.6581,0.973,0.383,0.5496,0.0397,0.1064
3,0.404,0.7228,1.0,0.3854,0.5564,0.0366,0.1366
4,0.404,0.7435,1.0,0.3854,0.5564,0.0366,0.1366
Mean,0.4076,0.6922,0.9841,0.3865,0.555,0.0344,0.1127
Std,0.007,0.0991,0.0211,0.0048,0.0074,0.0135,0.0474


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.79,0.7593,0.6316,0.7742,0.6957,0.5379,0.5443
1,0.69,0.643,0.4474,0.6296,0.5231,0.3031,0.3128
2,0.79,0.7608,0.6486,0.75,0.6957,0.5366,0.5399
3,0.4949,0.4605,0.3243,0.3243,0.3243,-0.0789,-0.0789
4,0.7374,0.7195,0.6486,0.6486,0.6486,0.439,0.439
Mean,0.7005,0.6686,0.5401,0.6254,0.5775,0.3475,0.3514
Std,0.1093,0.1125,0.132,0.1605,0.1415,0.2299,0.2311


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.66,0.7808,0.1842,0.7,0.2917,0.1584,0.2198
1,0.74,0.8162,0.4474,0.7727,0.5667,0.3993,0.4297
2,0.77,0.7752,0.4595,0.85,0.5965,0.455,0.4971
3,0.7374,0.7975,0.4595,0.7391,0.5667,0.3926,0.4155
4,0.8081,0.8372,0.6486,0.8,0.7164,0.5738,0.581
Mean,0.7431,0.8014,0.4398,0.7724,0.5476,0.3958,0.4286
Std,0.0488,0.0229,0.1482,0.0512,0.1394,0.1353,0.1197


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.83,0.8688,0.6842,0.8387,0.7536,0.6259,0.6334
1,0.87,0.9164,0.7632,0.8788,0.8169,0.7169,0.7212
2,0.84,0.8833,0.7568,0.8,0.7778,0.6529,0.6535
3,0.7374,0.7598,0.6486,0.6486,0.6486,0.439,0.439
4,0.7778,0.8202,0.6757,0.7143,0.6944,0.5201,0.5205
Mean,0.811,0.8497,0.7057,0.7761,0.7383,0.5909,0.5935
Std,0.0473,0.0546,0.0459,0.0838,0.0599,0.099,0.1007


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.8807,0.6053,0.8214,0.697,0.5528,0.5671
1,0.89,0.93,0.7895,0.9091,0.8451,0.7605,0.765
2,0.82,0.8589,0.7027,0.7879,0.7429,0.6051,0.6074
3,0.7475,0.7646,0.6757,0.6579,0.6667,0.4635,0.4636
4,0.798,0.8359,0.7568,0.7179,0.7368,0.5731,0.5736
Mean,0.8111,0.854,0.706,0.7788,0.7377,0.591,0.5954
Std,0.0461,0.0545,0.0642,0.0863,0.0604,0.0969,0.0975


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8,0.8353,0.6316,0.8,0.7059,0.5575,0.5665
1,0.87,0.9113,0.7895,0.8571,0.8219,0.7198,0.7213
2,0.8,0.7979,0.6757,0.7576,0.7143,0.5612,0.5634
3,0.7576,0.7568,0.6486,0.6857,0.6667,0.4764,0.4769
4,0.798,0.8638,0.6757,0.7576,0.7143,0.5588,0.561
Mean,0.8051,0.833,0.6842,0.7716,0.7246,0.5748,0.5778
Std,0.0363,0.0531,0.0552,0.0564,0.0517,0.0793,0.0793


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.81,0.8879,0.6316,0.8276,0.7164,0.5774,0.5893
1,0.84,0.8867,0.7105,0.8438,0.7714,0.6497,0.6554
2,0.8,0.8649,0.5405,0.8696,0.6667,0.5347,0.5655
3,0.7576,0.7958,0.6486,0.6857,0.6667,0.4764,0.4769
4,0.8182,0.8537,0.7027,0.7879,0.7429,0.6029,0.6053
Mean,0.8052,0.8578,0.6468,0.8029,0.7128,0.5682,0.5785
Std,0.0272,0.0336,0.0612,0.0643,0.0415,0.0591,0.0587


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4,0.7572,0.9211,0.3804,0.5385,0.0013,0.003
1,0.47,0.6609,0.9474,0.4138,0.576,0.0999,0.1801
2,0.45,0.7001,0.973,0.4,0.5669,0.0894,0.1864
3,0.4242,0.6582,1.0,0.3936,0.5649,0.0615,0.1782
4,0.4444,0.7705,0.973,0.4,0.5669,0.0791,0.1717
Mean,0.4377,0.7094,0.9629,0.3976,0.5626,0.0663,0.1439
Std,0.0238,0.0471,0.0267,0.0108,0.0127,0.0348,0.0706


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.67,0.7767,0.2368,0.6923,0.3529,0.1975,0.2487
1,0.66,0.7687,0.2368,0.6429,0.3462,0.1779,0.2185
2,0.7,0.7447,0.2432,0.8182,0.375,0.2474,0.3264
3,0.6667,0.6986,0.3243,0.6,0.4211,0.2152,0.2353
4,0.7172,0.7714,0.3784,0.7368,0.5,0.3301,0.3658
Mean,0.6828,0.752,0.2839,0.698,0.399,0.2336,0.2789
Std,0.022,0.0289,0.0577,0.0757,0.0569,0.0534,0.057


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.84,0.7857,0.7368,0.8235,0.7778,0.6534,0.6558
1,0.82,0.7689,0.7105,0.7941,0.75,0.6101,0.6124
2,0.77,0.7334,0.6486,0.7059,0.6761,0.4983,0.4993
3,0.6566,0.6436,0.5676,0.5385,0.5526,0.2743,0.2745
4,0.697,0.7282,0.6486,0.5854,0.6154,0.3665,0.3678
Mean,0.7567,0.732,0.6624,0.6895,0.6744,0.4805,0.482
Std,0.0703,0.0491,0.0587,0.1121,0.0833,0.1432,0.144


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.79,0.8905,0.5789,0.8148,0.6769,0.5279,0.5448
1,0.84,0.8871,0.7368,0.8235,0.7778,0.6534,0.6558
2,0.8,0.814,0.5676,0.84,0.6774,0.5402,0.562
3,0.7374,0.7195,0.6757,0.641,0.6579,0.445,0.4454
4,0.7374,0.7611,0.5676,0.6774,0.6176,0.42,0.4238
Mean,0.7809,0.8144,0.6253,0.7594,0.6815,0.5173,0.5264
Std,0.0393,0.0677,0.069,0.083,0.0528,0.0823,0.0842


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.81,0.8298,0.6842,0.7879,0.7324,0.5862,0.5897
1,0.77,0.7714,0.6579,0.7143,0.6849,0.5043,0.5054
2,0.77,0.7263,0.5946,0.7333,0.6567,0.4866,0.4927
3,0.6465,0.6626,0.5676,0.525,0.5455,0.2569,0.2574
4,0.7374,0.7058,0.6486,0.6486,0.6486,0.439,0.439
Mean,0.7468,0.7392,0.6306,0.6818,0.6536,0.4546,0.4568
Std,0.0552,0.0573,0.0429,0.0902,0.0615,0.1097,0.1108


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

## 調整模型的超參數，提升模型的預測性能

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.76,0.7301,0.5789,0.7333,0.6471,0.469,0.4766
1,0.76,0.8328,0.6842,0.6842,0.6842,0.4907,0.4907
2,0.74,0.8268,0.4737,0.75,0.5806,0.4059,0.4284
3,0.76,0.8149,0.5263,0.7692,0.625,0.4575,0.4753
4,0.74,0.8421,0.3684,0.875,0.5185,0.3786,0.4451
5,0.68,0.7267,0.2105,0.8,0.3333,0.2079,0.2884
6,0.7,0.8251,0.3158,0.75,0.4444,0.283,0.3327
7,0.7,0.6701,0.7222,0.5652,0.6341,0.3863,0.3946
8,0.5918,0.6962,0.0556,0.25,0.0909,-0.0493,-0.0726
9,0.6531,0.767,0.2222,0.5714,0.32,0.1439,0.1728


Processing:   0%|          | 0/6 [00:00<?, ?it/s]

In [37]:
knn = tune_model(knn)
stacker_all = stack_models(estimator_list = [lr,knn,nb,svm,rbfsvm,gpc,mlp,ridge,qda,lda,gbc,et], meta_model = xgboost)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.82,0.8379,0.5789,0.9167,0.7097,0.5887,0.6213
1,0.76,0.8956,0.4737,0.8182,0.6,0.4455,0.4794
2,0.88,0.9075,0.7368,0.9333,0.8235,0.7345,0.7463
3,0.88,0.9338,0.6842,1.0,0.8125,0.7288,0.7571
4,0.82,0.8413,0.6842,0.8125,0.7429,0.606,0.6113
5,0.82,0.8379,0.6316,0.8571,0.7273,0.5975,0.613
6,0.72,0.7156,0.5263,0.6667,0.5882,0.3805,0.3866
7,0.82,0.8142,0.6667,0.8,0.7273,0.5946,0.6001
8,0.7959,0.8002,0.5556,0.8333,0.6667,0.5279,0.5505
9,0.8367,0.8369,0.6667,0.8571,0.75,0.6316,0.6426


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.76,0.7402,0.5789,0.7333,0.6471,0.469,0.4766
1,0.72,0.7827,0.5263,0.6667,0.5882,0.3805,0.3866
2,0.88,0.8548,0.8421,0.8421,0.8421,0.7453,0.7453
3,0.78,0.7402,0.6842,0.7222,0.7027,0.5283,0.5288
4,0.82,0.7997,0.7895,0.75,0.7692,0.6218,0.6224
5,0.72,0.7012,0.5789,0.6471,0.6111,0.3934,0.3949
6,0.7,0.7301,0.5789,0.6111,0.5946,0.3568,0.3571
7,0.72,0.7144,0.7778,0.5833,0.6667,0.4337,0.447
8,0.7143,0.7052,0.6111,0.6111,0.6111,0.3853,0.3853
9,0.7347,0.6989,0.6111,0.6471,0.6286,0.4225,0.4229


Processing:   0%|          | 0/6 [00:00<?, ?it/s]

## 測試資料的 accuracy

In [38]:
from sklearn.metrics import accuracy_score
pred = predict_model(stacker_all,data = test_data)
accuracy_score(pred['Survived'],pred['prediction_label'])

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Stacking Classifier,0.8045,0.8183,0.72,0.7941,0.7552,0.5931,0.5951


0.8044692737430168