---
title: "Machine Learning from Disaster"
subtitle: "Part 2: Tree Ensemble Method with XGBoost"
author: "solar-san"
# `last-modified` is resolved by Quarto itself, so it works with the Jupyter
# engine; an inline R expression is only evaluated when rendering with knitr.
date-modified: last-modified
format:
  html:
    theme: github
    toc: true
    toc-location: left
    fig-align: center
    fig-width: 10
    fig-height: 10
    html-math-method: katex
    code-overflow: scroll
    code-copy: hover
    code-fold: show
    highlight-style: github
    citations-hover: true
    footnotes-hover: true
    header-includes: |
      <meta name="author" content="solar-san">
      <meta name="image" property="og:image" content="https://github.com/solar-san/Kaggle-DataQuests/blob/main/docs/figures/header.png?raw=true">
      <link rel="preconnect" href="https://fonts.googleapis.com">
      <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
      <link href="https://fonts.googleapis.com/css2?family=Atkinson+Hyperlegible:ital,wght@0,400;0,700;1,400;1,700&family=Fira+Code&display=swap" rel="stylesheet">
mainfont: "Atkinson Hyperlegible"
monofont: 'Fira Code'
---

# Setup and Imports

In [8]:
import pandas as pd
import zipfile
import shutil



In [2]:
# Load the Titanic train/test splits directly from the zip archive.
# Each CSV member is streamed straight into pandas, so nothing is extracted to
# disk and no directory has to be removed afterwards: a failed read can no
# longer leave "data/unzipped/" behind, and a pre-existing directory of that
# name is never deleted.
with zipfile.ZipFile("data/titanic.zip", "r") as zip_ref:
    with zip_ref.open("train.csv") as train_file:
        train = pd.read_csv(train_file)
    with zip_ref.open("test.csv") as test_file:
        test = pd.read_csv(test_file)

## Exploring and Transforming Data

In [7]:
# Preview the first ten rows of the training split.
train.head(n=10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [4]:
#| tbl-cap: Summary Statistics
# Summary statistics of the training split. The `#|` line above is a cell
# option (table caption); keep it as the first line of the cell.
# In the saved output, Age has a count of 714 against 891 for every other
# column, i.e. Age contains missing values.
train.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [6]:
# One-hot encode the categorical / small-integer features. Every listed column
# is replaced by one indicator column per distinct value (e.g. Embarked ->
# Embarked_C / Embarked_Q / Embarked_S); all other columns pass through as-is.
one_hot_columns = ["Sex", "Pclass", "SibSp", "Parch", "Embarked"]

# Keep the result under its own name instead of discarding it, so later cells
# can use the encoded frame while `train` itself stays untouched and the cell
# remains safe to re-run.
# NOTE(review): the same column list will have to be applied to `test`; the
# sets of distinct values may differ between the two files -- confirm that the
# resulting indicator columns line up before modelling.
train_encoded = pd.get_dummies(train, columns=one_hot_columns)

# Last expression of the cell: display the encoded frame.
train_encoded

Unnamed: 0,PassengerId,Survived,Name,Sex,Age,Ticket,Fare,Cabin,Pclass_1,Pclass_2,...,Parch_0,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6,Embarked_C,Embarked_Q,Embarked_S
0,1,0,"Braund, Mr. Owen Harris",male,22.0,A/5 21171,7.2500,,False,False,...,True,False,False,False,False,False,False,False,False,True
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,PC 17599,71.2833,C85,True,False,...,True,False,False,False,False,False,False,True,False,False
2,3,1,"Heikkinen, Miss. Laina",female,26.0,STON/O2. 3101282,7.9250,,False,False,...,True,False,False,False,False,False,False,False,False,True
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,113803,53.1000,C123,True,False,...,True,False,False,False,False,False,False,False,False,True
4,5,0,"Allen, Mr. William Henry",male,35.0,373450,8.0500,,False,False,...,True,False,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,"Montvila, Rev. Juozas",male,27.0,211536,13.0000,,False,True,...,True,False,False,False,False,False,False,False,False,True
887,888,1,"Graham, Miss. Margaret Edith",female,19.0,112053,30.0000,B42,True,False,...,True,False,False,False,False,False,False,False,False,True
888,889,0,"Johnston, Miss. Catherine Helen ""Carrie""",female,,W./C. 6607,23.4500,,False,False,...,False,False,True,False,False,False,False,False,False,True
889,890,1,"Behr, Mr. Karl Howell",male,26.0,111369,30.0000,C148,True,False,...,True,False,False,False,False,False,False,True,False,False


In [None]:
sklearn.c