# Titanic Survivors - Data Visualization with Pandas and Seaborn
The original dataset can be found [here](https://www.kaggle.com/c/titanic/data). This notebook uses the training data (`train.csv`), which is read below from `data/titanic.csv`.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Use seaborn's "whitegrid" theme for every figure drawn in this notebook.
sns.set_theme(style="whitegrid")

# Load the passenger table; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="data/titanic.csv")

Let's take a look at the first few rows of our DataFrame.

In [None]:
# Preview the first five rows (the default for head()) to check the columns loaded.
data.head(n=5)

First, we'll show how the number of survivors varies with sex and travel class.

In [None]:
# Sum "Survived" within each (Sex, Pclass) group. This equals the number of
# survivors assuming the column is coded 0/1 -- TODO confirm against the data.
# as_index=False keeps Sex and Pclass as ordinary columns for seaborn.
dta = data.groupby(["Sex", "Pclass"], as_index=False)["Survived"].sum()

# One bar per class, split by sex.
g = sns.catplot(
    data=dta,
    kind="bar",
    x="Pclass",
    y="Survived",
    hue="Sex",
    palette="dark",
    alpha=0.9,
    height=6,
)
g.set(title="Number of Titanic survivors by class and sex")
# The trailing ';' keeps the FacetGrid repr out of the cell output.
g.set_axis_labels("Class", "Number of survivors");

Now let's look at the same relationship, expressed as the percentage of passengers in each group who survived.

In [None]:
# Share of "Survived" within each (Sex, Pclass) group, scaled to percent.
# The built-in mean() is sum()/count() over the non-null values, i.e. the same
# quantity the per-group lambda computed, without a Python-level apply.
# Reads as a survival percentage assuming the column is coded 0/1 -- TODO confirm.
dta = (
    data.groupby(["Sex", "Pclass"], as_index=False)["Survived"]
    .mean()
    .assign(Survived=lambda rates: 100 * rates["Survived"])
)

# One bar per class, split by sex.
g = sns.catplot(
    data=dta,
    kind="bar",
    x="Pclass",
    y="Survived",
    hue="Sex",
    palette="dark",
    alpha=0.9,
    height=8,
)
g.set(title="Percentage of Titanic survivors by class and sex")
# The trailing ';' keeps the FacetGrid repr out of the cell output.
g.set_axis_labels("Class", "Percentage of survivors [%]");