# [Titanic survival](https://docs.abzu.ai/docs/tutorials/python/titanic.html)

In this tutorial, we'll be using Feyn and the QLattice to solve a binary classification problem by exploring models that aim to predict the probability of surviving the disaster of the RMS Titanic during her maiden voyage in April of 1912.

In [25]:
import numpy as np
import pandas as pd

import feyn
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

In [26]:
# Read the Titanic passenger records from the local CSV file.
df = pd.read_csv(filepath_or_buffer='./data/titanic.csv')
# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


In [27]:
# List every column that holds at least one missing (NaN) value.
column_has_nan = df.isna().any()  # one boolean per column
df.columns[column_has_nan.to_numpy()].tolist()

['age', 'cabin', 'boat', 'body', 'home.dest']

In [33]:
# Display the passengers whose age is missing.
df.loc[df['age'].isna()]

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
816,3,0,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C,,,
940,3,0,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C,,,


In [29]:
# Known ages of passengers matching the profile of the rows with a missing age:
# third class, embarked at 'C', male, no siblings/spouse or parents/children
# aboard, did not survive.
# NOTE(review): this filter conditions on `survived`, which is the quantity the
# tutorial sets out to predict -- confirm that is acceptable for imputation.
same_profile = (
    (df['pclass'] == 3)
    & (df['embarked'] == 'C')
    & (df['sex'] == 'male')
    & (df['sibsp'] == 0)
    & (df['parch'] == 0)
    & (df['survived'] == 0)
)
age_dist = df.loc[same_profile, 'age'].dropna()

In [30]:
# Mean and standard deviation of the reference ages; these parameterise the
# normal distribution the missing ages are drawn from.
mean_age = np.mean(age_dist)
std_age = np.std(age_dist)

# Boolean mask of the rows whose age is missing. The number of random draws is
# derived from this mask instead of being hard-coded, so the assignment below
# always receives exactly one value per missing row.
age_is_missing = df.age.isna()

# Seed NumPy's global RNG so the imputed ages are the same on every run.
np.random.seed(42)
age_guess = np.random.normal(mean_age, std_age, size=int(age_is_missing.sum()))

# As a simple first pass, drop features that look irrelevant at first glance.
df_mod = df.drop(['boat', 'body', 'home.dest', 'name', 'ticket', 'cabin'], axis=1)
# Fill each missing age with one of the random draws; `df` itself is left as is.
df_mod.loc[age_is_missing, 'age'] = age_guess