# Análisis Títulos Titanic

En este notebook utilizaremos algunas técnicas de Data Wrangling para obtener nuevos insights del dataset Titanic.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the passenger table. The path is relative, so it is resolved against
# the kernel's current working directory.
df = pd.read_csv("titanic.csv")

In [None]:
# Peek at the first two rows to check the columns that were loaded.
df.head(n=2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


### Análisis de Valores Perdidos

In [None]:
# Missing-value count per column (isna is the primary name of isnull).
df.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [None]:
# Per-class averages; the Age column of this table is where the rounded
# fill-in ages used by imputar_edad come from.
# numeric_only=True keeps the text columns (Name, Sex, Ticket, ...) out of the
# aggregation: they cannot be averaged and make pandas >= 2.0 raise TypeError.
df.groupby('Pclass').mean(numeric_only=True)

Unnamed: 0_level_0,PassengerId,Survived,Age,SibSp,Parch,Fare
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,461.597222,0.62963,38.233441,0.416667,0.356481,84.154687
2,445.956522,0.472826,29.87763,0.402174,0.380435,20.662183
3,439.154786,0.242363,25.14062,0.615071,0.393075,13.67555


In [None]:
def imputar_edad(fila):
    """Return the age of a passenger, filling a missing age from the class.

    Parameters
    ----------
    fila : sequence or pandas.Series
        Holds the passenger class in position 0 and the age in position 1
        (e.g. ``[1, 35]`` or a row of ``df[['Pclass', 'Age']]``).

    Returns
    -------
    The original age when it is not null. Otherwise 38.23 for class 1,
    29.88 for class 2 and 25.14 for any other class value. These constants
    are the per-class mean ages rounded to two decimals.
    """
    # Positional access on a pandas object has to go through .iloc: a plain
    # integer inside [] on a label-indexed Series (labels 'Pclass'/'Age') is a
    # deprecated positional fallback. Lists and tuples are indexed directly.
    valores = fila.iloc if hasattr(fila, 'iloc') else fila
    clase, edad = valores[0], valores[1]

    # A known age is returned untouched.
    if not pd.isnull(edad):
        return edad

    if clase == 1:
        return 38.23
    if clase == 2:
        return 29.88
    # Every remaining class value falls back to the third-class mean.
    return 25.14

In [None]:
# Spot-check the imputation helper: two rows with a known age and one row
# whose age is missing.
for fila_prueba in ([1, 35], [2, 22], [3, np.nan]):
    print(imputar_edad(fila_prueba))

35
22
25.14


In [None]:
# Fill the missing ages row by row; imputar_edad reads the class first and the
# age second, so the column order in the selection matters.
df['Age'] = df.loc[:, ['Pclass', 'Age']].apply(imputar_edad, axis=1)

In [None]:
# Show the last five rows to see the imputed ages in place.
df.tail(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,25.14,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


### Análisis de Nombres

In [None]:
# Sample passenger name used to try out the title extraction.
nombre = "Graham, Miss. Margaret Edith"

In [None]:
# Title extraction on the sample: take the segment after the first comma,
# trim it, then keep what precedes the first period.
nombre.split(',', 2)[1].strip().split('.', 1)[0]

'Miss'

In [None]:
# Distinct titles present in the Name column.
# Vectorized .str methods replace the per-row lambda: same result for string
# names, and a missing name propagates as NaN instead of raising.
# Single-character separators are treated as literal text by .str.split.
(
    df['Name']
    .str.split(',').str[1]    # segment after the first comma
    .str.strip()
    .str.split('.').str[0]    # text before the first period
    .unique()
)

array(['Mr', 'Mrs', 'Miss', 'Master', 'Don', 'Rev', 'Dr', 'Mme', 'Ms',
       'Major', 'Lady', 'Sir', 'Mlle', 'Col', 'Capt', 'the Countess',
       'Jonkheer'], dtype=object)

In [None]:
# Store the title of every passenger in a new column.
# Vectorized .str methods replace the per-row lambda: same result for string
# names, and a missing name yields NaN instead of raising.
# Single-character separators are treated as literal text by .str.split.
df['Title'] = (
    df['Name']
    .str.split(',').str[1]    # segment after the first comma
    .str.strip()
    .str.split('.').str[0]    # text before the first period
)

In [None]:
# Rows whose extracted title is exactly 'Master'.
df.loc[df['Title'].eq('Master')]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S,Master
16,17,0,3,"Rice, Master. Eugene",male,2.0,4,1,382652,29.125,,Q,Master
50,51,0,3,"Panula, Master. Juha Niilo",male,7.0,4,1,3101295,39.6875,,S,Master
59,60,0,3,"Goodwin, Master. William Frederick",male,11.0,5,2,CA 2144,46.9,,S,Master
63,64,0,3,"Skoog, Master. Harald",male,4.0,3,2,347088,27.9,,S,Master
65,66,1,3,"Moubarek, Master. Gerios",male,25.14,1,1,2661,15.2458,,C,Master
78,79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29.0,,S,Master
125,126,1,3,"Nicola-Yarred, Master. Elias",male,12.0,1,0,2651,11.2417,,C,Master
159,160,0,3,"Sage, Master. Thomas Henry",male,25.14,8,2,CA. 2343,69.55,,S,Master
164,165,0,3,"Panula, Master. Eino Viljami",male,1.0,4,1,3101295,39.6875,,S,Master


In [None]:
# Frequency of each title, most common first.
df.loc[:, 'Title'].value_counts()

Mr              517
Miss            182
Mrs             125
Master           40
Dr                7
Rev               6
Col               2
Mlle              2
Major             2
Lady              1
Mme               1
Jonkheer          1
Ms                1
Don               1
Sir               1
Capt              1
the Countess      1
Name: Title, dtype: int64