# Aplicación Titanic
---

In [None]:
import pandas as pd

# Load the passenger data from the CSV that sits next to the notebook.
titanic = pd.read_csv(filepath_or_buffer="titanic.csv")

# Preview the first five rows to check that the file parsed as expected.
titanic.head(n=5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


## Análisis NaN
---

In [None]:
# Count missing values per column, listing the most affected columns first.
(
    titanic
    .isna()
    .sum()
    .sort_values(ascending=False)
)

Cabin          327
Age             86
Fare             1
Embarked         0
Ticket           0
Parch            0
SibSp            0
Sex              0
Name             0
Pclass           0
PassengerId      0
dtype: int64

In [None]:
# Drop the Cabin column: it is missing for 327 of the 418 passengers.
# Rebinding the result (instead of inplace=True) together with errors="ignore"
# keeps this cell idempotent: re-running it once Cabin is already gone no longer
# raises a KeyError.
titanic = titanic.drop(columns="Cabin", errors="ignore")

In [None]:
# Compute the mean of the observed (non-NaN) values of each column.
# skipna=True is the pandas default; it is kept explicit for the reader.
media = titanic["Age"].mean(skipna=True)
print(media)
media2 = titanic["Fare"].mean(skipna=True)

# Replace NaN with the column mean.
# A single frame-level fillna with a per-column mapping is used instead of
# `titanic["Age"].fillna(..., inplace=True)`: the chained in-place form operates
# on a temporary Series, emits a FutureWarning on recent pandas, and does not
# modify `titanic` at all when Copy-on-Write is enabled.
titanic = titanic.fillna({"Age": media, "Fare": media2})


30.272590361445783


In [None]:
# Re-check missing values per column after the cleaning steps above.
(
    titanic
    .isna()
    .sum()
    .sort_values(ascending=False)
)

Embarked       0
Fare           0
Ticket         0
Parch          0
SibSp          0
Age            0
Sex            0
Name           0
Pclass         0
PassengerId    0
dtype: int64

##  Manejo de texto
---
Utilizamos el accesor **`.str`** para aplicar métodos de cadenas de texto (strings) a todos los valores de una columna.

In [None]:
# Lower-case every passenger name; this returns a new Series and leaves
# the "Name" column of `titanic` untouched.
titanic.loc[:, "Name"].str.lower()

0                                  kelly, mr. james
1                  wilkes, mrs. james (ellen needs)
2                         myles, mr. thomas francis
3                                  wirz, mr. albert
4      hirvonen, mrs. alexander (helga e lindqvist)
                           ...                     
413                              spector, mr. woolf
414                    oliva y ocana, dona. fermina
415                    saether, mr. simon sivertsen
416                             ware, mr. frederick
417                        peter, master. michael j
Name: Name, Length: 418, dtype: object

In [None]:
# Boolean mask: True for every passenger whose name contains "Albert".
mask = titanic["Name"].str.contains(pat="Albert")

# Keep only the rows selected by the mask.
titanic.loc[mask]

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,S
7,899,2,"Caldwell, Mr. Albert Francis",male,26.0,1,1,248738,29.0,S
55,947,3,"Rice, Master. Albert",male,10.0,4,1,382652,29.125,Q
210,1102,3,"Andersen, Mr. Albert Karvin",male,32.0,0,0,C 4001,22.525,S
259,1151,3,"Midtsjo, Mr. Karl Albert",male,21.0,0,0,345501,7.775,S
361,1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24.0,1,1,S.C./PARIS 2079,37.0042,C


In [None]:
# Keep only the text before the first comma of each name, i.e. the surname
# in this dataset's "Surname, Title. Given names" format.
# NOTE(review): this overwrites the "Name" column, so the full names are no
# longer available to later cells -- confirm that is intended.
titanic["Name"] = titanic["Name"].str.split(",").str[0]
titanic.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,Kelly,male,34.5,0,0,330911,7.8292,,Q
1,893,3,Wilkes,female,47.0,1,0,363272,7.0,,S
2,894,2,Myles,male,62.0,0,0,240276,9.6875,,Q
3,895,3,Wirz,male,27.0,0,0,315154,8.6625,,S
4,896,3,Hirvonen,female,22.0,1,1,3101298,12.2875,,S


### Variables Dummies 
---

In [None]:
# Encode sex as a 0/1 indicator column (male -> 0, female -> 1).
# Series.map builds the integer column directly; Series.replace on a string
# column relies on silent object -> int downcasting, which is deprecated in
# recent pandas releases.
# Note: with map, any value other than "male"/"female" would become NaN.
titanic["Sex_short"] = titanic["Sex"].map({"male": 0, "female": 1})
titanic.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_short
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,1
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,1


In [None]:
# One-hot encode the port of embarkation.
# The CSV column is named "Embarked" and the column names are only lower-cased
# in a later cell, so a hard-coded "embarked" raises KeyError when the notebook
# is run top to bottom. Resolve the real column name case-insensitively so the
# cell works both before and after the lower-casing step. If no matching column
# exists, fall back to "embarked" so get_dummies reports the usual KeyError.
columna_embarque = next(
    (col for col in titanic.columns if str(col).lower() == "embarked"),
    "embarked",
)
pd.get_dummies(data=titanic, columns=[columna_embarque])

Unnamed: 0,passengerid,pclass,name,sex,age,sibsp,parch,ticket,fare,embarked_C,embarked_Q,embarked_S
0,892,3,"Kelly, Mr. James",male,34.50000,0,0,330911,7.8292,0,1,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.00000,1,0,363272,7.0000,0,0,1
2,894,2,"Myles, Mr. Thomas Francis",male,62.00000,0,0,240276,9.6875,0,1,0
3,895,3,"Wirz, Mr. Albert",male,27.00000,0,0,315154,8.6625,0,0,1
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.00000,1,1,3101298,12.2875,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,30.27259,0,0,A.5. 3236,8.0500,0,0,1
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.00000,0,0,PC 17758,108.9000,1,0,0
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.50000,0,0,SOTON/O.Q. 3101262,7.2500,0,0,1
416,1308,3,"Ware, Mr. Frederick",male,30.27259,0,0,359309,8.0500,0,0,1


### Cambio de los títulos (nombres de las columnas)
---

In [None]:
# Display the current column labels of the DataFrame.
titanic.columns

Index(['passengerid', 'pclass', 'name', 'sex', 'age', 'sibsp', 'parch',
       'ticket', 'fare', 'embarked', 'valor Z edad'],
      dtype='object')

In [None]:
# Show the frame with the passenger-class column renamed to "clase".
# The column is matched case-insensitively: in notebook order the names are
# still capitalised here ("Pclass"), and rename() silently ignores mapping keys
# that do not exist, so a literal 'pclass' key would leave the frame unchanged.
# The result is only displayed, not assigned, so `titanic` itself keeps its
# original column names.
titanic.rename(
    columns=lambda nombre: "clase" if str(nombre).lower() == "pclass" else nombre
)

Unnamed: 0,passengerid,clase,name,sex,age,sibsp,parch,ticket,fare,embarked,valor Z edad
0,892,3,"Kelly, Mr. James",male,34.50000,0,0,330911,7.8292,Q,3.345917e-01
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.00000,1,0,363272,7.0000,S,1.323944e+00
2,894,2,"Myles, Mr. Thomas Francis",male,62.00000,0,0,240276,9.6875,Q,2.511166e+00
3,895,3,"Wirz, Mr. Albert",male,27.00000,0,0,315154,8.6625,S,-2.590195e-01
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.00000,1,1,3101298,12.2875,S,-6.547602e-01
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,30.27259,0,0,A.5. 3236,8.0500,S,-2.530716e-15
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.00000,0,0,PC 17758,108.9000,C,6.907583e-01
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.50000,0,0,SOTON/O.Q. 3101262,7.2500,S,6.511842e-01
416,1308,3,"Ware, Mr. Frederick",male,30.27259,0,0,359309,8.0500,S,-2.530716e-15


In [None]:
# Normalise every column label to lower case using the vectorised string
# accessor of the column Index.
titanic.columns = titanic.columns.str.lower()
titanic.head()

Unnamed: 0,passengerid,pclass,name,sex,age,sibsp,parch,ticket,fare,embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,S


### Columnas en Función de otras Columnas
---

In [None]:
# Standardise age: subtract the column mean and divide by the column
# standard deviation (Series.std, pandas default settings).
titanic["valor Z edad"] = (
    titanic["age"]
    .sub(titanic["age"].mean())
    .div(titanic["age"].std())
)
titanic.head()

Unnamed: 0,passengerid,pclass,name,sex,age,sibsp,parch,ticket,fare,embarked,valor Z edad
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q,0.334592
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,S,1.323944
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,Q,2.511166
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,S,-0.259019
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,S,-0.65476


### Funciones de Summary
---

**idxmin()** e **idxmax()** devuelven la etiqueta del índice (no la posición) de la fila con el valor mínimo y máximo, respectivamente.

In [None]:
# Index labels of the youngest and the oldest passenger.
id_mas_joven, id_mas_viejo = titanic["age"].idxmin(), titanic["age"].idxmax()

In [None]:
# idxmin()/idxmax() return index *labels*, so the rows must be fetched with the
# label-based .loc. The positional .iloc only gives the same rows while the
# frame still has its default 0..n-1 index, and would silently select the wrong
# passengers after any filtering, sorting or re-indexing.
titanic.loc[[id_mas_joven, id_mas_viejo]]

Unnamed: 0,passengerid,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,sex_short,valor Z edad
354,1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S,1,-2.12271
96,988,1,"Cavendish, Mrs. Tyrell William (Julia Florence...",female,76.0,1,0,19877,78.85,C46,S,1,3.224507


In [None]:
# Number of passengers per sex, most frequent value first.
(
    titanic["sex"]
    .value_counts()
)

male      266
female    152
Name: sex, dtype: int64

### Discretizando Variables Continuas

--- 

In [None]:
# Bin age into quartiles: q=4 asks qcut for four equal-frequency bins,
# i.e. cut points at the 0 %, 25 %, 50 %, 75 % and 100 % quantiles.
titanic["edad_discreta"] = pd.qcut(titanic["age"], q=4)

# Display the passengers ordered from youngest to oldest.
titanic.sort_values("age")

Unnamed: 0,passengerid,pclass,name,sex,age,sibsp,parch,ticket,fare,embarked,valor Z edad,edad_discreta
354,1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.5750,S,-2.382564,"(0.169, 23.0]"
201,1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4000,S,-2.369901,"(0.169, 23.0]"
281,1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.7750,S,-2.336658,"(0.169, 23.0]"
307,1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.3500,S,-2.330327,"(0.169, 23.0]"
250,1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.7500,S,-2.323203,"(0.169, 23.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...
305,1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabe...",female,64.00,1,1,112901,26.5500,S,2.669462,"(35.75, 76.0]"
179,1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ing...",female,64.00,0,2,PC 17756,83.1583,C,2.669462,"(35.75, 76.0]"
236,1128,1,"Warren, Mr. Frank Manley",male,64.00,1,0,110813,75.2500,C,2.669462,"(35.75, 76.0]"
81,973,1,"Straus, Mr. Isidor",male,67.00,1,0,PC 17483,221.7792,S,2.906907,"(35.75, 76.0]"
