# Datos del Titanic

In [3]:
import seaborn as sns

In [6]:
import numpy as np
import pandas as pd

In [4]:
# Load the "titanic" example dataset through seaborn and keep it as `titanic`.
titanic = sns.load_dataset("titanic")

In [5]:
# Preview the first five rows to see which columns are available.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [12]:
# (number of rows, number of columns)
titanic.shape

(891, 15)

Con `pd.Categorical` transformamos una variable a tipo factor

In [7]:
# Store `survived` as a categorical column instead of a plain numeric one.
titanic["survived"] = pd.Categorical(titanic["survived"])

In [8]:
# Store `pclass` as a categorical column.
titanic["pclass"] = pd.Categorical(titanic["pclass"])

In [9]:
# Convert each of these columns to a categorical, overwriting the column in
# place. The columns are processed in the listed order.
for column in ("sibsp", "embark_town", "embarked", "parch",
               "adult_male", "deck", "alone"):
    titanic[column] = pd.Categorical(titanic[column])

In [16]:
# Store `sex` as a categorical column.
titanic["sex"] = pd.Categorical(titanic["sex"])

# Tablas de contingencia de una variable

In [10]:
# One-way frequency table: number of passengers for each value of `survived`.
# The constant string given as the columns argument becomes the name of the
# single count column.
tab = pd.crosstab(titanic["survived"], "pasajeros")
tab

col_0,pasajeros
survived,Unnamed: 1_level_1
0,549
1,342


In [14]:
# Check what kind of object crosstab returned.
type(tab)

pandas.core.frame.DataFrame

In [27]:
# One-way frequency table: number of passengers in each `pclass`.
tab_p = pd.crosstab(titanic["pclass"], "pasajeros")
tab_p

col_0,pasajeros
pclass,Unnamed: 1_level_1
1,216
2,184
3,491


In [28]:
# One-way frequency table: number of passengers of each `sex`.
tab_s = pd.crosstab(titanic["sex"], "pasajeros")
tab_s

col_0,pasajeros
sex,Unnamed: 1_level_1
female,314
male,577


In [20]:
# One-way frequency table: number of passengers on each `deck`.
# NOTE(review): these counts add up to far fewer passengers than the dataset
# has rows — presumably passengers without a recorded deck are left out; confirm.
tab_c = pd.crosstab(titanic["deck"], "pasajeros")
tab_c

col_0,pasajeros
deck,Unnamed: 1_level_1
A,15
B,47
C,59
D,33
E,32
F,13
G,4


In [21]:
# Column total: how many passengers the deck table accounts for.
tab_c.sum()

col_0
pasajeros    203
dtype: int64

In [24]:
# (number of deck levels, number of count columns)
tab_c.shape

(7, 1)

In [25]:
# Positional slice: rows at positions 1 to 4 (the end position 5 is excluded).
tab_c.iloc[1:5]

col_0,pasajeros
deck,Unnamed: 1_level_1
B,47
C,59
D,33
E,32


In [26]:
# Divide every count by the column total so the values add up to 1.
tab_c/tab_c.sum() # Relative frequencies

col_0,pasajeros
deck,Unnamed: 1_level_1
A,0.073892
B,0.231527
C,0.29064
D,0.162562
E,0.157635
F,0.064039
G,0.019704


In [29]:
# Relative frequencies of `pclass`: each count divided by the column total.
tab_p/tab_p.sum()

col_0,pasajeros
pclass,Unnamed: 1_level_1
1,0.242424
2,0.20651
3,0.551066


In [30]:
# Relative frequencies of `survived`: each count divided by the column total.
tab/tab.sum()

col_0,pasajeros
survived,Unnamed: 1_level_1
0,0.616162
1,0.383838


# Tablas de contingencia de dos variables

In [32]:
# Two-way table: `survived` on the rows, `sex` on the columns.
survived_sex = pd.crosstab(titanic["survived"], titanic["sex"])
# Replace the row labels positionally: first row -> "died", second -> "survived".
survived_sex.index = ["died", "survived"]
survived_sex

sex,female,male
died,81,468
survived,233,109


In [37]:
# Two-way table of `survived` (rows) by `pclass` (columns); margins=True
# appends a totals row and a totals column.
survived_class = pd.crosstab(
    titanic["survived"],
    titanic["pclass"],
    margins=True,
)
# Rename both axes positionally, including the totals row and column.
survived_class.columns = ["primera", "segunda", "tercera", "total_superv"]
survived_class.index = ["murio", "sobrevivio", "total_clase"]
survived_class

Unnamed: 0,primera,segunda,tercera,total_superv
murio,80,97,372,549
sobrevivio,136,87,119,342
total_clase,216,184,491,891


### Frecuencias relativas globales

In [38]:
# Divide every cell by the grand total (the cell where the totals row and the
# totals column meet) to express the table as shares of all passengers.
survived_class.div(survived_class.loc["total_clase", "total_superv"])

Unnamed: 0,primera,segunda,tercera,total_superv
murio,0.089787,0.108866,0.417508,0.616162
sobrevivio,0.152637,0.097643,0.133558,0.383838
total_clase,0.242424,0.20651,0.551066,1.0


### Frecuencias relativas marginales

##### Frecuencia relativa marginal por filas

In [39]:
# Divide each column by its total in the "total_clase" row, so every column adds up to 1.
survived_class/survived_class.loc["total_clase"] # Relative frequencies within each class

Unnamed: 0,primera,segunda,tercera,total_superv
murio,0.37037,0.527174,0.757637,0.616162
sobrevivio,0.62963,0.472826,0.242363,0.383838
total_clase,1.0,1.0,1.0,1.0


In [44]:
# Division written with .div: axis = 1 matches the divisor's labels against the
# column names, so each column is divided by its own total.
survived_class.div(survived_class.loc["total_clase"], axis = 1)

Unnamed: 0,primera,segunda,tercera,total_superv
murio,0.37037,0.527174,0.757637,0.616162
sobrevivio,0.62963,0.472826,0.242363,0.383838
total_clase,1.0,1.0,1.0,1.0


Los códigos anteriores dan exactamente el mismo resultado. En el segundo ejemplo usamos `.div` e indicamos `axis = 1` para que realice la división por columnas (cada columna se divide entre su total).

##### Frecuencia relativa marginal por columnas

In [46]:
# Transpose so the original row labels become columns, then divide by the
# "total_superv" column. The result is the transposed table, with every
# column adding up to 1.
survived_class.T/survived_class["total_superv"] # Works once we transpose and drop the .loc

Unnamed: 0,murio,sobrevivio,total_clase
primera,0.145719,0.397661,0.242424
segunda,0.176685,0.254386,0.20651
tercera,0.677596,0.347953,0.551066
total_superv,1.0,1.0,1.0


In [42]:
# axis = 0 matches the divisor's labels against the row labels, so each row is
# divided by its own total and every row adds up to 1.
survived_class.div(survived_class["total_superv"], axis = 0)

Unnamed: 0,primera,segunda,tercera,total_superv
murio,0.145719,0.176685,0.677596,1.0
sobrevivio,0.397661,0.254386,0.347953,1.0
total_clase,0.242424,0.20651,0.551066,1.0


Los códigos anteriores dan los mismos valores, con la diferencia de que en el segundo ejemplo no transponemos la tabla (el primer resultado aparece traspuesto). Además, en el segundo ejemplo utilizamos `.div` con `axis = 0` para realizar la división por filas.

### Tablas multidimensionales

In [47]:
# Three-way table: `survived` on the rows, `sex` and `pclass` nested on the
# columns, plus the "All" totals added by margins=True.
surv_sex_class = pd.crosstab(
    index=titanic["survived"],
    columns=[titanic["sex"], titanic["pclass"]],
    margins=True,
)
surv_sex_class

sex,female,female,female,male,male,male,All
pclass,1,2,3,1,2,3,Unnamed: 7_level_1
survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,3,6,72,77,91,300,549
1,91,70,72,45,17,47,342
All,94,76,144,122,108,347,891


In [48]:
surv_sex_class["female"] # keep only the columns under sex == female

pclass,1,2,3
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3,6,72
1,91,70,72
All,94,76,144


In [49]:
# From the female sub-table, select the column labelled 1 (first class).
surv_sex_class["female"].loc[:, 1]

survived
0       3
1      91
All    94
Name: 1, dtype: int64

In [50]:
# Divide each column by its total in the "All" row, so every column adds up to 1.
surv_sex_class.div(surv_sex_class.loc["All"], axis="columns")

sex,female,female,female,male,male,male,All
pclass,1,2,3,1,2,3,Unnamed: 7_level_1
survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,0.031915,0.078947,0.5,0.631148,0.842593,0.864553,0.616162
1,0.968085,0.921053,0.5,0.368852,0.157407,0.135447,0.383838
All,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [51]:
# Divide each row by its total in the "All" column, so every row adds up to 1.
surv_sex_class.div(surv_sex_class["All"], axis = 0)

sex,female,female,female,male,male,male,All
pclass,1,2,3,1,2,3,Unnamed: 7_level_1
survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,0.005464,0.010929,0.131148,0.140255,0.165756,0.546448,1.0
1,0.266082,0.204678,0.210526,0.131579,0.049708,0.137427,1.0
All,0.105499,0.085297,0.161616,0.136925,0.121212,0.38945,1.0


In [53]:
# Look at the first rows again to recall the column names.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [63]:
# Two-way table of `survived` (rows) by `embarked` (columns), with "All" totals.
# NOTE(review): the grand total of this table is 889 while the dataset has 891
# rows — presumably rows with a missing `embarked` value are left out; confirm.
surv_emb = pd.crosstab(
    index=titanic["survived"],
    columns=titanic["embarked"],
    margins=True,
)
surv_emb

embarked,C,Q,S,All
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,75,47,427,549
1,93,30,217,340
All,168,77,644,889


In [64]:
# Divide each column by its total in the "All" row: shares of each outcome
# within every `embarked` value (every column adds up to 1).
surv_emb.div(surv_emb.loc["All"], axis = 1)

embarked,C,Q,S,All
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.446429,0.61039,0.663043,0.617548
1,0.553571,0.38961,0.336957,0.382452
All,1.0,1.0,1.0,1.0


In [65]:
# Divide each row by its total in the "All" column: shares of each `embarked`
# value within every outcome (every row adds up to 1).
surv_emb.div(surv_emb["All"], axis = 0)

embarked,C,Q,S,All
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.136612,0.08561,0.777778,1.0
1,0.273529,0.088235,0.638235,1.0
All,0.188976,0.086614,0.724409,1.0


In [66]:
# Two-way table of `pclass` (rows) by `embarked` (columns), with "All" totals.
# NOTE(review): the grand total of this table is 889 while the dataset has 891
# rows — presumably rows with a missing `embarked` value are left out; confirm.
class_emb = pd.crosstab(
    index=titanic["pclass"],
    columns=titanic["embarked"],
    margins=True,
)
class_emb

embarked,C,Q,S,All
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,85,2,127,214
2,17,3,164,184
3,66,72,353,491
All,168,77,644,889


In [67]:
# Look at the first rows once more to recall the column names.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
