In [1]:
import numpy as np
import pandas as pd

# Sample records, one row per dog:
# name, breed, colour, height (cm), weight (kg), birth date (YYYY-MM-DD).
# Mixing text and integers in one ndarray makes NumPy store every field as a
# string; the numeric columns are cast back once the DataFrame is built.
array = np.array([
    ("Bella",   "Labrador",     "Cafe",   56, 24, "2013-07-01"),
    ("Charlie", "Poodle",       "Negro",  43, 24, "2016-09-16"),
    ("Lucy",    "Chow Chow",    "Cafe",   46, 24, "2014-08-25"),
    ("Cooper",  "Schnauzer",    "Gris",   49, 17, "2016-09-16"),
    ("Bernie",  "San Bernardo", "Blanco", 77, 74, "2011-12-11"),
    ("Max",     "Chow Chow",    "Blanco", 45, 28, "2019-03-27"),
    ("Reed",    "Schnauzer",    "Negro",  45, 15, "2017-09-18"),
])

In [2]:
# Build the DataFrame from the raw array and name its columns.
# The array holds every value as text, so height and weight are cast to
# integers in a single astype call; the other columns are left untouched.
df = pd.DataFrame(
    array,
    columns=["Nombre", "Raza", "Color", "Altura_cm", "Peso_kg", "Fecha_Nac"],
).astype({"Altura_cm": int, "Peso_kg": int})
df

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
0,Bella,Labrador,Cafe,56,24,2013-07-01
1,Charlie,Poodle,Negro,43,24,2016-09-16
2,Lucy,Chow Chow,Cafe,46,24,2014-08-25
3,Cooper,Schnauzer,Gris,49,17,2016-09-16
4,Bernie,San Bernardo,Blanco,77,74,2011-12-11
5,Max,Chow Chow,Blanco,45,28,2019-03-27
6,Reed,Schnauzer,Negro,45,15,2017-09-18


In [3]:
# Column labels of the DataFrame
df.columns

Index(['Nombre', 'Raza', 'Color', 'Altura_cm', 'Peso_kg', 'Fecha_Nac'], dtype='object')

In [4]:
# Row labels (index) of the DataFrame
df.index

RangeIndex(start=0, stop=7, step=1)

In [5]:
# Set a column as the index (row labels)
df_ind = df.set_index("Nombre")
df_ind

Unnamed: 0_level_0,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
Nombre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bella,Labrador,Cafe,56,24,2013-07-01
Charlie,Poodle,Negro,43,24,2016-09-16
Lucy,Chow Chow,Cafe,46,24,2014-08-25
Cooper,Schnauzer,Gris,49,17,2016-09-16
Bernie,San Bernardo,Blanco,77,74,2011-12-11
Max,Chow Chow,Blanco,45,28,2019-03-27
Reed,Schnauzer,Negro,45,15,2017-09-18


In [6]:
# Undo the index assignment: reset_index() turns "Nombre" back into an ordinary
# column and restores the default integer index.
# The indexed frame is rebuilt from df first so that re-running this cell always
# gives the same result; resetting an already-reset frame would add a stray
# "index" column.
df_ind = df.set_index("Nombre").reset_index()
df_ind

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
0,Bella,Labrador,Cafe,56,24,2013-07-01
1,Charlie,Poodle,Negro,43,24,2016-09-16
2,Lucy,Chow Chow,Cafe,46,24,2014-08-25
3,Cooper,Schnauzer,Gris,49,17,2016-09-16
4,Bernie,San Bernardo,Blanco,77,74,2011-12-11
5,Max,Chow Chow,Blanco,45,28,2019-03-27
6,Reed,Schnauzer,Negro,45,15,2017-09-18


In [7]:
# Remove the index entirely: with drop=True the old index ("Nombre") is
# discarded instead of being moved back into a column.
df_ind = df.set_index("Nombre").reset_index(drop=True)
df_ind

Unnamed: 0,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
0,Labrador,Cafe,56,24,2013-07-01
1,Poodle,Negro,43,24,2016-09-16
2,Chow Chow,Cafe,46,24,2014-08-25
3,Schnauzer,Gris,49,17,2016-09-16
4,San Bernardo,Blanco,77,74,2011-12-11
5,Chow Chow,Blanco,45,28,2019-03-27
6,Schnauzer,Negro,45,15,2017-09-18


In [8]:
# Why indexes help
# Option 1: look names up in the original frame by filtering the "Nombre"
# column with a boolean mask
df.loc[df["Nombre"].isin(["Bernie", "Max"])]

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
4,Bernie,San Bernardo,Blanco,77,74,2011-12-11
5,Max,Chow Chow,Blanco,45,28,2019-03-27


In [9]:
# Why indexes help
# Option 2: look names up by label in a frame indexed by "Nombre"
df_ind = df.set_index("Nombre")
df_ind.loc[["Bernie","Max"]]

Unnamed: 0_level_0,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
Nombre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bernie,San Bernardo,Blanco,77,74,2011-12-11
Max,Chow Chow,Blanco,45,28,2019-03-27


In [10]:
# Build a multi-level (hierarchical) index from two columns
df_ind2 = df.set_index(["Raza","Color"])
df_ind2

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Cafe,Bella,56,24,2013-07-01
Poodle,Negro,Charlie,43,24,2016-09-16
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Schnauzer,Gris,Cooper,49,17,2016-09-16
San Bernardo,Blanco,Bernie,77,74,2011-12-11
Chow Chow,Blanco,Max,45,28,2019-03-27
Schnauzer,Negro,Reed,45,15,2017-09-18


In [11]:
# Query on a single level: a list of labels selects by the outer level ("Raza")
df_ind2.loc[["Schnauzer","Poodle"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Schnauzer,Gris,Cooper,49,17,2016-09-16
Schnauzer,Negro,Reed,45,15,2017-09-18
Poodle,Negro,Charlie,43,24,2016-09-16


In [12]:
# Query on both levels: each tuple is a (Raza, Color) pair
df_ind2.loc[[("Schnauzer","Gris"),("Poodle","Negro")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Schnauzer,Gris,Cooper,49,17,2016-09-16
Poodle,Negro,Charlie,43,24,2016-09-16


In [13]:
# Sort by the index; with no arguments every level is used, the outer level
# ("Raza") first and then "Color"
df_ind2.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Blanco,Max,45,28,2019-03-27
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Labrador,Cafe,Bella,56,24,2013-07-01
Poodle,Negro,Charlie,43,24,2016-09-16
San Bernardo,Blanco,Bernie,77,74,2011-12-11
Schnauzer,Gris,Cooper,49,17,2016-09-16
Schnauzer,Negro,Reed,45,15,2017-09-18


In [31]:
# Sort by more than one level with a direction for each:
# "Color" ascending, then "Raza" descending
df_ind2.sort_index(level=["Color","Raza"], ascending = [True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
San Bernardo,Blanco,Bernie,77,74,2011-12-11
Chow Chow,Blanco,Max,45,28,2019-03-27
Labrador,Cafe,Bella,56,24,2013-07-01
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Schnauzer,Gris,Cooper,49,17,2016-09-16
Schnauzer,Negro,Reed,45,15,2017-09-18
Poodle,Negro,Charlie,43,24,2016-09-16


In [33]:
# First 3 records (positions 0, 1 and 2)
df[:3]

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
0,Bella,Labrador,Cafe,56,24,2013-07-01
1,Charlie,Poodle,Negro,43,24,2016-09-16
2,Lucy,Chow Chow,Cafe,46,24,2014-08-25


In [35]:
# Records at positions 2 through 4 (the end of the slice, 5, is excluded)
df[2:5]

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
2,Lucy,Chow Chow,Cafe,46,24,2014-08-25
3,Cooper,Schnauzer,Gris,49,17,2016-09-16
4,Bernie,San Bernardo,Blanco,77,74,2011-12-11


In [37]:
# An open slice is another way to get the whole DataFrame
df[:]

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
0,Bella,Labrador,Cafe,56,24,2013-07-01
1,Charlie,Poodle,Negro,43,24,2016-09-16
2,Lucy,Chow Chow,Cafe,46,24,2014-08-25
3,Cooper,Schnauzer,Gris,49,17,2016-09-16
4,Bernie,San Bernardo,Blanco,77,74,2011-12-11
5,Max,Chow Chow,Blanco,45,28,2019-03-27
6,Reed,Schnauzer,Negro,45,15,2017-09-18


In [39]:
# Slicing DataFrames that have a hierarchical index
# Start again from a frame indexed by (Raza, Color)
df_ind2 = df.set_index(["Raza","Color"])
df_ind2

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Cafe,Bella,56,24,2013-07-01
Poodle,Negro,Charlie,43,24,2016-09-16
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Schnauzer,Gris,Cooper,49,17,2016-09-16
San Bernardo,Blanco,Bernie,77,74,2011-12-11
Chow Chow,Blanco,Max,45,28,2019-03-27
Schnauzer,Negro,Reed,45,15,2017-09-18


In [45]:
# The index has to be sorted before it can be sliced by label
df_ind2 = df_ind2.sort_index(level="Raza", ascending = True)
df_ind2

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Blanco,Max,45,28,2019-03-27
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Labrador,Cafe,Bella,56,24,2013-07-01
Poodle,Negro,Charlie,43,24,2016-09-16
San Bernardo,Blanco,Bernie,77,74,2011-12-11
Schnauzer,Gris,Cooper,49,17,2016-09-16
Schnauzer,Negro,Reed,45,15,2017-09-18


In [47]:
# Then slice by index label; with .loc both ends of the range are included
df_ind2.loc["Chow Chow":"Poodle"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Blanco,Max,45,28,2019-03-27
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Labrador,Cafe,Bella,56,24,2013-07-01
Poodle,Negro,Charlie,43,24,2016-09-16


In [49]:
# Slice on both levels at once, using (Raza, Color) tuples as the endpoints
df_ind2.loc[("Chow Chow","Cafe"):("Schnauzer","Gris")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg,Fecha_Nac
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Cafe,Lucy,46,24,2014-08-25
Labrador,Cafe,Bella,56,24,2013-07-01
Poodle,Negro,Charlie,43,24,2016-09-16
San Bernardo,Blanco,Bernie,77,74,2011-12-11
Schnauzer,Gris,Cooper,49,17,2016-09-16


In [51]:
# Slice columns by label: keep every row, columns "Nombre" through "Peso_kg"
df_ind3 = df_ind2.loc[:,"Nombre":"Peso_kg"]
df_ind3

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chow Chow,Blanco,Max,45,28
Chow Chow,Cafe,Lucy,46,24
Labrador,Cafe,Bella,56,24
Poodle,Negro,Charlie,43,24
San Bernardo,Blanco,Bernie,77,74
Schnauzer,Gris,Cooper,49,17
Schnauzer,Negro,Reed,45,15


In [53]:
# Slice rows and columns in the same .loc call
df_ind4 = df_ind2.loc[("Chow Chow", "Cafe"):("Schnauzer","Gris"),"Nombre":"Peso_kg"]
df_ind4

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre,Altura_cm,Peso_kg
Raza,Color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chow Chow,Cafe,Lucy,46,24
Labrador,Cafe,Bella,56,24
Poodle,Negro,Charlie,43,24
San Bernardo,Blanco,Bernie,77,74
Schnauzer,Gris,Cooper,49,17


In [55]:
# Slice rows and columns at once
# Alternative: by row and column position instead of by label
# (.iloc leaves out the end position of each range)
df_ind4.iloc[1:3,0:1]

Unnamed: 0_level_0,Unnamed: 1_level_0,Nombre
Raza,Color,Unnamed: 2_level_1
Labrador,Cafe,Bella
Poodle,Negro,Charlie


In [59]:
# Mean height for each breed (rows) and colour (columns); combinations with
# no dog show up as NaN.
df_altura_por_raza_color = df.pivot_table(
    values="Altura_cm",
    index="Raza",
    columns="Color",
    aggfunc="mean",
)
df_altura_por_raza_color

Color,Blanco,Cafe,Gris,Negro
Raza,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chow Chow,45.0,46.0,,
Labrador,,56.0,,
Poodle,,,,43.0
San Bernardo,77.0,,,
Schnauzer,,,49.0,45.0


In [63]:
# Breeds from "Labrador" through "San Bernardo" in the pivot table
df_altura_por_raza_color.loc["Labrador":"San Bernardo"]

Color,Blanco,Cafe,Gris,Negro
Raza,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,,56.0,,
Poodle,,,,43.0
San Bernardo,77.0,,,


In [65]:
# Mean over all rows: one value per column (Color); NaN cells are skipped
df_altura_por_raza_color.mean(axis = "index")

Color
Blanco    61.0
Cafe      51.0
Gris      49.0
Negro     44.0
dtype: float64

In [69]:
# Mean over all columns: one value per row (Raza); NaN cells are skipped
df_altura_por_raza_color.mean(axis = "columns")

Raza
Chow Chow       45.5
Labrador        56.0
Poodle          43.0
San Bernardo    77.0
Schnauzer       47.0
dtype: float64