In [3]:
import numpy as np
import pandas as pd

# Raw dog records: name, breed, color, height (cm), weight (kg), birth date.
# dtype=object keeps each value exactly as written (str stays str, int stays
# int). Without it NumPy picks one common dtype for the whole array and
# silently turns every number into a string.
array = np.array(
    [
        ["Bella", "Labrador", "Cafe", 56, 24, "2013-07-01"],
        ["Charlie", "Poodle", "Negro", 43, 24, "2016-09-16"],
        ["Lucy", "Chow Chow", "Cafe", 46, 24, "2014-08-25"],
        ["Cooper", "Schnauzer", "Gris", 49, 17, "2016-09-16"],
        ["Bernie", "San Bernardo", "Blanco", 77, 74, "2011-12-11"],
        ["Max", "Chow Chow", "Blanco", 45, 28, "2019-03-27"],
        ["Reed", "Schnauzer", "Negro", 45, 15, "2017-09-18"],
    ],
    dtype=object,
)

In [5]:
# Build the frame from the raw records and cast the two numeric columns to
# integers in a single step; the date column is left as plain text.
nombres_columnas = ["Nombre", "Raza", "Color", "Altura_cm", "Peso_kg", "Fecha_Nac"]
df = pd.DataFrame(data=array, columns=nombres_columnas).astype(
    {"Altura_cm": int, "Peso_kg": int}
)
df

Unnamed: 0,Nombre,Raza,Color,Altura_cm,Peso_kg,Fecha_Nac
0,Bella,Labrador,Cafe,56,24,2013-07-01
1,Charlie,Poodle,Negro,43,24,2016-09-16
2,Lucy,Chow Chow,Cafe,46,24,2014-08-25
3,Cooper,Schnauzer,Gris,49,17,2016-09-16
4,Bernie,San Bernardo,Blanco,77,74,2011-12-11
5,Max,Chow Chow,Blanco,45,28,2019-03-27
6,Reed,Schnauzer,Negro,45,15,2017-09-18


In [15]:
# Mean weight across all dogs
df.loc[:, "Peso_kg"].mean()

29.428571428571427

In [11]:
# Earliest birth date. Fecha_Nac holds "YYYY-MM-DD" text, so min() compares
# strings; for this format the text order matches chronological order.
df.loc[:, "Fecha_Nac"].min()

'2011-12-11'

In [19]:
# An aggregation function receives a whole column of data, runs some
# computation over it, and returns a single value (e.g. avg, min, max).
# You can also write your own, as below.
def retorna_minimo(columna):
    """Return the smallest element of ``columna`` using the builtin ``min``."""
    minimo = min(columna)
    return minimo

# Apply the custom aggregation to the height column
df["Altura_cm"].aggregate(retorna_minimo)

43

In [21]:
def mayor77(columna):
    """Tell whether the largest element of ``columna`` is strictly above 77."""
    valor_maximo = max(columna)
    return valor_maximo > 77

# Aggregate the height column down to a single boolean
df["Altura_cm"].aggregate(mayor77)

False

In [9]:
# Statistical aggregation function
def percentil80(columna):
    """Return the 0.80 quantile of ``columna`` via its ``quantile`` method."""
    return columna.quantile(q=0.8)

# 80th percentile of the height column
df["Altura_cm"].aggregate(percentil80)

54.60000000000001

In [23]:
# Same statistical aggregation applied to several columns at once
df.loc[:, ["Altura_cm", "Peso_kg"]].agg(percentil80)

Altura_cm    54.6
Peso_kg      27.2
dtype: float64

In [25]:
# Applying multiple statistics
def percentil90(columna):
    """Return the 0.90 quantile of ``columna`` via its ``quantile`` method."""
    return columna.quantile(q=0.9)

# Passing a list of functions yields one labelled result per function
df["Altura_cm"].aggregate([percentil80, percentil90])

percentil80    54.6
percentil90    64.4
Name: Altura_cm, dtype: float64

In [27]:
# Show the raw height column for reference before the cumulative statistics
df.loc[:, "Altura_cm"]

0    56
1    43
2    46
3    49
4    77
5    45
6    45
Name: Altura_cm, dtype: int64

In [29]:
# Running total of the heights, row by row
df.loc[:, "Altura_cm"].cumsum()

0     56
1     99
2    145
3    194
4    271
5    316
6    361
Name: Altura_cm, dtype: int64

In [31]:
# Running minimum of the heights, row by row
df.loc[:, "Altura_cm"].cummin()

0    56
1    43
2    43
3    43
4    43
5    43
6    43
Name: Altura_cm, dtype: int64

In [33]:
# Number of dogs per breed
df.loc[:, "Raza"].value_counts()

Raza
Chow Chow       2
Schnauzer       2
Labrador        1
Poodle          1
San Bernardo    1
Name: count, dtype: int64

In [35]:
# Proportion of dogs per breed (counts divided by the total)
df.loc[:, "Raza"].value_counts(normalize=True)

Raza
Chow Chow       0.285714
Schnauzer       0.285714
Labrador        0.142857
Poodle          0.142857
San Bernardo    0.142857
Name: proportion, dtype: float64

In [39]:
# Summary per group, done by hand: filter the rows of each color, then
# average their weight and print it.
for color in ("Blanco", "Cafe", "Gris", "Negro"):
    es_del_color = df["Color"] == color
    print(df.loc[es_del_color, "Peso_kg"].mean())

51.0
24.0
17.0
19.5


In [41]:
# Summary per group, the better option: let groupby split by color
df.groupby(by="Color")["Peso_kg"].mean()

Color
Blanco    51.0
Cafe      24.0
Gris      17.0
Negro     19.5
Name: Peso_kg, dtype: float64

In [45]:
# Group by color and compute several statistics of the weight at once.
# The statistics are named with strings rather than the builtins min/max/sum:
# pandas emits a FutureWarning when builtins are passed to agg, and the
# string aliases produce the same "min", "max", "sum" column labels.
df.groupby("Color")["Peso_kg"].agg(["min", "max", "sum"])

  df.groupby("Color")["Peso_kg"].agg([min,max,sum])
  df.groupby("Color")["Peso_kg"].agg([min,max,sum])
  df.groupby("Color")["Peso_kg"].agg([min,max,sum])


Unnamed: 0_level_0,min,max,sum
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blanco,28,74,102
Cafe,24,24,48
Gris,17,17,17
Negro,15,24,39


In [47]:
# Mean weight for every (color, breed) combination present in the data
df.groupby(by=["Color", "Raza"])["Peso_kg"].mean()

Color   Raza        
Blanco  Chow Chow       28.0
        San Bernardo    74.0
Cafe    Chow Chow       24.0
        Labrador        24.0
Gris    Schnauzer       17.0
Negro   Poodle          24.0
        Schnauzer       15.0
Name: Peso_kg, dtype: float64

In [49]:
df.pivot_table(values = "Peso_kg", index = "Color", aggfunc = [np.mean, np.median])

  df.pivot_table(values = "Peso_kg", index = "Color", aggfunc = [np.mean, np.median])
  df.pivot_table(values = "Peso_kg", index = "Color", aggfunc = [np.mean, np.median])


Unnamed: 0_level_0,mean,median
Unnamed: 0_level_1,Peso_kg,Peso_kg
Color,Unnamed: 1_level_2,Unnamed: 2_level_2
Blanco,51.0,51.0
Cafe,24.0,24.0
Gris,17.0,17.0
Negro,19.5,19.5


In [51]:
# Mean weight broken down by two variables: color (rows) and breed (columns).
# No aggfunc is given, so the mean is used by default.
df.pivot_table(
    values="Peso_kg",
    index="Color",
    columns="Raza",
)

Raza,Chow Chow,Labrador,Poodle,San Bernardo,Schnauzer
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Blanco,28.0,,,74.0,
Cafe,24.0,24.0,,,
Gris,,,,,17.0
Negro,,,24.0,,15.0


In [53]:
# Same pivot, but color/breed combinations with no dogs show 0 instead of NaN
df.pivot_table(
    values="Peso_kg",
    index="Color",
    columns="Raza",
    fill_value=0,
)

Raza,Chow Chow,Labrador,Poodle,San Bernardo,Schnauzer
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Blanco,28.0,0.0,0.0,74.0,0.0
Cafe,24.0,24.0,0.0,0.0,0.0
Gris,0.0,0.0,0.0,0.0,17.0
Negro,0.0,0.0,24.0,0.0,15.0


In [57]:
# Mean weight by color and breed, with an extra "All" row/column holding the
# aggregate over each full column/row (margins=True). Missing combinations are
# shown as 0. aggfunc is the string "mean" rather than np.mean because passing
# the NumPy callable triggers a pandas FutureWarning.
df.pivot_table(
    values="Peso_kg",
    index="Color",
    columns="Raza",
    fill_value=0,
    margins=True,
    aggfunc="mean",
)

  df.pivot_table(values = "Peso_kg", index = "Color", columns = "Raza", fill_value = 0, margins = True, aggfunc = np.mean)
  df.pivot_table(values = "Peso_kg", index = "Color", columns = "Raza", fill_value = 0, margins = True, aggfunc = np.mean)
  df.pivot_table(values = "Peso_kg", index = "Color", columns = "Raza", fill_value = 0, margins = True, aggfunc = np.mean)


Raza,Chow Chow,Labrador,Poodle,San Bernardo,Schnauzer,All
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Blanco,28.0,0.0,0.0,74.0,0.0,51.0
Cafe,24.0,24.0,0.0,0.0,0.0,24.0
Gris,0.0,0.0,0.0,0.0,17.0,17.0
Negro,0.0,0.0,24.0,0.0,15.0,19.5
All,26.0,24.0,24.0,74.0,16.0,29.428571


In [59]:
# Median weight by color and breed, with an extra "All" row/column holding the
# aggregate over each full column/row (margins=True). Missing combinations are
# shown as 0. aggfunc is the string "median" rather than np.median because
# passing the NumPy callable triggers a pandas FutureWarning.
df.pivot_table(
    values="Peso_kg",
    index="Color",
    columns="Raza",
    fill_value=0,
    margins=True,
    aggfunc="median",
)

  df.pivot_table(values = "Peso_kg", index = "Color", columns = "Raza", fill_value = 0, margins = True, aggfunc = np.median)
  df.pivot_table(values = "Peso_kg", index = "Color", columns = "Raza", fill_value = 0, margins = True, aggfunc = np.median)
  df.pivot_table(values = "Peso_kg", index = "Color", columns = "Raza", fill_value = 0, margins = True, aggfunc = np.median)


Raza,Chow Chow,Labrador,Poodle,San Bernardo,Schnauzer,All
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Blanco,28.0,0.0,0.0,74.0,0.0,51.0
Cafe,24.0,24.0,0.0,0.0,0.0,24.0
Gris,0.0,0.0,0.0,0.0,17.0,17.0
Negro,0.0,0.0,24.0,0.0,15.0,19.5
All,26.0,24.0,24.0,74.0,16.0,24.0


NameError: name 'final_result' is not defined