# Ejemplo 2 Agrupar datos en Pandas

## 1. Instalamos e importamos los módulos

In [1]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd

## 2. Generamos un set de datos

In [3]:
gender = ["Male", "Female"]
income = ["Poor", "Middle Class", "Rich"]

In [4]:
n = 100

In [5]:
gender_data = []
income_data = []

for i in range(0,n):
    gender_data.append(np.random.choice(gender))
    income_data.append(np.random.choice(income))

In [6]:
height = 160 + 30 * np.random.randn(n)
weight = 65 + 25 * np.random.randn(n)
age = 30 + 12 * np.random.randn(n)
income = 18000 + 3500 * np.random.rand(n)

In [7]:
df = pd.DataFrame(
    {
        "Gender" : gender_data,
        "Economic Status" : income_data,
        "Height" : height,
        "Weight" : weight,
        "Age" : age,
        "Income" : income
    }
)

In [9]:
df.head(100)

Unnamed: 0,Gender,Economic Status,Height,Weight,Age,Income
0,Male,Rich,158.469906,41.474343,41.208188,18097.761698
1,Female,Poor,156.607469,53.625921,22.207379,18445.587603
2,Female,Rich,166.656723,63.231791,51.171528,20930.119717
3,Male,Poor,166.644739,87.394055,5.243541,21007.879149
4,Male,Middle Class,174.887768,91.042658,31.687781,19792.402986
...,...,...,...,...,...,...
95,Female,Rich,106.622420,72.890533,18.026859,19978.057678
96,Female,Rich,105.946472,50.943543,29.208829,19860.109799
97,Female,Middle Class,209.057645,17.138595,36.181396,19111.663096
98,Male,Middle Class,112.524640,69.436261,19.912042,20997.541152


## 3. Agrupamos los datos

In [10]:
df.groupby("Gender").size()

Gender
Female    57
Male      43
dtype: int64

In [12]:
df.groupby(["Gender", "Economic Status"]).size()

Gender  Economic Status
Female  Middle Class       16
        Poor               20
        Rich               21
Male    Middle Class       15
        Poor               16
        Rich               12
dtype: int64

In [11]:
type(df.groupby("Gender"))

pandas.core.groupby.generic.DataFrameGroupBy

### 3.1 Obtenemos datos de cada grupo

In [48]:
df.groupby("Gender").sum()

Unnamed: 0_level_0,Height,Weight,Age,Income
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,8227.868667,3081.701814,1447.410617,983693.18796
Male,8069.84635,3290.06486,1646.53875,991807.640371


In [49]:
df.groupby(["Gender", "Economic Status"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Height,Weight,Age,Income
Gender,Economic Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,Middle Class,14,14,14,14
Female,Poor,16,16,16,16
Female,Rich,20,20,20,20
Male,Middle Class,21,21,21,21
Male,Poor,16,16,16,16
Male,Rich,13,13,13,13


In [13]:
df.groupby(["Gender", "Economic Status"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Height,Weight,Age,Income
Gender,Economic Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,Middle Class,176.716253,63.742724,27.581875,19414.671477
Female,Poor,156.781292,61.887188,28.861226,19708.528486
Female,Rich,156.719931,67.647875,32.287462,19725.934789
Male,Middle Class,152.679538,77.505236,28.235858,19795.713199
Male,Poor,153.76098,62.675002,29.956483,20048.359929
Male,Rich,159.377895,56.690633,36.247725,19221.195328


In [50]:
df.groupby(["Gender", "Economic Status"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Height,Weight,Age,Income
Gender,Economic Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,Middle Class,2310.99958,772.033177,428.879585,273923.466588
Female,Poor,2573.841867,1055.406416,412.336335,309358.981222
Female,Rich,3343.02722,1254.262222,606.194697,400410.740151
Male,Middle Class,3280.06005,1396.812153,734.910331,414866.324304
Male,Poor,2722.191384,1051.12733,452.660945,318156.622007
Male,Rich,2067.594916,842.125376,458.967473,258784.69406


In [53]:
df.groupby(["Gender", "Economic Status"]).size()

Gender  Economic Status
Female  Middle Class       14
        Poor               16
        Rich               20
Male    Middle Class       21
        Poor               16
        Rich               13
dtype: int64

In [54]:
df.groupby(["Gender", "Economic Status"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Height,Height,Height,Height,Height,Height,Height,Height,Weight,Weight,...,Age,Age,Income,Income,Income,Income,Income,Income,Income,Income
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Gender,Economic Status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Female,Middle Class,14.0,165.071399,21.549998,123.067135,154.568453,166.846557,181.905457,197.686877,14.0,55.145227,...,38.39364,44.509585,14.0,19565.961899,964.949153,18288.475458,18819.093336,19480.06953,20111.762952,21155.769218
Female,Poor,16.0,160.865117,26.702154,112.058276,145.061318,164.715009,179.763031,205.241235,16.0,65.962901,...,32.604048,39.621672,16.0,19334.936326,743.400585,18125.319656,18893.297435,19154.548768,20009.630857,20446.331126
Female,Rich,20.0,167.151361,34.656192,105.78748,148.533053,167.598878,178.723358,257.029946,20.0,62.713111,...,39.508275,44.791575,20.0,20020.537008,692.482413,19116.294547,19383.589143,19945.719206,20327.958597,21420.356723
Male,Middle Class,21.0,156.193336,33.609564,71.071725,134.958485,150.745877,178.094137,211.376588,21.0,66.514864,...,43.396818,49.548179,21.0,19755.539253,1037.618101,18170.343794,18871.678753,19546.147955,20625.193052,21352.897374
Male,Poor,16.0,170.136962,19.647337,132.823628,159.380992,175.631117,183.560006,200.444804,16.0,65.695458,...,38.460117,48.463891,16.0,19884.788875,1025.583072,18107.150415,19038.961651,20147.935974,20623.579415,21262.6624
Male,Rich,13.0,159.045763,29.440948,94.318572,142.809259,155.309138,181.117475,197.452504,13.0,64.778875,...,44.470314,50.551993,13.0,19906.514928,1075.124385,18025.591743,19077.480981,19523.618797,20946.299878,21416.964618
