# Agrupación de datos
* Split
* Apply
* Combine


In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Load the three seaborn example datasets used by the cells below
iris = sns.load_dataset(name='iris')
titanic = sns.load_dataset(name='titanic')
tips = sns.load_dataset(name='tips')

In [None]:
# print(iris.head())
# print(titanic.head())
# print(tips.head())

### groupby

In [None]:
# Mean fare paid, split by passenger sex
(
    titanic
    .groupby(by='sex')['fare']
    .mean()
)

In [None]:
# Group by gender and class and calculate the average fare.
# 'class' is loaded by seaborn as a categorical column; pandas >= 2.1
# deprecates the implicit `observed` default for categorical groupers, so
# observed=False is passed explicitly to keep today's output (every
# category combination listed) and avoid the FutureWarning.
titanic.groupby(['sex', 'class'], observed=False)['fare'].mean()

In [None]:
# Group by class and gender and count the number of survivors
# (summing the 0/1 'survived' flag counts the passengers who survived).
# 'class' is loaded by seaborn as a categorical column; pandas >= 2.1
# deprecates the implicit `observed` default for categorical groupers, so
# observed=False is passed explicitly to keep the current output.
titanic.groupby(['class', 'sex'], observed=False)['survived'].sum()

In [None]:
# Group by day and time and calculate the total bill amount.
# 'day' and 'time' are loaded by seaborn as categorical columns; pandas
# >= 2.1 deprecates the implicit `observed` default for such groupers, so
# observed=False is passed explicitly: the result keeps a row for every
# day/time combination, including ones with no bills.
tips.groupby(['day', 'time'], observed=False)['total_bill'].sum()

In [None]:
# Group by sex and time of day and calculate the average tip amount.
# (The grouping keys are 'sex' and 'time', and the value is the raw 'tip'
# column, not a percentage of the bill.)
# Both keys are loaded by seaborn as categorical columns; pandas >= 2.1
# deprecates the implicit `observed` default for such groupers, so
# observed=False is passed explicitly to keep the current output.
tips.groupby(['sex', 'time'], observed=False)['tip'].mean()

### aggregate

In [None]:
# Mean, median and maximum of the 'age' column
titanic['age'].agg(func=['mean', 'median', 'max'])

In [None]:
# Column-specific summaries: mean/median of 'age', sum/mean of 'fare'
titanic.aggregate(func={
    'age': ['mean', 'median'],
    'fare': ['sum', 'mean'],
})

In [None]:
# Sum and mean of both the 'total_bill' and 'tip' columns
tips.aggregate(func={
    'total_bill': ['sum', 'mean'],
    'tip': ['sum', 'mean'],
})

## aggregate + groupby

In [None]:
# Per-sex summary: mean of 'age' and sum of 'fare'
(
    titanic
    .groupby(by='sex')
    .agg(func={'age': 'mean', 'fare': 'sum'})
)

In [None]:
# Per passenger class ('pclass'): mean of 'age', plus sum and mean of 'fare'
(
    titanic
    .groupby(by='pclass')
    .agg(func={'age': 'mean', 'fare': ['sum', 'mean']})
)

In [None]:
# Group by day and time and calculate multiple aggregation functions:
# mean of 'total_bill', sum of 'tip', and the count of non-null 'size'
# entries in each group.
# 'day' and 'time' are loaded by seaborn as categorical columns; pandas
# >= 2.1 deprecates the implicit `observed` default for such groupers, so
# observed=False is passed explicitly to keep the current output.
tips.groupby(['day', 'time'], observed=False).aggregate(
    {
        'total_bill': 'mean',
        'tip': 'sum',
        'size': 'count'
    }
)

### pivot_table

In [None]:
# Survival rate (mean of 'survived') with sex as rows and pclass as columns
pd.pivot_table(
    data=titanic,
    index='sex',
    columns='pclass',
    values='survived',
    aggfunc='mean',
)

In [None]:
# Create a pivot table with average total bill by day and time.
# 'day' and 'time' are loaded by seaborn as categorical columns; pandas
# >= 2.2 deprecates the implicit `observed` default of pivot_table for
# categorical groupers, so observed=False is passed explicitly to keep the
# current output and avoid the FutureWarning.
tips.pivot_table(
    values='total_bill',
    index='day',
    columns='time',
    aggfunc='mean',
    observed=False
)

### Bonus

In [None]:
# Define a custom function to calculate survival rate
def survival_rate(x):
    """Return the percentage of entries in ``x`` that are equal to 1.

    ``x`` must support element-wise ``== 1`` comparison yielding an object
    with a ``.sum()`` method (e.g. a pandas Series) and must support
    ``len()``.
    """
    survivors = (x == 1).sum()
    total = len(x)
    return survivors / total * 100

# Survival percentage for each (sex, pclass) group via the custom function
(
    titanic
    .groupby(by=['sex', 'pclass'])['survived']
    .apply(survival_rate)
)
