
# Groups and pivot tables

## How group operations work

## Group operations in pandas

In [None]:
# Columns kept for the small demonstration frame.
columns = [
    'ProductID',
    'Product Name',
    'Channel',
    'Unit Price',
    'Quantity',
    'Total',
]
# Only the last ten ledger rows are used in the examples that follow.
sample_df = ledger_df[columns].tail(n=10)

In [None]:
sample_df

In [None]:
sample_df.groupby('Channel').agg({'Total': 'sum'})

### The pandas group object

In [None]:
groups = sample_df.groupby('Channel')

In [None]:
type(groups)

In [None]:
groups.groups

In [None]:
groups.get_group('Bullseye')

In [None]:
groups['Total'].get_group('Bullseye')

In [None]:
groups[['Quantity', 'Total']].get_group('Bullseye')

In [None]:
groups['Total'].sum()

In [None]:
groups[['Quantity', 'Total']].sum()

In [None]:
sample_df.groupby('Channel')['Total'].sum()

### Aggregating group functions

In [None]:
# this works as well
# sample_df.groupby('Channel')['Total'].agg('sum')
sample_df.groupby('Channel').agg({'Total': 'sum'})

In [None]:
# Per channel: sum and mean of Total; sum, max and min of Quantity.
sample_df.groupby('Channel').agg(
    {
        'Total': ['sum', 'mean'],
        'Quantity': ['sum', 'max', 'min'],
    }
)

In [None]:
# Keep the per-channel aggregation in a variable so that
# individual result columns can be selected from it.
aggregate_df = sample_df.groupby('Channel').agg(
    {
        'Total': ['sum', 'mean'],
        'Quantity': ['sum', 'max', 'min'],
    }
)

# A (top level, sub level) tuple picks a single aggregated column:
# here the max column under Quantity.
aggregate_df.loc[:, ('Quantity', 'max')]

In [None]:
aggregate_df.loc[:, 'Quantity']

#### Custom aggregating functions

In [None]:
def total_diff(column):
    """Return the spread of ``column``: its largest value minus its smallest."""
    highest = column.max()
    lowest = column.min()
    return highest - lowest

sample_df.groupby('Channel').agg({'Total': ['min', 'max', total_diff]})

In [None]:
def custom_aggregating_function(column):
    """Hand ``column`` back untouched; no reduction to a single value is done."""
    passthrough = column
    return passthrough

sample_df.groupby('Channel').agg({'Total': custom_aggregating_function})

### Overthinking: Other group functions

In [None]:
def filter_group(group_df, min_total=200):
    """Keep a group only when its summed 'Total' exceeds a threshold.

    Written to be passed to ``DataFrame.groupby(...).apply``, which calls
    it once per group.

    Parameters
    ----------
    group_df : DataFrame
        Rows of a single group; must contain a 'Total' column.
    min_total : number, optional
        Value the group's summed 'Total' must strictly exceed for the
        group to be kept. Defaults to 200.

    Returns
    -------
    DataFrame or None
        ``group_df`` itself when the sum of 'Total' is greater than
        ``min_total``, otherwise ``None``.
    """
    if group_df['Total'].sum() > min_total:
        return group_df
    # Strictly-greater comparison: a group summing to exactly
    # ``min_total`` is rejected as well.
    return None

sample_df.groupby('Channel').apply(filter_group)

In [None]:
sample_df.groupby('Channel').apply(filter_group).reset_index(drop=True)

In [None]:
def percent_group_total(group_df):
    """Add each row's share of the group's 'Total' as a percentage.

    Written to be passed to ``DataFrame.groupby(...).apply``. The input
    frame is left untouched: pandas does not support functions that
    mutate the object handed to ``apply``, and writing into a slice of
    another frame can raise SettingWithCopy warnings, so the new column
    is attached to a fresh frame instead.

    Parameters
    ----------
    group_df : DataFrame
        Rows of a single group; must contain a 'Total' column.

    Returns
    -------
    DataFrame
        A new frame with the columns of ``group_df`` plus
        '% Group Total', holding ``Total / Total.sum() * 100`` rounded
        to two decimals.
    """
    share = group_df['Total'] / group_df['Total'].sum() * 100
    # The column name contains a space and a percent sign, so it has to
    # be passed to ``assign`` through dict unpacking.
    return group_df.assign(**{'% Group Total': share.round(2)})

sample_df.groupby('Channel').apply(percent_group_total)

## Stacking and unstacking

In [None]:
# Narrow frame for the stacking examples: three columns, first five rows.
columns = [
    'ProductID',
    'Channel',
    'Total',
]
sample_df = ledger_df[columns].head(n=5)

sample_df

In [None]:
sample_df.stack()

In [None]:
stacked_sample = sample_df.stack() 

stacked_sample.unstack()

## Pivot tables

In [None]:
# Read the ledger from the Excel workbook 'Q1Sales.xlsx'.
ledger_df = pd.read_excel('Q1Sales.xlsx')

# Convert the 'Deadline' column to datetimes, then derive a quarterly
# period from each deadline ('Q-DEC': quarters of a year ending in December).
ledger_df['Deadline'] = pd.to_datetime(ledger_df['Deadline'])
ledger_df['Deadline Quarter'] = ledger_df['Deadline'].dt.to_period(freq='Q-DEC')

# Last expression of the cell, so the notebook displays the frame.
ledger_df

In [None]:
ledger_df.groupby(['Channel', 'Deadline Quarter']).agg({'Quantity': 'sum'})

In [None]:
ledger_df.groupby(['Channel', 'Deadline Quarter']).agg({'Quantity': 'sum'}).unstack()

In [None]:
# Summed Quantity, one row per Channel and one column per Deadline Quarter.
pd.pivot_table(
    data=ledger_df,
    values='Quantity',
    index='Channel',
    columns='Deadline Quarter',
    aggfunc='sum',
)

In [None]:
# Same pivot of summed Quantity by Channel and Deadline Quarter, with an
# extra margin row and column labelled 'TOTAL'.
pd.pivot_table(
    data=ledger_df,
    values='Quantity',
    index='Channel',
    columns='Deadline Quarter',
    aggfunc='sum',
    margins=True,
    margins_name='TOTAL',
)