In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

import warnings
warnings.simplefilter("ignore")

## One Attribute Group

**Please run all cells before this cell, including the import cell at the top of the notebook.**

In [None]:
# Read the full cones table, then derive the working table for this section:
# the 'Color' column is dropped and the row at index 5 is excluded.
all_cones = Table.read_table('cones.csv')
cones = (
    all_cones
    .drop('Color')
    .exclude(5)
)
cones

In [None]:
# With no second argument, group counts the rows for each distinct Flavor.
cones.group('Flavor')

In [None]:
# The second argument is applied to each group's values in the remaining
# columns; list just shows which values ended up in each group.
cones.group('Flavor', list)

In [None]:
# len reports how many values each Flavor group contains.
cones.group('Flavor', len)

In [None]:
# min reports the smallest value of each remaining column within each Flavor.
cones.group('Flavor', min)

In [None]:
# The same computation done by hand for one group: the smallest Price among
# the rows whose Flavor is 'chocolate'.
min(cones.where('Flavor', 'chocolate').column('Price'))

In [None]:
# The same by-hand check for the 'strawberry' rows.
min(cones.where('Flavor', 'strawberry').column('Price'))

In [None]:
# np.average reports the mean of each remaining column within each Flavor.
cones.group('Flavor', np.average)

In [None]:
def data_range(x):
    """Return the spread of the values in ``x``: the largest minus the smallest.

    ``x`` is any collection accepted by the built-in ``max`` and ``min``
    whose elements support subtraction.
    """
    largest = max(x)
    smallest = min(x)
    return largest - smallest

In [None]:
# A user-defined function works as the second argument too: here each group's
# values are reduced with data_range (largest minus smallest).
cones.group('Flavor', data_range)

In [None]:
# Load the salary table, keep only the rows whose season is 2020, and drop the
# 'rank' and 'season' columns.
nba = (
    Table.read_table('nba_salaries.csv')
    .where('season', 2020)
    .drop('rank', 'season')
)
nba

In [None]:
# Keep only the two relevant columns so that sum is applied to salaries alone,
# then list the teams from the largest total to the smallest. The sort key is
# the column at index 1 of the grouped table.
teams_and_money = nba.select('team', 'salary')
(
    teams_and_money
    .group('team', sum)
    .sort(1, descending=True)
)

In [None]:
# Grouping the full table applies sum to every column other than 'team', not
# only to 'salary'.
# NOTE(review): presumably shown to contrast with selecting the two columns
# first — confirm intent.
nba.group('team', sum)

In [None]:
# Narrow the table to position and salary, then count the rows per position.
position_and_money = nba.select('position', 'salary')
position_and_money.group('position')

In [None]:
# Average salary within each position.
position_and_money.group('position', np.average)

## Cross Classification

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# The full cones table as read from the CSV (the 'Color' column is still here).
all_cones

In [None]:
# Row counts per Flavor, using a single grouping column.
all_cones.group('Flavor')

In [None]:
# Passing a list of labels cross-classifies: one row, with its count, for each
# Flavor/Color combination that occurs in the table.
all_cones.group(['Flavor', 'Color'])

In [None]:
# With a function: the max of each remaining column within each Flavor/Color
# combination.
all_cones.group(['Flavor', 'Color'], max)

In [None]:
# The 2020 salary table built earlier in the notebook.
nba

In [None]:
# Average of the remaining column(s) for each team/position combination.
# NOTE(review): drop(0) removes the first column by position — presumably the
# player-name column, which cannot be averaged; confirm against the CSV.
nba.drop(0).group(['team', 'position'], np.average)

In [None]:
full_table = Table.read_table('educ_inc.csv')

# Keep the rows whose Year is '1/1/14 0:00' and whose Age is not '00 to 17',
# remove the first column, and order the rows by 'Population Count'.
# NOTE(review): drop(0) is positional — presumably it removes the now-constant
# 'Year' column; confirm against the CSV's column order.
ca_2014 = (
    full_table
    .where('Year', are.equal_to('1/1/14 0:00'))
    .where('Age', are.not_equal_to('00 to 17'))
    .drop(0)
    .sort('Population Count')
)
ca_2014

In [None]:
# Remove the first remaining column by position.
# NOTE(review): judging by the variable name this is presumably the 'Age'
# column — confirm against the table displayed above.
no_ages = ca_2014.drop(0)
no_ages

In [None]:
# Group by the first three columns (given by index) and sum the remaining
# column(s) within each combination.
no_ages.group([0, 1, 2], sum)

## Example 1: NBA Salaries with group

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# NOTE(review): `...` is a placeholder, so this cell currently just displays
# Ellipsis — presumably it is meant to be filled in during the exercise; confirm.
starter_salaries = ...
starter_salaries

In [None]:
# Largest salary at each position among the rows whose team is
# 'Golden State Warriors'.
(
    nba
    .where('team', 'Golden State Warriors')
    .select('position', 'salary')
    .group('position', max)
)

## Pivot Tables

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# The full cones table again, as the starting point for pivoting.
all_cones

In [None]:
# Cross-classification with group: one row per Flavor/Color combination that
# occurs, with its count.
all_cones.group(['Flavor', 'Color'])

In [None]:
# The first argument supplies the column labels (one per Flavor) and the
# second supplies the rows (one per Color); each cell holds a count.
all_cones.pivot('Flavor', 'Color')   # pivot table, contingency table

In [None]:
# The same counts with the roles swapped: one column per Color, one row per
# Flavor.
all_cones.pivot('Color', 'Flavor')

In [None]:
# Instead of counts, fill each cell with the max of the 'Price' values for
# that Flavor/Color combination.
all_cones.pivot('Color', 'Flavor', values = 'Price', collect = max)

In [None]:
# The 2020 salary table, as the starting point for the pivot below.
nba

In [None]:
# group version: one row per team/position combination, with the average of
# the remaining column(s).
# NOTE(review): drop(0) removes the first column by position — presumably the
# player-name column; confirm against the CSV.
nba.drop(0).group(['team', 'position'], np.average)

In [None]:
# pivot version: one row per team, one column per position, each cell holding
# the average salary for that combination.
nba.pivot(
    'position',
    'team',
    values='salary',
    collect=np.average,
)

## Example 2: Skyscrapers

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# From the CORGIS Dataset Project
# By Austin Cory Bart acbart@vt.edu
# Version 2.0.0, created 3/22/2016
# https://corgis-edu.github.io/corgis/csv/skyscrapers/

# Year that building ages are measured from. Named rather than written inline
# so the choice is visible and can be changed in one place.
REFERENCE_YEAR = 2022

sky = Table.read_table('skyscrapers.csv')
# Replace the 'completed' column with the building's age as of REFERENCE_YEAR.
sky = (sky.with_column('age', REFERENCE_YEAR - sky.column('completed'))
          .drop('completed'))
sky.show(3)

In [None]:
# Tallest height for each city/material combination that occurs.
(
    sky
    .select('city', 'material', 'height')
    .group(['city', 'material'], max)
)

In [None]:
# One row per city and one column per material; each cell is the max 'height'
# for that combination.
grid = sky.pivot('material', 'city', 'height', max)
grid

In [None]:
# Take the 'concrete' and 'steel' columns of the pivot table and show the
# table with their signed difference (steel minus concrete) added as a column.
# The result is only displayed; `grid` is not reassigned.
tallest_concrete = grid.column('concrete')
tallest_steel = grid.column('steel')
grid.with_column('difference', tallest_steel - tallest_concrete)

In [None]:
# Tallest height for each city/material combination, passing the aggregating
# function by keyword.
(
    sky
    .select('material', 'city', 'height')
    .group(['city', 'material'], collect=max)
)

In [None]:
# Pivot with keyword arguments: rows are cities, columns are materials, and
# each cell is the max 'height'. show() with no argument displays every row.
sky_p = sky.pivot('material', 'city', values='height', collect=max)
sky_p.show()

In [None]:
# Absolute gap between the 'steel' and 'concrete' columns of the pivot table,
# stored as a 'difference' column.
height_gap = abs(sky_p.column('steel') - sky_p.column('concrete'))
sky_p = sky_p.with_column('difference', height_gap)
sky_p

In [None]:
# Rows with the largest 'difference' value first.
sky_p.sort('difference', descending=True)